rino 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (87) hide show
  1. data/README +44 -0
  2. data/Rakefile +123 -0
  3. data/ext/extconf.rb +26 -0
  4. data/ext/ruby_inchi_main.so +0 -0
  5. data/ext/src/aux2atom.h +2786 -0
  6. data/ext/src/comdef.h +148 -0
  7. data/ext/src/e_0dstereo.c +3014 -0
  8. data/ext/src/e_0dstereo.h +31 -0
  9. data/ext/src/e_comdef.h +57 -0
  10. data/ext/src/e_ctl_data.h +147 -0
  11. data/ext/src/e_ichi_io.c +498 -0
  12. data/ext/src/e_ichi_io.h +40 -0
  13. data/ext/src/e_ichi_parms.c +37 -0
  14. data/ext/src/e_ichi_parms.h +41 -0
  15. data/ext/src/e_ichicomp.h +50 -0
  16. data/ext/src/e_ichierr.h +40 -0
  17. data/ext/src/e_ichimain.c +593 -0
  18. data/ext/src/e_ichisize.h +43 -0
  19. data/ext/src/e_inchi_atom.c +75 -0
  20. data/ext/src/e_inchi_atom.h +33 -0
  21. data/ext/src/e_inpdef.h +41 -0
  22. data/ext/src/e_mode.h +706 -0
  23. data/ext/src/e_mol2atom.c +649 -0
  24. data/ext/src/e_readinch.c +58 -0
  25. data/ext/src/e_readmol.c +54 -0
  26. data/ext/src/e_readmol.h +180 -0
  27. data/ext/src/e_readstru.c +251 -0
  28. data/ext/src/e_readstru.h +33 -0
  29. data/ext/src/e_util.c +284 -0
  30. data/ext/src/e_util.h +61 -0
  31. data/ext/src/extr_ct.h +251 -0
  32. data/ext/src/ichi.h +206 -0
  33. data/ext/src/ichi_bns.c +7999 -0
  34. data/ext/src/ichi_bns.h +231 -0
  35. data/ext/src/ichican2.c +5000 -0
  36. data/ext/src/ichicano.c +2195 -0
  37. data/ext/src/ichicano.h +49 -0
  38. data/ext/src/ichicans.c +1625 -0
  39. data/ext/src/ichicant.h +379 -0
  40. data/ext/src/ichicomn.h +260 -0
  41. data/ext/src/ichicomp.h +50 -0
  42. data/ext/src/ichidrp.h +119 -0
  43. data/ext/src/ichierr.h +124 -0
  44. data/ext/src/ichiisot.c +101 -0
  45. data/ext/src/ichilnct.c +286 -0
  46. data/ext/src/ichimain.h +132 -0
  47. data/ext/src/ichimak2.c +1189 -0
  48. data/ext/src/ichimake.c +3812 -0
  49. data/ext/src/ichimake.h +205 -0
  50. data/ext/src/ichimap1.c +851 -0
  51. data/ext/src/ichimap2.c +2856 -0
  52. data/ext/src/ichimap4.c +1609 -0
  53. data/ext/src/ichinorm.c +741 -0
  54. data/ext/src/ichinorm.h +67 -0
  55. data/ext/src/ichiparm.c +45 -0
  56. data/ext/src/ichiparm.h +1441 -0
  57. data/ext/src/ichiprt1.c +3612 -0
  58. data/ext/src/ichiprt2.c +1511 -0
  59. data/ext/src/ichiprt3.c +3011 -0
  60. data/ext/src/ichiqueu.c +1003 -0
  61. data/ext/src/ichiring.c +326 -0
  62. data/ext/src/ichiring.h +49 -0
  63. data/ext/src/ichisize.h +35 -0
  64. data/ext/src/ichisort.c +539 -0
  65. data/ext/src/ichister.c +3538 -0
  66. data/ext/src/ichister.h +35 -0
  67. data/ext/src/ichitaut.c +3843 -0
  68. data/ext/src/ichitaut.h +387 -0
  69. data/ext/src/ichitime.h +74 -0
  70. data/ext/src/inchi_api.h +670 -0
  71. data/ext/src/inchi_dll.c +1480 -0
  72. data/ext/src/inchi_dll.h +34 -0
  73. data/ext/src/inchi_dll_main.c +23 -0
  74. data/ext/src/inchi_dll_main.h +31 -0
  75. data/ext/src/inpdef.h +328 -0
  76. data/ext/src/lreadmol.h +1246 -0
  77. data/ext/src/mode.h +706 -0
  78. data/ext/src/ruby_inchi_main.c +558 -0
  79. data/ext/src/runichi.c +4179 -0
  80. data/ext/src/strutil.c +3861 -0
  81. data/ext/src/strutil.h +182 -0
  82. data/ext/src/util.c +1130 -0
  83. data/ext/src/util.h +85 -0
  84. data/lib/clean_tempfile.rb +220 -0
  85. data/lib/rino.rb +111 -0
  86. data/test/test.rb +386 -0
  87. metadata +130 -0
@@ -0,0 +1,3843 @@
1
+ /*
2
+ * International Union of Pure and Applied Chemistry (IUPAC)
3
+ * International Chemical Identifier (InChI)
4
+ * Version 1
5
+ * Software version 1.00
6
+ * April 13, 2005
7
+ * Developed at NIST
8
+ */
9
+
10
+ #include <stdio.h>
11
+ #include <stdlib.h>
12
+ #include <string.h>
13
+
14
+ #include "mode.h"
15
+
16
+ #include "inpdef.h"
17
+ #include "extr_ct.h"
18
+ #include "inpdef.h"
19
+ #include "ichitaut.h"
20
+ #include "ichinorm.h"
21
+ #include "ichicant.h"
22
+ #include "ichicomn.h"
23
+
24
+ #include "ichicomp.h"
25
+
26
+ #include "util.h"
27
+
28
+ #include "ichi_bns.h"
29
+ /* local prototypes; RegisterEndPoints() and SetTautomericBonds() are defined further down in this file */
30
+ int SetTautomericBonds( inp_ATOM *at, int nNumBondPos, T_BONDPOS *BondPos );
31
+ int CompRankTautomer(const void* a1, const void* a2 );
32
+ int RegisterEndPoints( T_GROUP_INFO *t_group_info, /* T_GROUP *t_group, int *pnum_t, int max_num_t,*/
33
+ T_ENDPOINT *EndPoint, int nNumEndPoints, inp_ATOM *at, int num_atoms, C_GROUP_INFO *cgi
34
+ , struct BalancedNetworkStructure *pBNS );
35
+ int cmpTGroupNumber( const void *a1, const void *a2 );
36
+ int comp_candidates( const void *a1, const void *a2 );
37
+ int MoveEndpoint( inp_ATOM *at, S_CANDIDATE *s_candidate, AT_NUMB endpoint, AT_NUMB *nTGroupNewNumbers,
38
+ AT_NUMB *nTGroupPosition, int nNewTGroupOrd, T_GROUP_INFO *t_group_info);
39
+
40
+ int FindAccessibleEndPoints( T_ENDPOINT *EndPoint, int *nNumEndPoints, T_BONDPOS *BondPos, int *nNumBondPos,
41
+ struct BalancedNetworkStructure *pBNS, struct BalancedNetworkData *pBD,
42
+ inp_ATOM *at, int num_atoms, C_GROUP_INFO *cgi );
43
+
44
+ /* bits for GetChargeType (nGetEndpointInfo() tests the cChargeSubtype it returns
   against C_SUBTYPE_H_ACCEPT and C_SUBTYPE_H_DONOR as bit masks) */
45
+
46
+ #define C_SUBTYPE_CHARGED 0 /* value is zero, so it cannot be tested with a bitwise AND */
47
+ #define C_SUBTYPE_p_DONOR 1 /* new */
48
+ #define C_SUBTYPE_p_ACCEPT 2 /* new */
49
+ #define C_SUBTYPE_H_ACCEPT 4 /* makes nGetEndpointInfo() report the atom as an acceptor */
50
+ #define C_SUBTYPE_H_DONOR 8 /* makes nGetEndpointInfo() report the atom as a donor */
51
+ #define C_SUBTYPE_NEUTRAL 16
52
+
53
+
54
+ /* internal stack array size: RegisterEndPoints() declares its scratch arrays
   with MAX_STACK_ARRAY_LEN+1 elements and switches to heap memory beyond that */
55
+ #define MAX_STACK_ARRAY_LEN 127
56
+ #define MAX_TGROUP_ARRAY_LEN 127
57
+
58
+ /* local prototypes */
59
+ int GetChargeType( inp_ATOM *atom, int iat, S_CHAR *cChargeSubtype );
60
+ int GetNeutralRepsIfNeeded( AT_NUMB *pri, AT_NUMB *prj, inp_ATOM *at, int num_atoms, T_ENDPOINT *EndPoint, int nNumEndPoints, C_GROUP_INFO *cgi );
61
+ int bCanBeACPoint( inp_ATOM *at, S_CHAR cCharge, S_CHAR cChangeValence, S_CHAR neutral_bonds_valence,
62
+ S_CHAR neutral_valence, S_CHAR nEndpointValence, S_CHAR *cChargeSubtype );
63
+ int CmpCCandidates( const void *a1, const void *a2 );
64
+ int RegisterCPoints( C_GROUP *c_group, int *pnum_c, int max_num_c, T_GROUP_INFO *t_group_info,
65
+ int point1, int point2, int ctype, inp_ATOM *at, int num_atoms );
66
+ int GetSaltChargeType( inp_ATOM *at, int at_no, T_GROUP_INFO *t_group_info, int *s_subtype );
67
+ int GetOtherSaltChargeType( inp_ATOM *at, int at_no, T_GROUP_INFO *t_group_info, int *s_subtype, int bAccept_O );
68
+ int MergeSaltTautGroupsBlind( inp_ATOM *at, int s_type, int num_atoms, S_GROUP_INFO *s_group_info, int nNumCandidates,
69
+ T_GROUP_INFO *t_group_info, C_GROUP_INFO *c_group_info,
70
+ struct BalancedNetworkStructure *pBNS );
71
+ int ConnectSaltTGroups2SuperTGroup( inp_ATOM *at, int num_atoms, S_GROUP_INFO *s_group_info, int nNumCandidates,
72
+ T_GROUP_INFO *t_group_info, C_GROUP_INFO *c_group_info,
73
+ struct BalancedNetworkStructure *pBNS, int *nNewTGroupNumber, int *vertSuperTGroup );
74
+ int bDoNotMergeNonTautAtom(inp_ATOM *at, int at_no);
75
+ int GetOtherSaltType( inp_ATOM *at, int at_no, int *s_subtype );
76
+
77
+
78
+ /****************************************************************/
79
+ /* tautomers: Sorting globals (non-static file-scope pointer; NOTE(review): presumably read by a sort comparator such as CompRankTautomer -- confirm) */
80
+ AT_RANK *pn_tRankForSort;
81
+
82
+ /*************************************************************************************/
83
+ int is_centerpoint_elem( U_CHAR el_number )
84
+ {
85
+ static U_CHAR el_numb[12];
86
+ static int len;
87
+ int i;
88
+ if ( !el_numb[0] && !len ) {
89
+ el_numb[len++] = (U_CHAR)get_periodic_table_number( "C" );
90
+ el_numb[len++] = (U_CHAR)get_periodic_table_number( "N" );
91
+ el_numb[len++] = (U_CHAR)get_periodic_table_number( "P" );
92
+ el_numb[len++] = (U_CHAR)get_periodic_table_number( "S" );
93
+ el_numb[len++] = (U_CHAR)get_periodic_table_number( "I" );
94
+ el_numb[len++] = (U_CHAR)get_periodic_table_number( "As" );
95
+ el_numb[len++] = (U_CHAR)get_periodic_table_number( "Sb" );
96
+ el_numb[len++] = (U_CHAR)get_periodic_table_number( "Se" );
97
+ el_numb[len++] = (U_CHAR)get_periodic_table_number( "Te" );
98
+ el_numb[len++] = (U_CHAR)get_periodic_table_number( "Cl" );
99
+ el_numb[len++] = (U_CHAR)get_periodic_table_number( "Br" );
100
+ }
101
+ for ( i = 0; i < len; i ++ ) {
102
+ if ( el_numb[i] == el_number ) {
103
+ return 1;
104
+ }
105
+ }
106
+ return 0;
107
+ }
108
+ /*************************************************************************************/
109
+ int is_centerpoint_elem_strict( U_CHAR el_number )
110
+ {
111
+ static U_CHAR el_numb[6];
112
+ static int len;
113
+ int i;
114
+ if ( !el_numb[0] && !len ) {
115
+ el_numb[len++] = (U_CHAR)get_periodic_table_number( "C" );
116
+ el_numb[len++] = (U_CHAR)get_periodic_table_number( "N" );
117
+ el_numb[len++] = (U_CHAR)get_periodic_table_number( "P" );
118
+ el_numb[len++] = (U_CHAR)get_periodic_table_number( "As" );
119
+ el_numb[len++] = (U_CHAR)get_periodic_table_number( "Sb" );
120
+ }
121
+ for ( i = 0; i < len; i ++ ) {
122
+ if ( el_numb[i] == el_number ) {
123
+ return 1;
124
+ }
125
+ }
126
+ return 0;
127
+ }
128
+ /*************************************************************************************/
129
+ int get_endpoint_valence( U_CHAR el_number )
130
+ {
131
+ static U_CHAR el_numb[6];
132
+ static int len, len2;
133
+ int i;
134
+ if ( !el_numb[0] && !len ) {
135
+ el_numb[len++] = (U_CHAR)get_periodic_table_number( "O" );
136
+ el_numb[len++] = (U_CHAR)get_periodic_table_number( "S" );
137
+ el_numb[len++] = (U_CHAR)get_periodic_table_number( "Se" );
138
+ el_numb[len++] = (U_CHAR)get_periodic_table_number( "Te" );
139
+ len2 = len;
140
+ el_numb[len++] = (U_CHAR)get_periodic_table_number( "N" );
141
+ }
142
+ for ( i = 0; i < len; i ++ ) {
143
+ if ( el_numb[i] == el_number ) {
144
+ return i < len2? 2 : 3;
145
+ }
146
+ }
147
+ return 0;
148
+ }
149
+ /********************************************************************************************************/
150
+ int AddAtom2num( AT_RANK num[], inp_ATOM *atom, int at_no, int bSubtract )
151
+ { /* bSubtract: 0=> add, 1=>subtract, 2=> fill */
152
+ inp_ATOM *at = atom + at_no;
153
+ int k;
154
+ int nMobile = (at->charge == -1);
155
+ if ( bSubtract == 1 ) {
156
+ /* 1: subtract */
157
+ num[1] -= nMobile;
158
+ nMobile += at->num_H;
159
+ num[0] -= nMobile;
160
+ for ( k = 0; k < T_NUM_ISOTOPIC; k ++ ) {
161
+ /* T (3H isotope) first because it has higher weight */
162
+ num[T_NUM_NO_ISOTOPIC+k] -= at->num_iso_H[NUM_H_ISOTOPES-k-1];
163
+ }
164
+ } else {
165
+ if ( bSubtract == 2 ) {
166
+ /* fill */
167
+ memset( num, 0, (T_NUM_NO_ISOTOPIC + T_NUM_ISOTOPIC)*sizeof(num[0]) );
168
+ }
169
+ /* else (0): add */
170
+ num[1] += nMobile;
171
+ nMobile += at->num_H;
172
+ num[0] += nMobile;
173
+ for ( k = 0; k < T_NUM_ISOTOPIC; k ++ ) {
174
+ /* T (3H isotope) first because it has higher weight */
175
+ num[T_NUM_NO_ISOTOPIC+k] += at->num_iso_H[NUM_H_ISOTOPES-k-1];
176
+ }
177
+ }
178
+ return nMobile;
179
+ }
180
+ /********************************************************************************************************/
181
+ void AddAtom2DA( AT_RANK num_DA[], inp_ATOM *atom, int at_no, int bSubtract )
182
+ { /* bSubtract: 0=> add, 1=>subtract, 2=> fill */
183
+ inp_ATOM *at = atom + at_no;
184
+ int nDelta, nAcidic_O;
185
+
186
+ if (at->charge < -1 || at->charge == 1 && !at->c_point || at->charge > 1 )
187
+ return;
188
+
189
+ nDelta = ( bSubtract == 1 )? -1 : 1;
190
+
191
+ /* "Acidic" O, S, Se, Te recognition */
192
+ if ( at->at_type & ATT_ACIDIC_CO ) {
193
+ nAcidic_O = nDelta;
194
+ } else {
195
+ nAcidic_O = 0;
196
+ }
197
+
198
+ if ( bSubtract == 2 ) { /* 2: fill, otherwise add */
199
+ memset( num_DA, 0, TG_NUM_DA * sizeof(num_DA[0]) );
200
+ }
201
+ if ( at->charge <= 0 && at->valence == at->chem_bonds_valence ||
202
+ /* neutral or negative donor */
203
+ at->charge > 0 && at->valence + 1 == at->chem_bonds_valence
204
+ /* positively charged donor */
205
+ ) {
206
+ if ( at->charge < 0 ) {
207
+ num_DA[TG_Num_dM] += nDelta;
208
+ num_DA[TG_Num_dO] += nAcidic_O;
209
+ } else
210
+ if ( at->num_H ) {
211
+ num_DA[TG_Num_dH] += nDelta;
212
+ num_DA[TG_Num_dO] += nAcidic_O;
213
+ }
214
+ } else
215
+ if ( at->charge <= 0 && at->valence + 1 == at->chem_bonds_valence ||
216
+ at->charge > 0 && at->valence + 2 == at->chem_bonds_valence ) {
217
+ /* acceptor */
218
+ if ( at->charge < 0 ) {
219
+ num_DA[TG_Num_aM] += nDelta;
220
+ } else
221
+ if ( at->num_H ) {
222
+ num_DA[TG_Num_aH] += nDelta;
223
+ } else {
224
+ num_DA[TG_Num_aO] += nAcidic_O; /* acidic O-acceptor has no H or charge */
225
+ }
226
+ }
227
+ return;
228
+ }
229
+ /********************************************************************************************************/
230
+ int AddEndPoint( T_ENDPOINT *pEndPoint, inp_ATOM *at, int iat )
231
+ {
232
+ pEndPoint->nAtomNumber = iat;
233
+ pEndPoint->nEquNumber = 0;
234
+ pEndPoint->nGroupNumber = at[iat].endpoint;
235
+ if ( at[iat].endpoint ) {
236
+ /* already an endpoint */
237
+ memset( pEndPoint->num, 0, sizeof(pEndPoint->num) );
238
+ } else {
239
+ /* not an endpoint yet, make it an endpoint */
240
+ AddAtom2num( pEndPoint->num, at, iat, 2 ); /* fill */
241
+ AddAtom2DA( pEndPoint->num_DA, at, iat, 2 );
242
+ /*
243
+ nMobile = pEndPoint->num[1] = (at[iat].charge == -1);
244
+ nMobile = pEndPoint->num[0] = at[iat].num_H + nMobile;
245
+ for ( k = 0; k < T_NUM_ISOTOPIC; k ++ ) {
246
+ pEndPoint->num[T_NUM_NO_ISOTOPIC+k] = at[iat].num_iso_H[NUM_H_ISOTOPES-k-1];
247
+ }
248
+ */
249
+ }
250
+ return 0;
251
+ }
252
+ /********************************************************************************************************/
253
+ int nGetEndpointInfo( inp_ATOM *atom, int iat, ENDPOINT_INFO *eif )
254
+ {
255
+ int nEndpointValence;
256
+ int nMobile;
257
+ S_CHAR cChargeSubtype;
258
+
259
+ if ( atom[iat].radical && atom[iat].radical != RADICAL_SINGLET )
260
+ return 0; /* a radical */
261
+ if ( !(nEndpointValence = get_endpoint_valence( atom[iat].el_number )) )
262
+ return 0; /* not an endpoint */
263
+ if ( nEndpointValence <= atom[iat].valence )
264
+ return 0; /* not an endpoint, for example >N(+)< or >N< or >O(+)- or >O- or >N- or -O- */
265
+
266
+ if ( atom[iat].charge == -1 || atom[iat].charge == 0 ) {
267
+ /* not a positive charge-point */
268
+ if ( nEndpointValence < atom[iat].chem_bonds_valence )
269
+ return 0; /* abnormal valence > standard endpoint valence */
270
+ nMobile = atom[iat].num_H + (atom[iat].charge == -1);
271
+ if ( nMobile + atom[iat].chem_bonds_valence != nEndpointValence )
272
+ return 0; /* non-standard endpoint valence */
273
+ switch ( atom[iat].chem_bonds_valence - atom[iat].valence ) {
274
+ case 0:
275
+ eif->cDonor = 1;
276
+ eif->cAcceptor = 0;
277
+ break;
278
+ case 1:
279
+ eif->cDonor = 0;
280
+ eif->cAcceptor = 1;
281
+ break;
282
+ default:
283
+ return 0;
284
+ }
285
+ eif->cMobile = nMobile;
286
+ eif->cNeutralBondsValence = nEndpointValence-nMobile;
287
+ eif->cMoveableCharge = 0;
288
+ return nEndpointValence;
289
+ } else
290
+ if ( atom[iat].c_point &&
291
+ 0 <= GetChargeType( atom, iat, &cChargeSubtype ) &&
292
+ ((int)cChargeSubtype & (C_SUBTYPE_H_ACCEPT|C_SUBTYPE_H_DONOR))
293
+ ) {
294
+ /* charge-point */
295
+ if ( cChargeSubtype & C_SUBTYPE_H_ACCEPT ) {
296
+ eif->cDonor = 0;
297
+ eif->cAcceptor = 1;
298
+ } else
299
+ if ( cChargeSubtype & C_SUBTYPE_H_DONOR ) {
300
+ eif->cDonor = 1;
301
+ eif->cAcceptor = 0;
302
+ } else {
303
+ return 0;
304
+ }
305
+ eif->cMobile = atom[iat].num_H;
306
+ eif->cNeutralBondsValence = nEndpointValence-atom[iat].num_H;
307
+ eif->cMoveableCharge = atom[iat].charge;
308
+ return nEndpointValence;
309
+ }
310
+ return 0;
311
+ }
312
+ /********************************************************************************************************/
313
+ /* RegisterEndPoints ret>0 => new registration happened, 0 => no changes, -1 => program error (debug)
   Registers the atoms listed in EndPoint[0..nNumEndPoints-1] in the t-groups stored in
   t_group_info->t_group: creates new t-groups, adds atoms to existing ones (setting
   at[].endpoint), merges and renumbers t-groups, and rebuilds the
   t_group_info->tGroupNumber index. The positive return value is the number of newly
   marked endpoint atoms plus the number of old t-groups that were merged away.
   -1 is also returned when a memory allocation fails or too many t-groups are found.
   If pBNS is not NULL and the condition tested near the end holds, the BN structure is
   reinitialized and the c-groups (cgi) and t-groups are added to it; a BNS error code
   from those calls is returned as is. */
314
+ int RegisterEndPoints( T_GROUP_INFO *t_group_info, /* T_GROUP *t_group, int *pnum_t, int max_num_t,*/
315
+ T_ENDPOINT *EndPoint, int nNumEndPoints, inp_ATOM *at, int num_atoms, C_GROUP_INFO *cgi
316
+ , struct BalancedNetworkStructure *pBNS )
317
+ {
318
+ T_GROUP *t_group = t_group_info->t_group;
319
+ int *pnum_t = &t_group_info->num_t_groups; /* updated only on the successful exit path */
320
+ int max_num_t = t_group_info->max_num_t_groups;
321
+ int nNumZeroEqu, nNumNewTGroups;
322
+ AT_NUMB group, prev_group, prev_eqnum, nNextGroupNumber, nLeastGroupNumber;
323
+ int nNumGroups, num_t, difference;
324
+ int i, j, k, ret;
325
+ AT_NUMB nNewTgNumberStackArray[MAX_STACK_ARRAY_LEN+1];
326
+ AT_NUMB nGroupNumberStackArray[MAX_STACK_ARRAY_LEN+1];
327
+ AT_NUMB nGroupNewNumberStackArray[MAX_STACK_ARRAY_LEN+1];
328
+ AT_NUMB *nNewTgNumber = nNewTgNumberStackArray; /* each pointer is switched to heap memory if its stack array is too small */
329
+ AT_NUMB *nGroupNumber = nGroupNumberStackArray;
330
+ AT_NUMB *nGroupNewNumber = nGroupNewNumberStackArray;
331
+
332
+ if ( nNumEndPoints <= 0 )
333
+ return 0; /* nothing to do */
334
+ num_t = *pnum_t;
335
+ difference = 0;
336
+ nNextGroupNumber = 0;
337
+ nNumZeroEqu = 0;
338
+ ret = 0;
339
+ /* find max group number; increment it to obtain next available group number */
340
+ for ( i = 0; i < num_t; i ++ ) {
341
+ if ( nNextGroupNumber < t_group[i].nGroupNumber )
342
+ nNextGroupNumber = t_group[i].nGroupNumber;
343
+ }
344
+ nNextGroupNumber ++;
345
+
346
+ /* find min non-zero group number nLeastGroupNumber;
347
+ count zero EndPoint[i].nEquNumber
348
+ if all EndPoint[i].nGroupNumber are equal and non-zero then exit: nothing to do.
349
+ */
350
+ nLeastGroupNumber = nNextGroupNumber;
351
+ prev_group = EndPoint[0].nGroupNumber;
352
+ prev_eqnum = EndPoint[0].nEquNumber;
353
+ for ( i = j = k = 0; i < nNumEndPoints; i ++ ) {
354
+ if ( group = EndPoint[i].nGroupNumber ) {
355
+ if ( group < nLeastGroupNumber ) {
356
+ nLeastGroupNumber = group;
357
+ }
358
+ }
359
+ j += (prev_group == EndPoint[i].nGroupNumber); /* count endpoints that belong to the 1st group */
360
+ k += (prev_eqnum == EndPoint[i].nEquNumber); /* count endpoints that belong to a group equivalent to the 1st group */
361
+ nNumZeroEqu += !EndPoint[i].nEquNumber; /* count endpoints that have been processed by FindAccessibleEndPoints() */
362
+ }
363
+ if ( j == nNumEndPoints && prev_group && k == nNumEndPoints ) {
364
+ /* all endpoints already belong to one t-group;
365
+ the last comparison is not needed for now
366
+ because EndPoint[i].nEquNumber cannot make
367
+ endpoint partitioning finer
368
+ */
369
+ return 0;
370
+ }
371
+
372
+ nNumNewTGroups = 0;
373
+
374
+ if ( !nNumZeroEqu ) {
375
+ /* EndPoint[] has been processed by FindAccessibleEndPoints;
376
+ * equal EndPoint[i].nEquNumber mark endpoints belonging to
377
+ * the same t-group
378
+ * Since now the next available t-group number, nNextGroupNumber,
379
+ * is known,replace fict. IDs assigned by FindAccessibleEndPoints
380
+ * with correct new t-group numbers.
381
+ */
382
+ for ( i = 0; i < nNumEndPoints; i ++ ) {
383
+ if ( (group = EndPoint[i].nEquNumber) >= nNextGroupNumber ) {
384
+ /* replace fict. IDs assigned by FindAccessibleEndPoints() with new t-group numbers */
385
+ /* these fict. IDs have values = (num_atoms+1), (num_atoms+2),...; they may be non-contiguous */
386
+ for ( j = 0; j < nNumNewTGroups; j ++ ) {
387
+ if ( group == nGroupNewNumber[j] )
388
+ break;
389
+ }
390
+ if ( j == nNumNewTGroups ) {
391
+ /* found new fict. ID = group */
392
+ if ( j == MAX_STACK_ARRAY_LEN && nGroupNewNumber == nGroupNewNumberStackArray ) {
393
+ /* stack array overflow; allocate more memory than may be needed */
394
+ nGroupNewNumber = (AT_NUMB *)inchi_malloc(nNumEndPoints*sizeof(nGroupNewNumber[0]));
395
+ if ( !nGroupNewNumber ) {
396
+ ret = -1;
397
+ goto exit_function;
398
+ }
399
+ memcpy( nGroupNewNumber, nGroupNewNumberStackArray, nNumNewTGroups*sizeof(nGroupNewNumber[0]));
400
+ }
401
+ /* save newly found fict. t-group ID to compare to the next values of EndPoint[].nEquNumber */
402
+ nGroupNewNumber[j] = group;
403
+ nNumNewTGroups ++;
404
+ }
405
+ EndPoint[i].nEquNumber = nNextGroupNumber + j;
406
+ }
407
+ } /* after this point the values just stored in nGroupNewNumber[] will not
408
+ be used. However, the obtained nNumNewTGroups value will be used */
409
+ } else
410
+ if ( nNumZeroEqu == nNumEndPoints ) {
411
+ /* EndPoint[] has NOT been processed by FindAccessibleEndPoints;
412
+ all atoms and t-groups to which endpoints belong should be merged into a single t-group
413
+ */
414
+ if ( nLeastGroupNumber == nNextGroupNumber ) {
415
+ /* flag to create a new t-group: none of the found
416
+ * endpoints belong to an already known t-group
417
+ */
418
+ nNumNewTGroups = 1; /* otherwise 0 */
419
+ }
420
+ /* All EndPoint[*].nEquNumber are zeroes. All endpoints will
421
+ * belong to one new or old t-group; its ID is nLeastGroupNumber.
422
+ * Set EndPoint[i].nEquNumber = nLeastGroupNumber;
423
+ */
424
+ for ( i = 0; i < nNumEndPoints; i ++ ) {
425
+ EndPoint[i].nEquNumber = nLeastGroupNumber;
426
+ }
427
+ } else {
428
+ ret = -1; /* program error: only some of EndPoint[i].nEquNumber are zero */ /* <BRKPT> */
429
+ goto exit_function;
430
+ }
431
+
432
+ if ( nNumNewTGroups ) {
433
+ /* create new nNumNewTGroups t-group(s) */
434
+ if ( num_t + nNumNewTGroups > max_num_t ) {
435
+ ret = -1; /* found too many t-groups */ /* <BRKPT> */
436
+ goto exit_function;
437
+ }
438
+ /* initialize new t-group(s) */
439
+ memset( t_group + num_t, 0, nNumNewTGroups * sizeof(t_group[0]) );
440
+ for ( i = 0; i < nNumNewTGroups; i ++ ) {
441
+ t_group[num_t+i].nGroupNumber = nNextGroupNumber + i;
442
+ }
443
+ }
444
+
445
+ /* At this point:
446
+ * EndPoint[i].nGroupNumber == 0 => the endpoint atom does not belong to a t-group yet
447
+ * EndPoint[i].nGroupNumber > 0 => current t-group ID of the endpoint atom
448
+ * EndPoint[i].nEquNumber --> new ID of a tautomeric group of this endpoint atom
449
+ * EndPoint[i].nAtomNumber --> number of the endpoint atom
450
+ */
451
+
452
+ nNumGroups = 0; /* counts the groups to be renumbered */
453
+ for ( i = j = 0; i < nNumEndPoints; i ++ ) {
454
+ if ( group = EndPoint[i].nGroupNumber ) {
455
+ if ( group == EndPoint[i].nEquNumber ) {
456
+ continue; /* ignore: the endpoint belongs to the same t-group as before */
457
+ }
458
+ /* save information for renumbering of the existing t-groups */
459
+ for ( j = 0; j < nNumGroups; j ++ ) {
460
+ if ( group == nGroupNumber[j] ) {
461
+ if ( EndPoint[i].nEquNumber != nGroupNewNumber[j] ) {
462
+ ret = -1; /* program error */ /* <BRKPT> */
463
+ goto exit_function;
464
+ }
465
+ break;
466
+ }
467
+ }
468
+ if ( j == nNumGroups ) {
469
+ /* discovered a new t-group number; store it together with its nEquNumber */
470
+ if ( j == MAX_STACK_ARRAY_LEN ) {
471
+ if ( nGroupNewNumber == nGroupNewNumberStackArray ) {
472
+ nGroupNewNumber = (AT_NUMB *)inchi_malloc(nNumEndPoints*sizeof(nGroupNewNumber[0]));
473
+ if ( !nGroupNewNumber ) {
474
+ ret = -1;
475
+ goto exit_function;
476
+ }
477
+ memcpy( nGroupNewNumber, nGroupNewNumberStackArray, nNumGroups*sizeof(nGroupNewNumber[0]));
478
+ }
479
+ if ( nGroupNumber == nGroupNumberStackArray ) {
480
+ nGroupNumber = (AT_NUMB *)inchi_malloc(nNumEndPoints*sizeof(nGroupNumber[0]));
481
+ if ( !nGroupNumber ) {
482
+ ret = -1;
483
+ goto exit_function;
484
+ }
485
+ memcpy( nGroupNumber, nGroupNumberStackArray, nNumGroups*sizeof(nGroupNumber[0]));
486
+ }
487
+ }
488
+
489
+ nGroupNumber[j] = group; /* old t-group ID */
490
+ nGroupNewNumber[j] = EndPoint[i].nEquNumber; /* new t-group ID */
491
+ nNumGroups ++;
492
+ }
493
+ } else {
494
+ /* add a new endpoint to the newly created or previously existing t-groups */
495
+ group = EndPoint[i].nEquNumber;
496
+ if ( group >= nNextGroupNumber ) {
497
+ /* get index of a new t-group from equ number */
498
+ j = num_t + group - nNextGroupNumber; /* newly assigned IDs are contiguous */
499
+ } else {
500
+ /* old t-group */
501
+ if ( j >= num_t || group != t_group[j].nGroupNumber ) {
502
+ /* search only if j is not a needed group index */
503
+ for ( j = 0; j < num_t; j ++ ) {
504
+ if ( group == t_group[j].nGroupNumber )
505
+ break;
506
+ }
507
+ if ( j == num_t ) {
508
+ ret = -1; /* program error: t-group not found */ /* <BRKPT> */
509
+ goto exit_function;
510
+ }
511
+ }
512
+ }
513
+ /* add atom to existing or new t-group */
514
+ t_group[j].nNumEndpoints ++;
515
+ for ( k = 0; k < (int)(sizeof(t_group->num)/sizeof(t_group->num[0])); k ++ )
516
+ t_group[j].num[k] += EndPoint[i].num[k];
517
+ for ( k = 0; k < (int)(sizeof(t_group->num_DA)/sizeof(t_group->num_DA[0])); k ++ )
518
+ t_group[j].num_DA[k] += EndPoint[i].num_DA[k];
519
+ /* mark endpoint */
520
+ at[EndPoint[i].nAtomNumber].endpoint = group;
521
+ difference ++;
522
+ }
523
+ }
524
+
525
+ difference += nNumGroups;
526
+ num_t += nNumNewTGroups;
527
+ if ( !difference ) {
528
+ ret = 0; /* nothing to do. Not necessarily a program error: happens if all EndPoint[i].nGroupNumber==EndPoint[i].nEquNumber */
529
+ goto exit_function;
530
+ }
531
+
532
+ if ( nNumGroups ) {
533
+ /* prepare for renumbering: find max t-group number */
534
+ for ( i = 0, nNextGroupNumber = 0; i < num_t; i ++ ) {
535
+ if ( nNextGroupNumber < t_group[i].nGroupNumber ) {
536
+ nNextGroupNumber = t_group[i].nGroupNumber;
537
+ }
538
+ }
539
+ }
540
+ /* renumber and merge t-groups */
541
+ for ( i = 0; i < nNumGroups; i ++ ) {
542
+ int i1, i2;
543
+ AT_NUMB group1 = nGroupNumber[i];
544
+ AT_NUMB group2 = nGroupNewNumber[i];
545
+ /* add group1 to group2, then delete group1. */
546
+ for ( j = 0, i1 = i2 = -1; j < num_t && (i1 < 0 || i2 < 0); j ++ ) {
547
+ if ( i1 < 0 && group1 == t_group[j].nGroupNumber )
548
+ i1 = j;
549
+ if ( i2 < 0 && group2 == t_group[j].nGroupNumber )
550
+ i2 = j;
551
+ }
552
+ if ( i1 < 0 || i2 < 0 ) {
553
+ ret = -1; /* program error */ /* <BRKPT> */
554
+ goto exit_function;
555
+ }
556
+ /* add t_group[i1] to t_group[i2] and remove t_group[i1] */
557
+ for ( k = 0; k < (int)(sizeof(t_group->num)/sizeof(t_group->num[0])); k ++ )
558
+ t_group[i2].num[k] += t_group[i1].num[k];
559
+ for ( k = 0; k < (int)(sizeof(t_group->num_DA)/sizeof(t_group->num_DA[0])); k ++ )
560
+ t_group[i2].num_DA[k] += t_group[i1].num_DA[k];
561
+ t_group[i2].nNumEndpoints += t_group[i1].nNumEndpoints;
562
+ num_t --; /* one t-group less; the entries after i1 are shifted down below */
563
+ if ( num_t > i1 ) {
564
+ memmove( t_group+i1, t_group+i1+1, ( num_t - i1)*sizeof(t_group[0]) );
565
+ }
566
+ }
567
+
568
+ if ( nNumGroups ) {
569
+ /* there are groups to merge */
570
+ if ( nNextGroupNumber >= MAX_STACK_ARRAY_LEN ) {
571
+ nNewTgNumber = (AT_NUMB *)inchi_malloc((nNextGroupNumber+1)*sizeof(*nNewTgNumber));
572
+ if ( !nNewTgNumber ) {
573
+ ret = -1;
574
+ goto exit_function; /* error: out of RAM */
575
+ }
576
+ }
577
+ memset( nNewTgNumber, 0, (nNextGroupNumber+1)*sizeof(*nNewTgNumber) );
578
+ for ( i = 0; i < num_t; i ++ ) {
579
+ nNewTgNumber[t_group[i].nGroupNumber] = i+1; /* new t-group numbers */
580
+ }
581
+ for ( j = 0; j < nNumGroups; j ++ ) {
582
+ if ( !nNewTgNumber[nGroupNumber[j]] && nNewTgNumber[nGroupNewNumber[j]] ) {
583
+ nNewTgNumber[nGroupNumber[j]] = nNewTgNumber[nGroupNewNumber[j]]; /* a removed group maps to the number of the group it was merged into */
584
+ } else {
585
+ ret = -1; /* program error: all new numbers must have been marked */
586
+ goto exit_function;
587
+ }
588
+ }
589
+ /* renumber t-groups */
590
+ for ( i = 0; i < num_t; i ++ ) {
591
+ t_group[i].nGroupNumber = nNewTgNumber[t_group[i].nGroupNumber];
592
+ }
593
+ #if( bRELEASE_VERSION != 1 )
594
+ /* Check: debug only */
595
+ for ( i = 1; i < num_t; i ++ ) {
596
+ if ( 1 != t_group[i].nGroupNumber - t_group[i-1].nGroupNumber ) {
597
+ ret = -1; /* debug */
598
+ goto exit_function;
599
+ }
600
+ }
601
+ #endif
602
+ /* renumber endpoints */
603
+ for ( i = 0; i < num_atoms; i ++ ) {
604
+ if ( group = at[i].endpoint ) {
605
+ if ( !(at[i].endpoint = nNewTgNumber[group]) || nNextGroupNumber <= nNewTgNumber[group] ) {
606
+ ret = -1; /* program error */
607
+ goto exit_function;
608
+ }
609
+ }
610
+ }
611
+ }
612
+ if ( nNewTgNumber != nNewTgNumberStackArray ) {
613
+ inchi_free( nNewTgNumber );
614
+ nNewTgNumber = nNewTgNumberStackArray;
615
+ }
616
+ if ( nGroupNumber != nGroupNumberStackArray ) {
617
+ inchi_free(nGroupNumber);
618
+ nGroupNumber = nGroupNumberStackArray;
619
+ }
620
+ if ( nGroupNewNumber != nGroupNewNumberStackArray ) {
621
+ inchi_free( nGroupNewNumber );
622
+ nGroupNewNumber = nGroupNewNumberStackArray;
623
+ }
624
+ if ( !t_group_info->tGroupNumber ) {
625
+ t_group_info->tGroupNumber = (AT_NUMB *)inchi_malloc(2*max_num_t*sizeof(t_group_info->tGroupNumber[0])); /* allocated on first use; not freed in this function */
626
+ if ( !t_group_info->tGroupNumber ) {
627
+ ret = -1;
628
+ goto exit_function;
629
+ }
630
+ }
631
+ /* fill out t-group index 2004-02-27 */
632
+ memset( t_group_info->tGroupNumber, 0, 2*max_num_t*sizeof(t_group_info->tGroupNumber[0]) );
633
+ for ( i = 0; i < num_t; i ++ ) {
634
+ if ( t_group[i].nNumEndpoints && t_group[i].nGroupNumber )
635
+ t_group_info->tGroupNumber[t_group[i].nGroupNumber] = i+1; /* t-group number -> (index in t_group[]) + 1 */
636
+ }
637
+
638
+ if ( pBNS && (pBNS->tot_st_cap == pBNS->tot_st_flow || ALWAYS_ADD_TG_ON_THE_FLY) ) {
639
+ T_GROUP_INFO tgi;
640
+ int ret_bns;
641
+ memset( &tgi, 0, sizeof(tgi) );
642
+ tgi.num_t_groups = num_t;
643
+ tgi.t_group = t_group;
644
+ /* reinitialize BN Structure */
645
+ ret_bns = ReInitBnStruct( pBNS, at, num_atoms, 0 );
646
+ if ( IS_BNS_ERROR( ret_bns ) ) {
647
+ return ret_bns; /* NOTE: returns without storing num_t into *pnum_t */
648
+ }
649
+ if ( *pBNS->pbTautFlags & TG_FLAG_MOVE_POS_CHARGES ) {
650
+ /* set new charge groups */
651
+ ret_bns = AddCGroups2BnStruct( pBNS, at, num_atoms, cgi );
652
+ if ( IS_BNS_ERROR( ret_bns ) ) {
653
+ return ret_bns; /* NOTE: returns without storing num_t into *pnum_t */
654
+ }
655
+ }
656
+ /* set new tautomeric groups */
657
+ ret_bns = AddTGroups2BnStruct( pBNS, at, num_atoms, &tgi );
658
+ if ( IS_BNS_ERROR( ret_bns ) ) {
659
+ return ret_bns; /* NOTE: returns without storing num_t into *pnum_t */
660
+ }
661
+ }
662
+
663
+ *pnum_t = num_t;
664
+ return difference;
665
+
666
+ exit_function:
667
+ if ( nNewTgNumber != nNewTgNumberStackArray ) {
668
+ inchi_free( nNewTgNumber );
669
+ }
670
+ if ( nGroupNumber != nGroupNumberStackArray ) {
671
+ inchi_free(nGroupNumber);
672
+ }
673
+ if ( nGroupNewNumber != nGroupNewNumberStackArray ) {
674
+ inchi_free( nGroupNewNumber );
675
+ }
676
+ return ret;
677
+ }
678
+ /*******************************************************************************************************
679
+ * change non-alternating and non-tautomeric bonds
680
+ * (that is, single and double bonds) to tautomeric
681
+ */
682
+ int SetTautomericBonds( inp_ATOM *at, int nNumBondPos, T_BONDPOS *BondPos )
683
+ {
684
+ int k, n;
685
+ for ( k = n = 0; k < nNumBondPos; k ++ ) {
686
+ int neighbor_index = BondPos[k].neighbor_index;
687
+ int center = BondPos[k].nAtomNumber;
688
+ int bond_mark = at[center].bond_type[neighbor_index];
689
+ int bond_type = bond_mark & ~BOND_MARK_ALL;
690
+ int neighbor;
691
+ #if( REPLACE_ALT_WITH_TAUT == 1 )
692
+ if ( bond_type != BOND_TAUTOM )
693
+ #else
694
+ if ( bond_type != BOND_ALTERN && bond_type != BOND_TAUTOM )
695
+ #endif
696
+ {
697
+ int ii;
698
+ /* change bond type to BOND_TAUTOM presering higher bits marks */
699
+ bond_type = (bond_mark & BOND_MARK_ALL) | BOND_TAUTOM;
700
+ /* change center-neighbor bond */
701
+ at[center].bond_type[neighbor_index] = bond_type;
702
+ neighbor = at[center].neighbor[neighbor_index];
703
+ for ( ii = 0; ii < at[neighbor].valence; ii ++ ) {
704
+ if ( at[neighbor].neighbor[ii] == center ) {
705
+ /* neighbor-center bond found */
706
+ at[neighbor].bond_type[ii] = bond_type;
707
+ break;
708
+ }
709
+ }
710
+ n ++;
711
+ }
712
+ }
713
+ return n;
714
+ }
715
+
716
+ /********************************************************************************************************/
717
/*
 * GetNeutralRepsIfNeeded
 *
 * On entry *pri and *prj are two atom numbers that are about to be tested
 * for an alternating path. If both atoms carry the same non-zero c_point
 * (charge group number), at least one of them has charge == 1, and cgi
 * describes at least one charge group, the function may replace either
 * atom number with another member of the same tautomeric group
 * (same at[].endpoint value):
 *   - preferably one that has no c_point at all ("neutral representative");
 *   - otherwise one that belongs to a different charge group; this second
 *     kind of replacement is taken at most once per atom because of the
 *     (c_point == at[rX].c_point) guard.
 * Replacements are searched first among EndPoint[0..nNumEndPoints-1] and,
 * if nothing was found there, among all atoms at[0..num_atoms-1].
 *
 * *pri and *prj are written back (possibly unchanged) whenever the outer
 * condition holds. The function always returns 0.
 *
 * NOTE(review): the `break` that closes the body of the `for k` loop is
 * executed unconditionally at the end of the first iteration, so only
 * cgi->c_group[0] is ever compared with c_point. It looks as if the break
 * was meant to sit inside the `nGroupNumber == c_point` branch -- confirm
 * against the upstream source before changing it, because moving it alters
 * which representatives are tested.
 */
int GetNeutralRepsIfNeeded( AT_NUMB *pri, AT_NUMB *prj, inp_ATOM *at, int num_atoms, T_ENDPOINT *EndPoint, int nNumEndPoints, C_GROUP_INFO *cgi )
{
    AT_NUMB ri = *pri;   /* working copy of the first representative  */
    AT_NUMB rj = *prj;   /* working copy of the second representative */
    int i, k;
    AT_NUMB c_point, endpoint, r;

    if ( (c_point = at[ri].c_point) && (c_point == at[rj].c_point) &&
         (at[ri].charge == 1 || at[rj].charge == 1) && cgi && cgi->num_c_groups > 0 ) {
        /* at[ri] and at[rj] belong to the same charge group, at least one is charged */
        for ( k = 0; k < cgi->num_c_groups; k ++ ) {
            if ( cgi->c_group[k].nGroupNumber == c_point ) {
                /* cgi->c_group[k] is found to be this charge group */
                if ( cgi->c_group[k].num_CPoints - cgi->c_group[k].num[0] < 2 ) {
                    /* Only one neutral in the c-group: we will not be able to neutralize both
                       when looking for the alt path to discover the tautomerism.
                       Therefore we need to find a neutral t-group representative */
                    /* at[rj] */
                    if ( endpoint = at[rj].endpoint ) {
                        /* first pass: look among the candidate endpoints only */
                        for ( i = 0; i < nNumEndPoints; i ++ ) {
                            if ( (r=EndPoint[i].nAtomNumber) == *prj )
                                continue; /* ignore at[*prj] */
                            if ( at[r].endpoint != endpoint )
                                continue; /* at[r] does not belong to the same t-group as at[*prj]; ignore the atom */
                            if ( !at[r].c_point ) {
                                rj = r; /* found a neutral t-group representative */
                                break;
                            }
                            if ( at[r].c_point != c_point && c_point == at[rj].c_point ) {
                                /* replace only once because of (c_point == at[rj].c_point) condition */
                                rj = r;
                            }
                        }
                        if ( rj == *prj /*&& at[ri].endpoint*/ ) {
                            /* !!! "&& at[ri].endpoint": only between 2 t-groups 2004-02-27;
                               the change disabled due to undiscovered yet possibility of ambiguity*/
                            /* no replacement has been found in EndPoint[]; try all atoms in the t-group */
                            for ( i = 0; i < num_atoms; i ++ ) {
                                if ( at[i].endpoint != endpoint )
                                    continue;
                                if ( i == (int)*prj )
                                    continue;
                                if ( !at[i].c_point ) {
                                    rj = (AT_NUMB)i; /* found neutral t-group representative */
                                    break;
                                }
                                if ( at[i].c_point != c_point && c_point == at[rj].c_point ) {
                                    /* replace only once */
                                    rj = (AT_NUMB)i;
                                }
                            }
                        }
                    }
                    /* at[ri]: same search; here a member of another charge group is
                       accepted only if that group also differs from the charge group
                       of the (possibly already replaced) at[rj] */
                    if ( endpoint = at[ri].endpoint ) {
                        for ( i = 0; i < nNumEndPoints; i ++ ) {
                            if ( (r=EndPoint[i].nAtomNumber) == *pri )
                                continue;
                            if ( at[r].endpoint != endpoint )
                                continue;
                            if ( !at[r].c_point ) {
                                ri = r; /* found neutral t-group representative */
                                break;
                            }
                            if ( at[r].c_point != c_point && c_point == at[ri].c_point &&
                                 at[r].c_point != at[rj].c_point ) {
                                /* replace only once */
                                ri = r;
                            }
                        }
                        if ( ri == *pri && at[rj].endpoint ) {
                            /* !!! "&& at[rj].endpoint": only between 2 t-groups 2004-02-27;
                               the change disabled due to undiscovered yet possibility of ambiguity */
                            /* note: unlike the at[rj] branch above, in this branch the
                               "&& at[rj].endpoint" test is active in the code */
                            for ( i = 0; i < num_atoms; i ++ ) {
                                if ( at[i].endpoint != endpoint )
                                    continue;
                                if ( i == (int)*pri )
                                    continue;
                                if ( !at[i].c_point ) {
                                    ri = (AT_NUMB)i; /* found neutral t-group representative */
                                    break;
                                }
                                if ( at[i].c_point != c_point && c_point == at[ri].c_point &&
                                     at[i].c_point != at[rj].c_point) {
                                    /* replace only once */
                                    ri = (AT_NUMB)i;
                                }
                            }
                        }
                    }

                }
            }
            break; /* unconditional: the loop body runs for k == 0 only (see NOTE above) */
        }
        /* hand the (possibly replaced) representatives back to the caller */
        *prj = rj;
        *pri = ri;
    }
    return 0;
}
817
+
818
+ /********************************************************************************************************/
819
/*
 * FindAccessibleEndPoints
 *
 * EndPoint[0..*nNumEndPoints-1] are candidate tautomeric endpoints and
 * BondPos[] holds one bond per endpoint (the two counts must be equal,
 * otherwise the function returns 0 without touching anything).
 *
 * The function decides which endpoints are connected to each other by an
 * alternating path (bExistsAnyAltPath() in ALT_PATH_MODE_TAUTOM) and records
 * the result in EndPoint[].nEquNumber:
 *   1. nEquNumber is initialised from nGroupNumber; one representative atom
 *      is collected per distinct non-zero group number;
 *   2. pairs of already known t-groups are tested and merged (the smaller
 *      number wins);
 *   3. endpoints that are not yet in any t-group are appended to the list of
 *      representatives and tested against the t-groups and each other; two
 *      connected non-t-group endpoints receive a fictitious equivalence
 *      number starting at num_atoms+1;
 *   4. EndPoint[] and BondPos[] are compacted in place so that only entries
 *      with a non-zero nEquNumber remain (with IGNORE_SINGLE_ENDPOINTS == 1,
 *      entries whose nEquNumber is unique are dropped too).
 *
 * On success *nNumEndPoints and *nNumBondPos are both set to the number of
 * entries kept and that number is returned. If bExistsAnyAltPath() reports
 * an error (IS_BNS_ERROR), the error code is returned immediately; by then
 * EndPoint[].nEquNumber may already have been modified.
 *
 * NOTE(review): the three scratch arrays hold MAXVAL elements and are
 * indexed by up to *nNumEndPoints entries; this function does not check
 * *nNumEndPoints against MAXVAL -- presumably the caller guarantees it;
 * confirm.
 */
int FindAccessibleEndPoints( T_ENDPOINT *EndPoint, int *nNumEndPoints, T_BONDPOS *BondPos, int *nNumBondPos,
                             struct BalancedNetworkStructure *pBNS, struct BalancedNetworkData *pBD,
                             inp_ATOM *at, int num_atoms, C_GROUP_INFO *cgi )
{
    /* parallel arrays, one entry per representative:
       atom number, current equivalence number, index into EndPoint[] */
    AT_NUMB nTGroupRepresenative[MAXVAL], nTGroupEqu[MAXVAL], nTGEndPointNo[MAXVAL], ri, rj;
    AT_NUMB nCurTGroupNumber, nMaxTGroupNumber, nNumTgroupNumbers, nMaxEquNumber;
    int i, j, k, nNumDiffTGroupNumbers = 0, nNumFoundEqu, nErr;

    if ( *nNumEndPoints != *nNumBondPos )
        return 0;
    /* collect all group numbers. Fill EndPoint[i].nEquNumber */
    for ( i = 0; i < *nNumEndPoints; i ++ ) {
        nCurTGroupNumber = EndPoint[i].nEquNumber = EndPoint[i].nGroupNumber; /* initial equivalence */
        if ( nCurTGroupNumber ) {
            /* found endpoint that already belongs to a t-group */
            for ( j = 0; j < nNumDiffTGroupNumbers; j ++ ) {
                if ( nTGroupEqu[j] == nCurTGroupNumber )
                    break;
            }
            if ( j == nNumDiffTGroupNumbers ) {
                /* first endpoint seen for this t-group: it becomes the representative */
                nTGroupRepresenative[nNumDiffTGroupNumbers] = EndPoint[i].nAtomNumber;
                nTGroupEqu[nNumDiffTGroupNumbers] = EndPoint[i].nGroupNumber;
                nTGEndPointNo[nNumDiffTGroupNumbers] = i;
                nNumDiffTGroupNumbers ++;
            }
        }
    }


    /* check whether each pair belongs to the same t-group and establish the equivalence(s) */
    for ( i = 0, nNumFoundEqu=0; i < nNumDiffTGroupNumbers; i ++ ) {
        for ( j = i+1; j < nNumDiffTGroupNumbers; j ++ ) {
            ri = nTGroupRepresenative[i];
            rj = nTGroupRepresenative[j];
            /* both at[ri] and at[rj] are known to belong to tautomeric groups */
            GetNeutralRepsIfNeeded( &ri, &rj, at, num_atoms, EndPoint, *nNumEndPoints, cgi );
            nErr = bExistsAnyAltPath( pBNS, pBD, at, num_atoms, ri, rj, ALT_PATH_MODE_TAUTOM );
            if ( IS_BNS_ERROR(nErr) )
                return nErr;
            if ( 0 == nErr )
                continue; /* alt path between at[ri] and at[rj] not found */
            /* merge: every occurrence of the larger number is replaced by the smaller one */
            nCurTGroupNumber = inchi_min( nTGroupEqu[i], nTGroupEqu[j] );
            nMaxTGroupNumber = inchi_max( nTGroupEqu[i], nTGroupEqu[j] );
            for ( k = 0; k < nNumDiffTGroupNumbers; k ++ ) {
                if ( nTGroupEqu[k]==nMaxTGroupNumber ) {
                    nTGroupEqu[k] = nCurTGroupNumber;
                    nNumFoundEqu ++;
                }
            }
            for ( k = 0; k < *nNumEndPoints; k ++ ) {
                if ( EndPoint[k].nEquNumber == nMaxTGroupNumber ) {
                    EndPoint[k].nEquNumber = nCurTGroupNumber;
                }
            }
        }
    }
    if ( nNumFoundEqu ) {
        /* leave in only non-equivalent representatives */
        for ( i = 1, k = 0; i < nNumDiffTGroupNumbers; i ++ ) {
            for ( j = 0; j < i; j ++ ) {
                if ( nTGroupEqu[j] == nTGroupEqu[i] ) {
                    nTGroupEqu[i] = 0; /* i > j; mark equivalent for removal*/
                    break;
                }
            }
        }
        /* compact the three parallel arrays, dropping the marked entries */
        for ( i = j = 0; i < nNumDiffTGroupNumbers; i ++ ) {
            if ( nTGroupEqu[i] ) {
                if ( i != j ) { /* remove the marked */
                    nTGroupEqu[j] = nTGroupEqu[i];
                    nTGroupRepresenative[j] = nTGroupRepresenative[i];
                    nTGEndPointNo[j] = nTGEndPointNo[i];
                }
                j ++;
            }
        }
        nNumDiffTGroupNumbers = j; /* number of known t-group representatives */
    }
    /* collect endpoints that have not been assigned to t-groups;
       they are appended after the t-group representatives with equivalence number 0 */
    for ( i = 0, j = nNumDiffTGroupNumbers; i < *nNumEndPoints; i ++ ) {
        if ( EndPoint[i].nEquNumber )
            continue;
        nTGroupEqu[j] = 0;
        nTGroupRepresenative[j] = EndPoint[i].nAtomNumber;
        nTGEndPointNo[j] = i;
        j ++;

    }
    nNumTgroupNumbers = j;
    nMaxEquNumber = num_atoms + 1; /* impossible atom or t-group number */

    /* check whether each pair belongs to the same group and establish the equivalence(s) */
    for ( i = 0, nNumFoundEqu=0; i < nNumTgroupNumbers; i ++ ) {
        for ( j = i+1; j < nNumTgroupNumbers; j ++ ) {
            /* pairs of two old t-group representatives were already tested above
               and are skipped here */
            if ( nTGroupEqu[i] != nTGroupEqu[j] && (i>=nNumDiffTGroupNumbers || j>=nNumDiffTGroupNumbers) ||
                 /* equivalence of a t-group and a non-t-group atom */
                 !nTGroupEqu[i] && !nTGroupEqu[j]
                 /* equivalence of two non-t-group atoms */
               ) {
                ri = nTGroupRepresenative[i];
                rj = nTGroupRepresenative[j];
                /*------------------------------!!!---------------------------------------------
                  Explanation why GetNeutralRepsIfNeeded() may need to be changed     2004-02-27
                  The change has been disabled due to undiscovered yet possibility of ambiguity
                  to search for neutral only among EndPoint[] in case taut-not_taut pairs

                  Counterexample:    O=C-NH(+)=C-NH2
                                     1   2       3

                  Has already been found: 2-3 (+)-charge exchange
                                          1-2 tautomerism (charge removed to 3)
                  Now testing: 2-3 tautomerism. If not commented out,
                  GetNeutralRepsIfNeeded() would replace 2-3 test with 1-3 test because:
                    o Charge group has only one neutral and both 2 and 3 belong to it,
                      therefore we cannot neutralize both; search for neutral representative;
                    o Since 1 and 2 belong to the same t-group and 1 is neutral,
                      test 1-3 instead of 2-3.
                  This breaks our condition:
                      Test tautomeric H movement only between neutral atoms.
                  -----------------------------------------------------------------------------*/
                GetNeutralRepsIfNeeded( &ri, &rj, at, num_atoms, EndPoint, *nNumEndPoints, cgi );

                nErr = bExistsAnyAltPath( pBNS, pBD, at, num_atoms, ri, rj, ALT_PATH_MODE_TAUTOM );
                if ( IS_BNS_ERROR(nErr) )
                    return nErr;
                if ( nErr <= 0 )
                    continue; /* no alt path between at[ri] and at[rj] */
                if ( nTGroupEqu[i] && nTGroupEqu[j] ) {
                    /* found equivalence of two t-groups; at least one of them must be a new one */
                    nCurTGroupNumber = inchi_min( nTGroupEqu[i], nTGroupEqu[j] );
                    nMaxTGroupNumber = inchi_max( nTGroupEqu[i], nTGroupEqu[j] );
                    for ( k = 0; k < nNumTgroupNumbers; k ++ ) {
                        if ( nTGroupEqu[k]==nMaxTGroupNumber ) {
                            nTGroupEqu[k] = nCurTGroupNumber;
                            nNumFoundEqu ++;
                        }
                    }
                    for ( k = 0; k < *nNumEndPoints; k ++ ) {
                        if ( EndPoint[k].nEquNumber == nMaxTGroupNumber ) {
                            EndPoint[k].nEquNumber = nCurTGroupNumber;
                        }
                    }
                } else
                if ( nTGroupEqu[i] ) { /* extend existing t-group */
                    nTGroupEqu[j] = nTGroupEqu[i];
                    EndPoint[nTGEndPointNo[j]].nEquNumber = nTGroupEqu[i];

                } else
                if ( nTGroupEqu[j] ) { /* extend existing t-group */
                    nTGroupEqu[i] = nTGroupEqu[j];
                    EndPoint[nTGEndPointNo[i]].nEquNumber = nTGroupEqu[j];

                } else { /* establish a new t-group */
                    nTGroupEqu[i] =
                    nTGroupEqu[j] = nMaxEquNumber; /* assign a fict. ID to establish equivalence */
                    EndPoint[nTGEndPointNo[i]].nEquNumber =
                    EndPoint[nTGEndPointNo[j]].nEquNumber = nMaxEquNumber;
                    nMaxEquNumber ++;
                }
            }
        }
    }
    /* eliminate endpoints and bonds that do not belong to t-group(s)
       (they have not been found connected by an alt path to any other endpoint)
    */
    for ( i = 0, j = 0; i < *nNumEndPoints; i ++ ) {
        if ( EndPoint[i].nEquNumber ) {
#if( IGNORE_SINGLE_ENDPOINTS == 1 )  /* 1-28-2003 */
            /* count the endpoints sharing this equivalence number; nNumFoundEqu is reused as the counter */
            for ( k = 0, nNumFoundEqu = 0; k < *nNumEndPoints; k ++ ) {
                nNumFoundEqu += (EndPoint[i].nEquNumber == EndPoint[k].nEquNumber);
            }
            if ( nNumFoundEqu <= 1 ) { /* one time it is equal to itself when i == k above */
                /* if EndPoint[i] is not "equivalent" to any other EndPoint then ignore it */
                continue;
            }
#endif
            if ( i != j ) { /* save endpoints that are found to be connected to other endpoints by alt paths */
                EndPoint[j] = EndPoint[i];
                BondPos[j] = BondPos[i];
            }
            j ++;
        }
    }

#if( IGNORE_SINGLE_ENDPOINTS != 1 )  /* 1-28-2003 */
    /* Do not allow a centerpoint to have only one tautomeric bond */
    /* Hack: we may have only one centerpoint */
    /* BondPos[*].nAtomNumber are centerpoints */
    if ( j == 1 ) {
        /* check if there exist other centerpoint neighbors
         * connected to it by another tautomeric-bond
         */
        for ( i = 0, k = 0; i < at[BondPos[0].nAtomNumber].valence; i ++ ) {
            k += ( i != BondPos[0].neighbor_index &&
                   BOND_TAUTOM == (at[BondPos[0].nAtomNumber].bond_type[i] & ~BOND_MARK_ALL));
        }
        if ( !k ) {
            j = 0; /* the only remaining endpoint is discarded */
        }
    }
#endif

    /* both output counts are set to the number of entries kept */
    *nNumEndPoints = *nNumBondPos = j;
    return j;

}
1025
+
1026
+ /*#if( MOVE_CHARGES == 1 ) */ /* { */
1027
+ /********************************************************************************************************/
1028
+
1029
+ /**********************************************/
1030
+ /* */
1031
+ /* definitions for positive ion recognition */
1032
+ /* */
1033
+ /**********************************************/
1034
+
1035
/* One row per element that GetChargeType() considers as a possible charge point.
   All members except elname, cChargeType and num_bonds are passed by
   GetChargeType() to bCanBeACPoint(). */
typedef struct tagChargeType { /* meaning see in bCanBeACPoint() */
    char   elname[3];              /* element symbol; compared to at->elname with strcmp() in GetChargeType() */
    S_CHAR charge;                 /* ion charge of this type; bCanBeACPoint() compares it to at->charge */
    S_CHAR neutral_valence;        /* bCanBeACPoint() compares it to (number of bonds + num_H) */
    S_CHAR neutral_bonds_valence;  /* valence of a neutral atom; compared to (chem_bonds_valence + num_H) */
    S_CHAR cChangeValence;         /* charge increases valence by this value */
    S_CHAR cChargeType;            /* different types are treated separately; value returned by GetChargeType() */
    S_CHAR num_bonds;              /* added 02-06-2005; if non-zero, GetChargeType() also requires
                                      at->valence == num_bonds and at->nNumAtInRingSystem >= 5 */
} CHARGE_TYPE;

/* columns: elname, charge, neutral_valence, neutral_bonds_valence,
            cChangeValence, cChargeType, num_bonds */
CHARGE_TYPE CType[] = {
    { "N\0", 1, 3, 3, 1, 0, 0 },
    { "P\0", 1, 3, 3, 1, 1, 0 },
#if( ADD_MOVEABLE_O_PLUS == 1 )
    { "O\0", 1, 2, 2, 1, 2, 2 }, /* added 02-06-2005 */
    { "S\0", 1, 2, 2, 1, 3, 2 }, /* added 03-18-2005 */
    { "Se",  1, 2, 2, 1, 4, 2 }, /* added 03-18-2005 */
    { "Te",  1, 2, 2, 1, 5, 2 }, /* added 03-18-2005 */
#endif
};
1055
+
1056
/* bits of the charge subtype that bCanBeACPoint() stores in *cChargeSubtype */

#define C_SUBTYPE_CHARGED      0   /* no bit set: "charged" is the absence of C_SUBTYPE_NEUTRAL */
#define C_SUBTYPE_p_DONOR      1 /* new */
#define C_SUBTYPE_p_ACCEPT     2 /* new */
#define C_SUBTYPE_H_ACCEPT     4
#define C_SUBTYPE_H_DONOR      8
#define C_SUBTYPE_NEUTRAL     16   /* MarkChargeGroups() tests this bit to tell neutral from charged candidates */

/* make sure any C_SUBTYPE_CHARGED_... < any C_SUBTYPE_NEUTRAL_... */
/* (CmpCCandidates() orders candidates of one type by subtype, so all charged
   combinations, at most 9, sort before all neutral ones, at least 16) */
/* charged */
#define C_SUBTYPE_CHARGED_NON_TAUT          (C_SUBTYPE_CHARGED)
#define C_SUBTYPE_CHARGED_p_DONOR           (C_SUBTYPE_CHARGED|C_SUBTYPE_p_DONOR)
#define C_SUBTYPE_CHARGED_H_ACCEPT          (C_SUBTYPE_CHARGED|C_SUBTYPE_H_ACCEPT)
#define C_SUBTYPE_CHARGED_H_ACCEPT_p_DONOR  (C_SUBTYPE_CHARGED|C_SUBTYPE_H_ACCEPT|C_SUBTYPE_p_DONOR)
/* note: the charged H-donor combination also includes the p-donor bit */
#define C_SUBTYPE_CHARGED_H_DONOR           (C_SUBTYPE_CHARGED|C_SUBTYPE_H_DONOR |C_SUBTYPE_p_DONOR)
/* neutral */
#define C_SUBTYPE_NEUTRAL_NON_TAUT          (C_SUBTYPE_NEUTRAL)
#define C_SUBTYPE_NEUTRAL_H_ACCEPT          (C_SUBTYPE_NEUTRAL|C_SUBTYPE_H_ACCEPT)
#define C_SUBTYPE_NEUTRAL_H_ACCEPT_p_ACCEPT (C_SUBTYPE_NEUTRAL|C_SUBTYPE_H_ACCEPT|C_SUBTYPE_p_ACCEPT)
#define C_SUBTYPE_NEUTRAL_H_DONOR           (C_SUBTYPE_NEUTRAL|C_SUBTYPE_H_DONOR)

/* number of rows in CType[]; depends on ADD_MOVEABLE_O_PLUS */
#define NUM_C_TYPES  (int)(sizeof( CType )/sizeof(CType[0]))
1079
+
1080
+
1081
+ /********************************************************************************************************/
1082
+ int bCanBeACPoint( inp_ATOM *at, S_CHAR cCharge, S_CHAR cChangeValence, S_CHAR neutral_bonds_valence,
1083
+ S_CHAR neutral_valence, S_CHAR nEndpointValence, S_CHAR *cChargeSubtype )
1084
+ {
1085
+ int nChangeValence;
1086
+ int nNumBonds;
1087
+ int nBondsValence;
1088
+ int bNegCharge = (at->charge == -1); /* add fict. bonds to (-) 2004-02-24*/
1089
+ if ( at->charge == cCharge && at->valence == at->chem_bonds_valence && at->num_H ) {
1090
+ /* proton donors candidates >NH(+)-, >NH2(+), -NH3(+), >OH(+), -OH2(+) */
1091
+ /* charged, added p-transfer -- 01-28-2004 */
1092
+ nChangeValence = at->charge * cChangeValence; /* +1 or -1; currently only +1 */
1093
+ nBondsValence = at->chem_bonds_valence + at->num_H;
1094
+ if ( nBondsValence == neutral_bonds_valence + nChangeValence && nEndpointValence ) {
1095
+ *cChargeSubtype = C_SUBTYPE_CHARGED_p_DONOR; /* ignore Phosphorus p-donors for now */
1096
+ }
1097
+ return 0;
1098
+ } else
1099
+ if ( at->charge == cCharge && at->valence < at->chem_bonds_valence ) {
1100
+ /* the requirement at->valence < at->chem_bonds_valence rejects
1101
+ candidates >NH(+)-, >NH2(+), -NH3(+), >N(+)<, >OH(+), -OH2(+), >O(+)-
1102
+ Moveable charge requires double bonds; these ions have no double bonds
1103
+ */
1104
+
1105
+ /* charged */
1106
+ nChangeValence = at->charge * cChangeValence; /* +1 or -1; currently only +1 */
1107
+ nBondsValence = at->chem_bonds_valence + at->num_H;
1108
+ nNumBonds = at->valence + at->num_H;
1109
+ if ( nBondsValence == neutral_bonds_valence + nChangeValence ) { /* known valence */
1110
+ if ( nNumBonds == neutral_valence ) {
1111
+ /* non-tautomeric: >N(+)=, =O(+)-
1112
+ possibly tautomeric donor: =NH(+)-, =NH2(+), =OH(+) */
1113
+ if ( at->valence == neutral_valence || !nEndpointValence ) {
1114
+ /* non-tautomeric: >N(+)=, =O(+)-; any suitable P+: >P(+)=, =PH(+)-, =PH2(+) */
1115
+ *cChargeSubtype = C_SUBTYPE_CHARGED_NON_TAUT;
1116
+ } else {
1117
+ /* possibly tautomeric donor: =NH(+)-, =NH2(+), =OH(+) */
1118
+ *cChargeSubtype = C_SUBTYPE_CHARGED_H_DONOR;
1119
+ }
1120
+ return 1;
1121
+ }
1122
+ if ( nNumBonds == neutral_valence - 1 ) {
1123
+ /* possibly tutomeric acceptor: =N(+)=, #N(+)-, #NH(+), #O(+) */
1124
+ if ( nEndpointValence ) {
1125
+ *cChargeSubtype = at->num_H? C_SUBTYPE_CHARGED_H_ACCEPT_p_DONOR : C_SUBTYPE_CHARGED_H_ACCEPT;
1126
+ } else {
1127
+ /* =P(+)=, #P(+)-, #PH(+) */
1128
+ *cChargeSubtype = C_SUBTYPE_CHARGED_NON_TAUT;
1129
+ }
1130
+ return 1; /* charge type, charged */
1131
+ }
1132
+ }
1133
+
1134
+ } else
1135
+ if ( at->charge == 0 || bNegCharge ) {
1136
+ /* neutral atom or anion, all bonds are single */
1137
+ nBondsValence = at->chem_bonds_valence + at->num_H + bNegCharge; /* add fict. bonds to (-) 2004-02-24*/
1138
+ nNumBonds = at->valence + at->num_H + bNegCharge; /* add fict. bonds to (-) 2004-02-24*/
1139
+ if ( nBondsValence == neutral_bonds_valence ) {
1140
+ if ( nNumBonds == neutral_valence ) {
1141
+ /* only single bonds: >N-, >NH, -NH2, -O-, -OH, >P- >PH -PH2 */
1142
+ /* >N(-), -NH(-), -O(-). >P(-) -PH(-) */
1143
+ if ( at->valence == neutral_valence || !nEndpointValence ) {
1144
+ /* >N-, -O-, any P(3 single bonds): >P- >PH -PH2 */
1145
+ *cChargeSubtype = C_SUBTYPE_NEUTRAL_NON_TAUT;
1146
+ } else
1147
+ if ( at->valence < neutral_valence /*&& nEndpointValence */ ) {
1148
+ /* num_H > 0: >NH -NH2 -OH */
1149
+ /* num_H = 0: none C_SUBTYPE_NEUTRAL_H_ACCEPT for now */
1150
+ *cChargeSubtype = at->num_H? C_SUBTYPE_NEUTRAL_H_DONOR: C_SUBTYPE_NEUTRAL_H_ACCEPT;
1151
+ } else {
1152
+ return 0;
1153
+ }
1154
+ return 1; /* charge type, neutral */
1155
+ }
1156
+ if ( nNumBonds == neutral_valence - 1 ) {
1157
+ /* possibly tautomeric acceptor =N-, =NH, =O or non-taut =P-, =PH */
1158
+ if ( nEndpointValence ) {
1159
+ /* =N-, =NH, =O */
1160
+ *cChargeSubtype = C_SUBTYPE_NEUTRAL_H_ACCEPT_p_ACCEPT;
1161
+ } else {
1162
+ /* =P-, =PH */
1163
+ *cChargeSubtype = C_SUBTYPE_NEUTRAL_NON_TAUT;
1164
+ }
1165
+ return 1; /* charge type, (+) => neutral */
1166
+ }
1167
+ }
1168
+ }
1169
+ return 0;
1170
+ }
1171
+ /********************************************************************************************************/
1172
+ int GetChargeType( inp_ATOM *atom, int iat, S_CHAR *cChargeSubtype )
1173
+ {
1174
+ int i, n;
1175
+ S_CHAR nEndpointValence;
1176
+ inp_ATOM *at = atom + iat;
1177
+
1178
+ *cChargeSubtype = 0;
1179
+ /* ignore ion pairs and charges != 1 */
1180
+ if ( abs(at->charge) == 1 ) {
1181
+ for ( i = 0; i < at->valence; i ++ ) {
1182
+ n = at->neighbor[i];
1183
+ /* allow negatively charged tautomeric neighbors 2004-02-26 */
1184
+ if ( abs(atom[n].charge + at->charge) < abs(atom[n].charge - at->charge) && !atom[n].endpoint ) {
1185
+ return -1; /* charges have different signs */
1186
+ }
1187
+ }
1188
+ } else
1189
+ if ( at->charge ) {
1190
+ return -1; /* abs(charge) != 1 */
1191
+ }
1192
+ /* find candidates */
1193
+ for ( i = 0; i < NUM_C_TYPES; i ++ ) {
1194
+ if ( !strcmp( at->elname, CType[i].elname ) &&
1195
+ (!CType[i].num_bonds || CType[i].num_bonds==at->valence && at->nNumAtInRingSystem >= 5) ) {
1196
+ nEndpointValence = (S_CHAR)get_endpoint_valence(at->el_number );
1197
+ if ( bCanBeACPoint( at, CType[i].charge, CType[i].cChangeValence, CType[i].neutral_bonds_valence,
1198
+ CType[i].neutral_valence, nEndpointValence, cChargeSubtype ) ) {
1199
+ return CType[i].cChargeType;
1200
+ }
1201
+ }
1202
+ }
1203
+ return -1;
1204
+ }
1205
+ /********************************************************************************************************/
1206
+ int CmpCCandidates( const void *a1, const void *a2 )
1207
+ {
1208
+ const C_CANDIDATE *c1 = (const C_CANDIDATE *)a1;
1209
+ const C_CANDIDATE *c2 = (const C_CANDIDATE *)a2;
1210
+ int ret;
1211
+ if ( ret = (int)c1->type - (int)c2->type )
1212
+ return ret;
1213
+ if ( ret = (int)c1->subtype - (int)c2->subtype )
1214
+ return ret;
1215
+ ret = (int)c1->atnumber - (int)c2->atnumber;
1216
+ return ret;
1217
+ }
1218
+ /********************************************************************************************************/
1219
+ int RegisterCPoints( C_GROUP *c_group, int *pnum_c, int max_num_c, T_GROUP_INFO *t_group_info,
1220
+ int point1, int point2, int ctype, inp_ATOM *at, int num_atoms )
1221
+ {
1222
+ int num_c = *pnum_c, i, i1, i2;
1223
+ AT_NUMB nGroupNumber = 0, nNewGroupNumber;
1224
+
1225
+
1226
+ if ( at[point1].c_point == at[point2].c_point ) {
1227
+ if ( at[point1].c_point )
1228
+ return 0;
1229
+ memset( c_group+num_c, 0, sizeof(c_group[0]) );
1230
+ if ( num_c < max_num_c ) {
1231
+ c_group[num_c].num[0] = CHARGED_CPOINT(at,point1) + CHARGED_CPOINT(at, point2);
1232
+ c_group[num_c].num_CPoints += 2;
1233
+ c_group[num_c].cGroupType = ctype;
1234
+ /* get next available c-group number */
1235
+ for ( i = 0; i < num_c; i ++ ) {
1236
+ if ( nGroupNumber < c_group[i].nGroupNumber )
1237
+ nGroupNumber = c_group[i].nGroupNumber;
1238
+ }
1239
+ nGroupNumber ++;
1240
+ c_group[num_c].nGroupNumber =
1241
+ at[point1].c_point =
1242
+ at[point2].c_point = nGroupNumber;
1243
+ *pnum_c = num_c+1;
1244
+ /* count protons */
1245
+ if ( at[point1].num_H ) {
1246
+ c_group[num_c].num[1] ++;
1247
+ } else
1248
+ if ( at[point2].num_H ) {
1249
+ c_group[num_c].num[1] ++;
1250
+ } else
1251
+ if ( (at[point1].endpoint || at[point2].endpoint) && t_group_info && t_group_info->t_group && t_group_info->num_t_groups ) {
1252
+ /* !!! add later !!! */
1253
+ }
1254
+
1255
+
1256
+ return 1;
1257
+ }
1258
+ return BNS_CPOINT_ERR; /* overflow */
1259
+ }
1260
+ if ( at[point1].c_point > at[point2].c_point ) {
1261
+ /* make sure at[point1].c_point < at[point2].c_point */
1262
+ i = point1;
1263
+ point1 = point2;
1264
+ point2 = i;
1265
+ }
1266
+ if ( !at[point1].c_point ) {
1267
+ /* add a new c-endpoint to an existing c-group */
1268
+ nGroupNumber = at[point2].c_point;
1269
+ for ( i = 0; i < num_c; i ++ ) {
1270
+ if ( nGroupNumber == c_group[i].nGroupNumber ) {
1271
+ at[point1].c_point = at[point2].c_point;
1272
+ c_group[i].num_CPoints ++;
1273
+ c_group[i].num[0] += CHARGED_CPOINT(at,point1);
1274
+ return 1;
1275
+ }
1276
+ }
1277
+ return BNS_CPOINT_ERR; /* program error: c-group not found */
1278
+ } else {
1279
+ /* merge two c-groups */
1280
+ nNewGroupNumber = at[point1].c_point;
1281
+ nGroupNumber = at[point2].c_point;
1282
+ for ( i = 0, i1=i2=-1; i < num_c && (i1 < 0 || i2 < 0); i ++ ) {
1283
+ if ( nNewGroupNumber == c_group[i].nGroupNumber ) {
1284
+ i1 = i;
1285
+ continue;
1286
+ }
1287
+ if ( nGroupNumber == c_group[i].nGroupNumber ) {
1288
+ i2 = i;
1289
+ continue;
1290
+ }
1291
+ }
1292
+ if ( i1 < 0 || i2 < 0 ) {
1293
+ return BNS_CPOINT_ERR; /* at least one not found */
1294
+ }
1295
+
1296
+ c_group[i1].num[0] += c_group[i2].num[0];
1297
+ c_group[i1].num_CPoints += c_group[i2].num_CPoints;
1298
+ num_c --;
1299
+ if ( num_c > i2 ) {
1300
+ memmove( c_group+i2, c_group+i2+1, ( num_c - i2)*sizeof(c_group[0]) );
1301
+ }
1302
+ *pnum_c = num_c;
1303
+ /* renumber c-groups */
1304
+ for ( i = 0; i < num_c; i ++ ) {
1305
+ if ( c_group[i].nGroupNumber > nGroupNumber ) {
1306
+ c_group[i].nGroupNumber --;
1307
+ }
1308
+ }
1309
+ /* renumber c-points */
1310
+ for ( i = 0; i < num_atoms; i ++ ) {
1311
+ if ( at[i].c_point > nGroupNumber ) {
1312
+ at[i].c_point --;
1313
+ } else
1314
+ if ( at[i].c_point == nGroupNumber ) {
1315
+ at[i].c_point = nNewGroupNumber;
1316
+ }
1317
+ }
1318
+ return 1;
1319
+ }
1320
+ }
1321
+
1322
+ /********************************************************************************************************/
1323
/*
 * MarkChargeGroups
 *
 * Find pairs (charged atom, neutral atom) of the same charge type that are
 * connected by an alternating path and register them as members of a common
 * charge group through RegisterCPoints().
 *
 * Nothing is done (0 is returned) unless c_group_info is non-NULL, has a
 * c_candidate array and max_num_candidates > 0.
 *
 * c_group_info->num_candidates is used as a cache state:
 *    0  candidates have not been collected yet;
 *   -1  set here when no usable candidates were found; on entry it is treated
 *       like 0, so the atoms are scanned again;
 *   >0  number of valid entries in c_candidate[] from an earlier call.
 *
 * Return value:
 *   - an error code if bExistsAltPath() or RegisterCPoints() returned one
 *     (IS_BNS_ERROR), or BNS_VERT_EDGE_OVFL if c_candidate[] is too small;
 *   - otherwise nNumChanges, the sum of (ret & 2) over all successful
 *     bExistsAltPath() calls.
 *
 * NOTE(review): the meaning of the individual bits of the bExistsAltPath()
 * result is not visible here; the comments below only describe how this
 * function decodes them.
 */
int MarkChargeGroups ( inp_ATOM *at, int num_atoms, C_GROUP_INFO *c_group_info, T_GROUP_INFO *t_group_info,
                       struct BalancedNetworkStructure *pBNS, struct BalancedNetworkData *pBD )
{

    int nNumChanges = 0;
    if ( c_group_info && c_group_info->c_candidate && c_group_info->max_num_candidates > 0 ) {
        int i, i1, i2, i3, j, num_tested;
        C_CANDIDATE *c_candidate = c_group_info->c_candidate;
        int nMaxNumCandidates = c_group_info->max_num_candidates;
        int nNumCandidates = c_group_info->num_candidates;
        S_CHAR c_type, c_subtype;
        int iat1, iat2, ret, nDelta;

        if ( nNumCandidates == -1 ) {
            nNumCandidates = 0; /* 2004-02-26 they could appear after t-group discovery */
            /*return 0;*/
        }
        if ( nNumCandidates == 0 ) {
            /* collect every atom for which GetChargeType() returns a non-negative type */
            for ( i = 0, nNumCandidates = 0; i < num_atoms; i ++ ) {
                if ( 0 <= (c_type = GetChargeType( at, i, &c_subtype )) ) {
                    if ( nNumCandidates >= nMaxNumCandidates ) {
                        return BNS_VERT_EDGE_OVFL;
                    }
                    c_candidate[nNumCandidates].atnumber = i;
                    c_candidate[nNumCandidates].type = c_type;
                    c_candidate[nNumCandidates].subtype = c_subtype;
                    nNumCandidates ++;
                }
            }
            if ( nNumCandidates <= 1 ) {
                /* a pair cannot be formed from fewer than two candidates */
                c_group_info->num_candidates = -1; /* no candidate exists */
                return 0;
            }
        }
        /* sorting keys: (1) atom type (N,P); (2) uncharged=16/charged=0; (3) other;
           atom-charged-N  .... i1
           ...
           atom-charged-N
           atom-neutral-N  .... i2
           ...
           atom-neutral-N
           atom-charged-P  .... i3 ... i1
           ...
           atom-charged-P
           atom-neutral-P  ........... i2
           ...
           atom-neutral-P
           end.            ........... i3
        */
        qsort( c_candidate, nNumCandidates, sizeof(c_candidate[0]), CmpCCandidates );

        i1 = 0;
        num_tested = 0;
        nDelta = 0;

        while ( i1 < nNumCandidates ) {

            /* find the first charged candidate of a new atom type */
            for ( ; i1 < nNumCandidates && (c_candidate[i1].subtype & C_SUBTYPE_NEUTRAL); i1 ++ )
                ;
            if ( i1 == nNumCandidates )
                break; /* not found */

            /* bypass other charged candidates of the same atom type */
            for ( i2 = i1+1; i2 < nNumCandidates &&
                             c_candidate[i2].type == c_candidate[i1].type &&
                             !(c_candidate[i2].subtype & C_SUBTYPE_NEUTRAL); i2++ )
                ;
            if ( i2 == nNumCandidates )
                break; /* no neutral candidates */

            /* find next to the last neutral candidate of the same atom type */
            for ( i3 = i2; i3 < nNumCandidates &&
                           c_candidate[i3].type == c_candidate[i1].type; i3 ++ )
                ;

            if ( i3 == i2 ) {
                /* no neutral candidates found */
                if ( i2 < nNumCandidates ) {
                    i1 = i3;
                    continue; /* move to the next atom type */
                }
                break; /* nothing more to do */
            }

            /* found charged candidates: i1...i2-1; neutral candidates: i2...i3-1 */
            for ( i = i1; i < i2; i ++ ) {
                iat1 = c_candidate[i].atnumber;
                for ( j = i2; j < i3; j ++ ) {
                    /* check alt path at[iat1]=-=-...-at[iat2]; at[iat1] is charged, at[iat2] is neutral */
                    num_tested ++; /* counted even if the pair is skipped just below */
                    iat2 = c_candidate[j].atnumber;
                    if ( at[iat1].c_point && at[iat1].c_point == at[iat2].c_point )
                        continue; /* already in the same c-group */
                    ret = bExistsAltPath( pBNS, pBD, NULL, at, num_atoms, iat1, iat2, ALT_PATH_MODE_CHARGE );
                    if ( IS_BNS_ERROR( ret ) ) {
                        return ret;
                    }
                    if ( ret & 1 ) {
                        /* bit 0 set: the pair is registered; bit 1 is added to
                           nNumChanges; the remaining bits, shifted down, form nDelta */
                        nDelta = (ret & ~3) >> 2;
                        nNumChanges += (ret & 2);
                        ret = RegisterCPoints( c_group_info->c_group, &c_group_info->num_c_groups,
                                               c_group_info->max_num_c_groups, t_group_info,
                                               iat1, iat2, c_candidate[i1].type, at, num_atoms );
                        if ( IS_BNS_ERROR( ret ) ) {
                            return ret;
                        }
                        if ( nDelta ) {
                            /* a non-zero nDelta stops the whole search */
                            goto quick_exit;
                        }

                    }
                }
            }
            i1 = i3; /* continue with the next atom type */
        }
quick_exit:
        if ( c_group_info->num_candidates == 0 ) {
            /* first time: initialize */
            c_group_info->num_candidates = num_tested? nNumCandidates : -1; /* no candidate exists */
        }

    }
    return nNumChanges;
}
1448
+
1449
+ /********************************************************************************************************/
1450
+ int GetSaltChargeType( inp_ATOM *at, int at_no, T_GROUP_INFO *t_group_info, int *s_subtype )
1451
+ {
1452
+ static int el_number_C = 0;
1453
+ static int el_number_O = 0;
1454
+ static int el_number_S = 0;
1455
+ static int el_number_Se = 0;
1456
+ static int el_number_Te = 0;
1457
+
1458
+ /*
1459
+ type (returned value):
1460
+ -1 => ignore
1461
+ 0 => oxygen
1462
+ subtype:
1463
+ 1 = SALT_DONOR_H => has H
1464
+ 2 = SALT_DONOR_Neg => has (-) charge
1465
+ 4 = SALT_ACCEPTOR => may be an acceptor of H or (-), but not necessarily
1466
+
1467
+ O-atom should be:
1468
+ - a terminal atom
1469
+ - connected to unsaturated, uncharged, non-radical atom C that has chemical valence 4:
1470
+ H-donors: =CH-OH, =C(-X)-OH
1471
+ possible H-acceptors: -CH=O, >C=O
1472
+ H-acceptors are true if O is tautomeric
1473
+ */
1474
+ int iC, tg, i, type;
1475
+ /* one-time initialization */
1476
+ if ( !el_number_O ) {
1477
+ el_number_C = get_periodic_table_number( "C" );
1478
+ el_number_O = get_periodic_table_number( "O" );
1479
+ el_number_S = get_periodic_table_number( "S" );
1480
+ el_number_Se = get_periodic_table_number( "Se" );
1481
+ el_number_Te = get_periodic_table_number( "Te" );
1482
+ }
1483
+ *s_subtype = 0; /* initialize the output */
1484
+ /* check whether it is a candidate */
1485
+ if ( at[at_no].valence != 1 ||
1486
+ at[at_no].radical && at[at_no].radical != RADICAL_SINGLET ||
1487
+ at[at_no].charge < -1 ||
1488
+ at[at_no].charge > 0 && !at[at_no].c_point ) {
1489
+ return -1;
1490
+ }
1491
+
1492
+ if ( at[at_no].el_number == el_number_O ||
1493
+ at[at_no].el_number == el_number_S ||
1494
+ at[at_no].el_number == el_number_Se ||
1495
+ at[at_no].el_number == el_number_Te ) {
1496
+ type = 0; /* terminal oxygen atom, needs more to be checked... */
1497
+ } else {
1498
+ type = -1; /* ignore this atom */
1499
+ }
1500
+
1501
+ if ( type < 0 ||
1502
+ at[at_no].chem_bonds_valence + at[at_no].num_H !=
1503
+ get_el_valence(at[at_no].el_number, at[at_no].charge, 0) ) {
1504
+ return -1; /* non-standard valence or not an oxygen */
1505
+ }
1506
+
1507
+ iC = at[at_no].neighbor[0];
1508
+
1509
+ #if ( SALT_WITH_PROTONS == 1 )
1510
+ if ( at[iC].el_number != el_number_C ||
1511
+ at[iC].chem_bonds_valence + at[iC].num_H != 4 || /* allow =C(H)-OH or -C(H)=O */
1512
+ at[iC].charge ||
1513
+ at[iC].radical && at[iC].radical != RADICAL_SINGLET ||
1514
+ at[iC].valence == at[iC].chem_bonds_valence ) {
1515
+ return -1; /* oxigen is connected to a wrong atom */
1516
+ }
1517
+ #else
1518
+ if ( at[iC].el_number != el_number_C ||
1519
+ at[iC].num_H ||
1520
+ at[iC].chem_bonds_valence != 4 || /* allow only no H on C */
1521
+ at[iC].charge ||
1522
+ at[iC].radical && at[iC].radical != RADICAL_SINGLET ||
1523
+ at[iC].valence == at[iC].chem_bonds_valence ) {
1524
+ return -1; /* oxigen is connected to a wrong atom */
1525
+ }
1526
+ #endif
1527
+ if ( (tg = at[at_no].endpoint) && t_group_info && t_group_info->t_group ) {
1528
+ /* O-atom is in a tautomeric group */
1529
+ for ( i = 0; i < t_group_info->num_t_groups; i ++ ) {
1530
+ if ( tg == t_group_info->t_group[i].nGroupNumber ) {
1531
+ /*
1532
+ t_group_info->t_group[i].num[0] = number of attached H-atoms and negative charges
1533
+ t_group_info->t_group[i].num[1] = number of attached negative charges
1534
+ */
1535
+ if ( t_group_info->t_group[i].num[0] > t_group_info->t_group[i].num[1] ) {
1536
+ *s_subtype |= SALT_DONOR_H; /* has H */
1537
+ }
1538
+ if ( t_group_info->t_group[i].num[1] ) {
1539
+ *s_subtype |= SALT_DONOR_Neg; /* has (-) */
1540
+ }
1541
+ *s_subtype |= SALT_ACCEPTOR; /* there is always an acceptor in a t-group */
1542
+ return type;
1543
+ }
1544
+ }
1545
+ return -1; /* error: t-group not found */
1546
+ }
1547
+ /* O is not not in a tautomeric group */
1548
+ /* assume valence(O-) < valence(O) < valence(O+) */
1549
+ if ( at[at_no].charge == -1 ) {
1550
+ *s_subtype |= SALT_DONOR_Neg; /* has (-) */
1551
+ }
1552
+ if ( at[at_no].charge <= 0 && at[at_no].num_H ) {
1553
+ *s_subtype |= SALT_DONOR_H; /* has H */
1554
+ }
1555
+ if ( at[at_no].charge == 0 && at[at_no].chem_bonds_valence == 2 ) {
1556
+ *s_subtype |= SALT_ACCEPTOR;
1557
+ }
1558
+ /* since O cannot be a charge point, the following cannot happen: */
1559
+ if ( at[at_no].charge == 1 && at[at_no].c_point && at[at_no].chem_bonds_valence == 2 && at[at_no].num_H ) {
1560
+ *s_subtype |= SALT_DONOR_H; /* has H */
1561
+ }
1562
+ return type;
1563
+ }
1564
+ /********************************************************************************************************/
1565
+ int bDoNotMergeNonTautAtom(inp_ATOM *at, int at_no)
1566
+ {
1567
+ static int el_number_N = 0;
1568
+
1569
+ if ( !el_number_N ) {
1570
+ el_number_N = get_periodic_table_number( "N" );
1571
+ }
1572
+ if ( at[at_no].el_number == el_number_N )
1573
+ {
1574
+ return 1;
1575
+ }
1576
+ return 0;
1577
+ }
1578
+ /********************************************************************************************************/
1579
+ int GetOtherSaltChargeType( inp_ATOM *at, int at_no, T_GROUP_INFO *t_group_info, int *s_subtype, int bAccept_O )
1580
+ {
1581
+ /* static int el_number_C = 0; */
1582
+ /* static int el_number_N = 0; */
1583
+ static int el_number_O = 0;
1584
+ static int el_number_S = 0;
1585
+ static int el_number_Se = 0;
1586
+ static int el_number_Te = 0;
1587
+
1588
+ /*
1589
+ type (returned value):
1590
+ -1 => ignore
1591
+ 1 => not an oxygen
1592
+ subtype:
1593
+ 1 = SALT_DONOR_H => has H
1594
+ 2 = SALT_DONOR_Neg => has (-) charge
1595
+ 4 = SALT_ACCEPTOR => may be an acceptor of H or (-), but not necessarily
1596
+
1597
+ the atom should be:
1598
+ - a tautomeric endpoint atom
1599
+ - connected to possible centerpoint atom
1600
+
1601
+ another description of the atom searched here:
1602
+
1603
+ any possibly tautomeric atom adjacent to a possibly centerpoint
1604
+ that has at least one double bond (possibly if positively charged);
1605
+ if eif.cAcceptor then the bond between the atom and the centerpoint must be possibly double
1606
+ if eif.cAcceptor then the bond must be possibly single
1607
+ Donors that belong to a t-group are also acceptors
1608
+
1609
+
1610
+ */
1611
+ int tg, i, j, type, endpoint_valence, num_centerpoints, bond_type, centerpoint;
1612
+ ENDPOINT_INFO eif;
1613
+ /* one-time initialization */
1614
+ if ( !el_number_O && !bAccept_O ) {
1615
+ /* el_number_C = get_periodic_table_number( "C" ); */
1616
+ /* el_number_N = get_periodic_table_number( "N" ); */
1617
+ el_number_O = get_periodic_table_number( "O" );
1618
+ el_number_S = get_periodic_table_number( "S" );
1619
+ el_number_Se = get_periodic_table_number( "Se" );
1620
+ el_number_Te = get_periodic_table_number( "Te" );
1621
+ }
1622
+ *s_subtype = 0; /* initialize the output */
1623
+ if ( !bAccept_O /* only N */ &&
1624
+ (at[at_no].el_number == el_number_O ||
1625
+ at[at_no].el_number == el_number_S ||
1626
+ at[at_no].el_number == el_number_Se ||
1627
+ at[at_no].el_number == el_number_Te ) ) {
1628
+ return -1; /* we are not looking for oxygen here */
1629
+ }
1630
+
1631
+ type = 1;
1632
+ if ( !(endpoint_valence = nGetEndpointInfo( at, at_no, &eif )) ) {
1633
+ return -1; /* not a possible endpoint */
1634
+ } else {
1635
+ /* at[at_no] is not not in a tautomeric group; use eif previously filled out by nGetEndpointInfo */
1636
+ /* check whether there is adjacent atom-candidate for a centerpoint */
1637
+ num_centerpoints = 0;
1638
+ for ( j = 0; j < at[at_no].valence; j ++ ) {
1639
+ bond_type = (int)at[at_no].bond_type[j] & BOND_TYPE_MASK;
1640
+ centerpoint = (int)at[at_no].neighbor[j]; /* a centerpoint candidate */
1641
+ if ( ( eif.cAcceptor && (bond_type == BOND_DOUBLE ||
1642
+ bond_type == BOND_ALTERN || /* possibly double */
1643
+ bond_type == BOND_ALT12NS ||
1644
+ bond_type == BOND_TAUTOM ) ||
1645
+ eif.cDonor && (bond_type == BOND_SINGLE ||
1646
+ bond_type == BOND_ALTERN || /* possibly single */
1647
+ bond_type == BOND_ALT12NS ||
1648
+ bond_type == BOND_TAUTOM ) ) &&
1649
+ (at[centerpoint].chem_bonds_valence > at[centerpoint].valence ||
1650
+ /* check for possible endpoint added 2004-02-24 */
1651
+ at[centerpoint].chem_bonds_valence == at[centerpoint].valence &&
1652
+ (at[centerpoint].endpoint || at[centerpoint].c_point) /* tautomerism or charge may increment at[centerpoint].chem_bonds_valence*/ ) &&
1653
+ is_centerpoint_elem( at[centerpoint].el_number ) ) {
1654
+ num_centerpoints ++;
1655
+ break; /* at least one possibly centerpoint neighbor has been found */
1656
+ }
1657
+ }
1658
+ if ( !num_centerpoints ) {
1659
+ return -1;
1660
+ }
1661
+ /* moved here from just after "type = 1;" line 2004-02-26 */
1662
+ if ( (tg = at[at_no].endpoint) && t_group_info && t_group_info->t_group ) {
1663
+ /* atom is in a tautomeric group */
1664
+ for ( i = 0; i < t_group_info->num_t_groups; i ++ ) {
1665
+ if ( tg == t_group_info->t_group[i].nGroupNumber ) {
1666
+ /*
1667
+ t_group_info->t_group[i].num[0] = number of attached H-atoms and negative charges
1668
+ t_group_info->t_group[i].num[1] = number of attached negative charges
1669
+ */
1670
+ if ( t_group_info->t_group[i].num[0] > t_group_info->t_group[i].num[1] ) {
1671
+ *s_subtype |= SALT_DONOR_H; /* has H */
1672
+ }
1673
+ if ( t_group_info->t_group[i].num[1] ) {
1674
+ *s_subtype |= SALT_DONOR_Neg; /* has (-) */
1675
+ }
1676
+ *s_subtype |= SALT_ACCEPTOR; /* there is always an acceptor in a t-group */
1677
+ return type;
1678
+ }
1679
+ }
1680
+ return -1; /* error: t-group not found */
1681
+ }
1682
+
1683
+ if ( eif.cAcceptor ) {
1684
+ *s_subtype |= SALT_ACCEPTOR;
1685
+ }
1686
+ if ( eif.cDonor ) {
1687
+ if ( at[at_no].charge == -1 ) {
1688
+ *s_subtype |= SALT_DONOR_Neg; /* has (-) */
1689
+ }
1690
+ if ( at[at_no].num_H ) {
1691
+ *s_subtype |= SALT_DONOR_H; /* has H */
1692
+ }
1693
+ }
1694
+ }
1695
+ return type;
1696
+ }
1697
+ /********************************************************************************************************/
1698
+ int GetOtherSaltType( inp_ATOM *at, int at_no, int *s_subtype )
1699
+ {
1700
+ static int el_number_C = 0;
1701
+ /* static int el_number_N = 0; */
1702
+ /* static int el_number_O = 0; */
1703
+ static int el_number_S = 0;
1704
+ static int el_number_Se = 0;
1705
+ static int el_number_Te = 0;
1706
+
1707
+ /*
1708
+ type (returned value):
1709
+ -1 => ignore
1710
+ 2 => found: SH
1711
+ proton donor -CH2-SH, >CH-SH, >C< S(-)
1712
+ proton acceptor -CH2-S(-), >CH-S(-), >C<
1713
+ subtype:
1714
+ 1 = SALT_DONOR_H => has H
1715
+ 2 = SALT_DONOR_Neg => has (-) charge
1716
+ 4 = SALT_ACCEPTOR => may be an acceptor of H or (-), but not necessarily
1717
+
1718
+ non-O-atom should be:
1719
+ - a tautomeric endpoint atom
1720
+ - connected to possible middle point atom
1721
+ */
1722
+ int type, endpoint_valence, bond_type, centerpoint;
1723
+ ENDPOINT_INFO eif;
1724
+
1725
+ if ( at[at_no].valence != 1 || at[at_no].chem_bonds_valence != 1 ||
1726
+ 1 != (at[at_no].num_H==1) + (at[at_no].charge==-1) ) {
1727
+ return -1;
1728
+ }
1729
+ /* one-time initialization */
1730
+ if ( !el_number_S ) {
1731
+ el_number_C = get_periodic_table_number( "C" );
1732
+ /* el_number_N = get_periodic_table_number( "N" ); */
1733
+ /* el_number_O = get_periodic_table_number( "O" ); */
1734
+ el_number_S = get_periodic_table_number( "S" );
1735
+ el_number_Se = get_periodic_table_number( "Se" );
1736
+ el_number_Te = get_periodic_table_number( "Te" );
1737
+ }
1738
+ *s_subtype = 0; /* initialize the output */
1739
+ if ( !(at[at_no].el_number == el_number_S ||
1740
+ at[at_no].el_number == el_number_Se ||
1741
+ at[at_no].el_number == el_number_Te ) ) {
1742
+ return -1; /* we are not looking for oxygen here */
1743
+ }
1744
+
1745
+ type = 2; /* non-tautomeric p-donor or acceptor: C-SH, C-S(-) */
1746
+
1747
+ if ( !(endpoint_valence = nGetEndpointInfo( at, at_no, &eif )) ||
1748
+ eif.cMoveableCharge && !at[at_no].c_point || !eif.cDonor || eif.cAcceptor ) {
1749
+ return -1; /* not a possible -SH or -S(-) */
1750
+ } else {
1751
+ /* at[at_no] is not not in a tautomeric group; use eif previously filled out by nGetEndpointInfo */
1752
+ /* check whether there is adjacent atom-candidate for a centerpoint */
1753
+ centerpoint = (int)at[at_no].neighbor[0];
1754
+ bond_type = (int)at[at_no].bond_type[0] & BOND_TYPE_MASK;
1755
+ if ( at[centerpoint].el_number != el_number_C ||
1756
+ at[centerpoint].charge ||
1757
+ at[centerpoint].radical && at[centerpoint].radical != RADICAL_SINGLET ||
1758
+ at[centerpoint].valence != at[centerpoint].chem_bonds_valence ) {
1759
+ return -1; /* not a carbon with all single bonds */
1760
+ }
1761
+ if ( at[at_no].num_H == 1 ) {
1762
+ *s_subtype |= SALT_p_DONOR;
1763
+ } else
1764
+ if ( at[at_no].charge == -1 ) {
1765
+ *s_subtype |= SALT_p_ACCEPTOR;
1766
+ } else {
1767
+ return -1;
1768
+ }
1769
+ }
1770
+ return type;
1771
+ }
1772
+
1773
+ /********************************************************************************************************/
1774
+ /* new version: merge all, check alt paths, then unmerge unreachable O-atoms if any */
1775
+ /* Check for oxygen negative charge-H tautomerism (Salts)
1776
+ allowed long-range tautomerism; more than one H or (-) can be moved, for example:
1777
+ HO-C=C-O(-) O=C-C=O
1778
+ / \ / \
1779
+ R R R R
1780
+ | | => | |
1781
+ R' R' R' R'
1782
+ \ / \ /
1783
+ O=C-C=O HO-C=C-O(-)
1784
+
1785
+ To check:
1786
+
1787
+ | |
1788
+ -add all possible HO-C=, O=C, (-)O-C= (including all containing O t-groups) into one t-group;
1789
+ -temporarily disconnect one of previously not belonging to any t-group O-atoms from the one t-group;
1790
+ -find whether there is an alt path allowing H or (-) to migrate
1791
+ from the temp. disconnected O to any one left in the group.
1792
+ If the alt path does not exist then the temp. disconnected atom does not
1793
+ participate in the H/(-) migration and it will be unmarked/unmerged.
1794
+
1795
+ */
1796
+ /********************************************************************************************************/
1797
+ int comp_candidates( const void *a1, const void *a2 )
1798
+ {
1799
+ const S_CANDIDATE *s1 = (const S_CANDIDATE *)a1;
1800
+ const S_CANDIDATE *s2 = (const S_CANDIDATE *)a2;
1801
+ int ret;
1802
+ if ( s1->type >= 0 /* enabled < */ && s2->type < 0 /* disabled */ )
1803
+ return -1; /* enabled goes first */
1804
+ if ( s1->type < 0 /* disabled > */ && s2->type >= 0 /* enabled */ )
1805
+ return 1;
1806
+ if ( s1->endpoint && !s2->endpoint )
1807
+ return -1; /* tautomeric goes first; only tautomeric may be disabled */
1808
+ if ( !s1->endpoint && s2->endpoint )
1809
+ return 1; /* tautomeric goes first; only tautomeric may be disabled */
1810
+ if ( s1->endpoint && s2->endpoint && (ret = (int)s1->endpoint - (int)s2->endpoint) ) {
1811
+ return ret;
1812
+ }
1813
+ return (int)s1->atnumber - (int)s2->atnumber;
1814
+ }
1815
+ /********************************************************************************************************/
1816
+ int MarkSaltChargeGroups2 ( inp_ATOM *at, int num_atoms, S_GROUP_INFO *s_group_info,
1817
+ T_GROUP_INFO *t_group_info, C_GROUP_INFO *c_group_info,
1818
+ struct BalancedNetworkStructure *pBNS, struct BalancedNetworkData *pBD )
1819
+ {
1820
+ /* BNS_EDGE_FORBIDDEN_TEMP */
1821
+ #define ALT_PATH_FOUND (MAX_ATOMS+1)
1822
+ #define NO_ENDPOINT (MAX_ATOMS+2) /* the two defines must be different */
1823
+ #define DISABLE_CANDIDATE 10
1824
+ #define cPAIR(a,b) cPair[a+b*nNumLeftCandidates]
1825
+ #define ACCEPTOR_PAIR 1
1826
+ #define DONOR_PAIR 2
1827
+
1828
+ int nNumChanges = 0, nNumOtherChanges = 0, nNumAcidicChanges = 0, nTotNumChanges = 0;
1829
+ S_CHAR *cPair = NULL;
1830
+ T_ENDPOINT *EndPoint = NULL;
1831
+ if ( s_group_info && s_group_info->s_candidate && s_group_info->max_num_candidates > 0 ) {
1832
+ int i, j, i1, j1;
1833
+ S_CANDIDATE *s_candidate = s_group_info->s_candidate;
1834
+ int nMaxNumCandidates = s_group_info->max_num_candidates;
1835
+ int nNumCandidates = s_group_info->num_candidates;
1836
+ int nNumOtherCandidates = s_group_info->num_other_candidates;
1837
+ int nNumPOnlyCandidates = s_group_info->num_p_only_candidates;
1838
+ int nNumLeftCandidates = 0;
1839
+ int nNumMarkedCandidates = 0;
1840
+ int s_type, s_subtype;
1841
+ int ret, nDelta;
1842
+ int bHardAddedRemovedProtons = t_group_info && (t_group_info->tni.bNormalizationFlags & FLAG_FORCE_SALT_TAUT);
1843
+
1844
+ int s_subtype_all = 0;
1845
+ int nDonorPairs, nAcceptorPairs, nCurDonorPairs, nCurAcceptorPairs, bAlreadyTested;
1846
+ /*
1847
+ ENDPOINT_INFO eif;
1848
+ */
1849
+
1850
+ #if( IGNORE_TGROUP_WITHOUT_H == 1 )
1851
+ int bTGroupHasNegativeChargesOnly = 1;
1852
+ #endif
1853
+ /*return 0;*/ /* debug only */
1854
+
1855
+ i1 = -1;
1856
+
1857
+ if ( nNumCandidates <= -2 || !t_group_info || !t_group_info->t_group ) {
1858
+ return 0;
1859
+ }
1860
+ /*************************************************************************/
1861
+ /* find all candidates including those with differen s_type (other type) */
1862
+ /*************************************************************************/
1863
+ for ( i = 0, nNumCandidates = nNumOtherCandidates = nNumPOnlyCandidates = 0; i < num_atoms; i ++ ) {
1864
+ if ( 0 == (s_type = GetSaltChargeType( at, i, t_group_info, &s_subtype )) ||
1865
+ /* -C=O or =C-OH, O = S, Se, Te */
1866
+ 1 == (s_type = GetOtherSaltChargeType( at, i, t_group_info, &s_subtype, 1/* bAccept_O*/ )) ||
1867
+ /* =Z-MH or -Z=M, Z = centerpoint, M = endpoint, other than above */
1868
+ 2 == (s_type = GetOtherSaltType( at, i, &s_subtype ) ) ||
1869
+ ( bHardAddedRemovedProtons && 4 == (s_type = bIsHardRemHCandidate( at, i, &s_subtype ) ) )
1870
+ /* >C-SH, >C-S(-); S=S,Se,Te */
1871
+ ) {
1872
+
1873
+ if ( nNumCandidates >= nMaxNumCandidates ) {
1874
+ return BNS_VERT_EDGE_OVFL;
1875
+ }
1876
+ s_candidate[nNumCandidates].atnumber = i;
1877
+ s_candidate[nNumCandidates].type = s_type;
1878
+ s_candidate[nNumCandidates].subtype = s_subtype;
1879
+ s_candidate[nNumCandidates].endpoint = at[i].endpoint;
1880
+ nNumCandidates ++;
1881
+ nNumOtherCandidates += (1 == s_type);
1882
+ s_subtype_all |= s_subtype;
1883
+ i1 = i; /* save a representative of a tautomeric group */
1884
+ }
1885
+ }
1886
+
1887
+ if ( nNumCandidates <= 1 || /* TG_FLAG_ALLOW_NO_NEGTV_O <=> CHARGED_SALTS_ONLY=0 */
1888
+ !(s_subtype_all & SALT_ACCEPTOR) ||
1889
+ (((t_group_info->bTautFlags & TG_FLAG_ALLOW_NO_NEGTV_O) ||
1890
+ (t_group_info->bTautFlagsDone & TG_FLAG_FOUND_SALT_CHARGES_DONE) ||
1891
+ (t_group_info->tni.bNormalizationFlags & FLAG_FORCE_SALT_TAUT)) ?
1892
+ !(s_subtype_all & (SALT_DONOR)):
1893
+ (!(s_subtype_all & SALT_DONOR_Neg) || nNumOtherCandidates == nNumCandidates ))
1894
+ ) {
1895
+ s_group_info->num_candidates = 0; /* no candidate exists */
1896
+ return 0;
1897
+ }
1898
+ if ( !(s_subtype_all & (SALT_DONOR_Neg) ) ) {
1899
+ t_group_info->bTautFlagsDone |= TG_FLAG_ALLOW_NO_NEGTV_O_DONE;
1900
+ }
1901
+
1902
+ /************************************************************************************/
1903
+ /* Mark redundant candidates so that only one candidate from one t-group is left in */
1904
+ /************************************************************************************/
1905
+ for ( i = 0; i < nNumCandidates; i ++ ) {
1906
+ if ( 2 == s_candidate[nNumCandidates].type ) {
1907
+ s_candidate[i].type -= DISABLE_CANDIDATE; /* disable >C-SH candidates */
1908
+ nNumLeftCandidates ++; /* count rejected */
1909
+ continue;
1910
+ }
1911
+ if ( s_candidate[i].endpoint ) {
1912
+ for ( j = i-1; 0 <= j; j -- ) {
1913
+ if ( s_candidate[i].endpoint == s_candidate[j].endpoint ) {
1914
+ s_candidate[i].type -= DISABLE_CANDIDATE; /* disable subsequent redundant */
1915
+ nNumLeftCandidates ++; /* count rejected */
1916
+ break;
1917
+ }
1918
+ }
1919
+ }
1920
+ }
1921
+ nNumLeftCandidates = nNumCandidates - nNumLeftCandidates; /* subtract num. rejected from the total */
1922
+ s_group_info->num_candidates = 0; /* reinit next time */
1923
+ /*********************************************************************/
1924
+ /* reorder so that all disabled are at the end, tautomeric are first */
1925
+ /*********************************************************************/
1926
+ qsort ( s_candidate, nNumCandidates, sizeof(s_candidate[0]), comp_candidates );
1927
+ cPair = (S_CHAR *)inchi_calloc( nNumLeftCandidates*nNumLeftCandidates, sizeof(cPair[0]) );
1928
+ if ( !cPair ) {
1929
+ /*printf("BNS_OUT_OF_RAM-6\n");*/
1930
+ nTotNumChanges = BNS_OUT_OF_RAM;
1931
+ goto quick_exit;
1932
+ }
1933
+ nDonorPairs = nAcceptorPairs = 0;
1934
+ /**********************************************************************/
1935
+ /* Find whether we have at least one donor pair and one acceptor pair */
1936
+ /**********************************************************************/
1937
+ for ( i = 0; i < nNumLeftCandidates; i ++ ) {
1938
+ nCurDonorPairs = nCurAcceptorPairs = 0;
1939
+ for ( j = 0; j <= i; j ++ ) {
1940
+ if ( i == j && !s_candidate[i].endpoint ) {
1941
+ continue; /* same non-taut atom. However, success for i==j means *
1942
+ * that the whole tautomeric group may donate or accept 2H */
1943
+ }
1944
+ /* check for acceptor pair */
1945
+ if ( (s_candidate[i].subtype & SALT_ACCEPTOR) && (s_candidate[j].subtype & SALT_ACCEPTOR) &&
1946
+ (ret = bExistsAltPath( pBNS, pBD, NULL, at, num_atoms, s_candidate[i].atnumber,
1947
+ s_candidate[j].atnumber, ALT_PATH_MODE_ADD2H_TST ))) {
1948
+ if ( IS_BNS_ERROR( ret ) ) {
1949
+ nTotNumChanges = ret;
1950
+ goto quick_exit;
1951
+ }
1952
+ if ( ret & 1 ) {
1953
+ nDelta = (ret & ~3) >> 2;
1954
+ /*nNumChanges += (ret & 2);*/
1955
+ if ( nDelta ) {
1956
+ /* alt path unleashed previously localized radicals and they annihilated */
1957
+ nNumChanges = 0;
1958
+ nTotNumChanges = BNS_RADICAL_ERR;
1959
+ goto quick_exit;
1960
+ }
1961
+ cPAIR(i,j) |= ACCEPTOR_PAIR; /* the result: mark the pair */
1962
+ /*cPAIR(j,i) |= ACCEPTOR_PAIR;*/
1963
+ }
1964
+ }
1965
+ /* check for donor pair */
1966
+ if ( (s_candidate[i].subtype & SALT_DONOR) && (s_candidate[j].subtype & SALT_DONOR) &&
1967
+ (ret = bExistsAltPath( pBNS, pBD, NULL, at, num_atoms, s_candidate[i].atnumber,
1968
+ s_candidate[j].atnumber, ALT_PATH_MODE_REM2H_TST ))) {
1969
+ if ( IS_BNS_ERROR( ret ) ) {
1970
+ nTotNumChanges = ret;
1971
+ goto quick_exit;
1972
+ }
1973
+ if ( ret & 1 ) {
1974
+ nDelta = (ret & ~3) >> 2;
1975
+ /*nNumChanges += (ret & 2);*/
1976
+ if ( nDelta ) {
1977
+ /* alt path unleashed previously localized radicals and they annihilated */
1978
+ nNumChanges = 0;
1979
+ nTotNumChanges = BNS_RADICAL_ERR;
1980
+ goto quick_exit;
1981
+ }
1982
+ cPAIR(i,j) |= DONOR_PAIR; /* the result: mark the pair */
1983
+ /*cPAIR(j,i) |= ACCEPTOR_PAIR;*/
1984
+ }
1985
+ }
1986
+ /* since the results will be used later to change bonds, check only now */
1987
+ /* when both results for (i,j) have been obtained. */
1988
+ if ( cPAIR(i,j) & ACCEPTOR_PAIR ) {
1989
+ nCurAcceptorPairs ++;
1990
+ if ( nDonorPairs ) {
1991
+ /* find donor pair (i1,j1) such that i!=i1, i!=j1, j!=i1, j!=j1 */
1992
+ for ( i1 = 0; i1 < i; i1 ++ ) {
1993
+ for ( j1 = 0; j1 <= i1; j1 ++ ) {
1994
+ /* here always j1 < i && i1 < i therefore we do not compare i to i1 or j1 */
1995
+ if ( j1 != j && i1 != j && (cPAIR(i1,j1) & DONOR_PAIR) ) {
1996
+ /* both the donor and the acceptor pairs have been found */
1997
+ goto bFound2Pairs;
1998
+ }
1999
+ }
2000
+ }
2001
+ }
2002
+ }
2003
+ if ( cPAIR(i,j) & DONOR_PAIR ) {
2004
+ nCurDonorPairs ++;
2005
+ if ( nAcceptorPairs ) {
2006
+ /* find acceptor pair (i1,j1) such that i!=i1, i!=j1, j!=i1, j!=j1 */
2007
+ for ( i1 = 0; i1 < i; i1 ++ ) {
2008
+ for ( j1 = 0; j1 <= i1; j1 ++ ) {
2009
+ /* here always j1 < i && i1 < i therefore we do not compare i to i1 or j1 */
2010
+ if ( j1 != j && i1 != j && (cPAIR(i1,j1) & ACCEPTOR_PAIR) ) {
2011
+ /* both the donor and the acceptor pairs have been found */
2012
+ goto bFound2Pairs;
2013
+ }
2014
+ }
2015
+ }
2016
+ }
2017
+ }
2018
+ }
2019
+ nDonorPairs += nCurDonorPairs;
2020
+ nAcceptorPairs += nCurAcceptorPairs;
2021
+ }
2022
+ /* nothing has been found */
2023
+ nNumChanges = 0;
2024
+ inchi_free( cPair );
2025
+ cPair = NULL;
2026
+ goto quick_exit;
2027
+
2028
+
2029
+ /* both the donor and the acceptor pairs have been found */
2030
+ bFound2Pairs:
2031
+ /* first, try already found pairs */
2032
+ i1 = i;
2033
+ j1 = j;
2034
+
2035
+ /* Find all possible donor and acceptor pairs */
2036
+ nNumMarkedCandidates = 0;
2037
+ for ( i = 0; i < nNumLeftCandidates; i ++ ) {
2038
+ nCurDonorPairs = nCurAcceptorPairs = 0;
2039
+ for ( j = 0; j <= i; j ++ ) {
2040
+ bAlreadyTested = (i < i1 || i == i1 && j <= j1);
2041
+ if ( bAlreadyTested && (cPAIR(i,j) & ACCEPTOR_PAIR) || !bAlreadyTested ) {
2042
+ /* checking for acceptor pair */
2043
+ if ( (s_candidate[i].subtype & SALT_ACCEPTOR) && (s_candidate[j].subtype & SALT_ACCEPTOR) &&
2044
+ (ret = bExistsAltPath( pBNS, pBD, NULL, at, num_atoms, s_candidate[i].atnumber,
2045
+ s_candidate[j].atnumber, ALT_PATH_MODE_ADD2H_CHG ))) {
2046
+ if ( IS_BNS_ERROR( ret ) ) {
2047
+ nTotNumChanges = ret;
2048
+ goto quick_exit;
2049
+ }
2050
+ if ( ret & 1 ) {
2051
+ nDelta = (ret & ~3) >> 2;
2052
+ nNumChanges += (ret & 2);
2053
+ if ( nDelta ) {
2054
+ /* alt path unleashed previously localized radicals and they annihilated */
2055
+ nNumChanges = 0;
2056
+ nTotNumChanges = BNS_RADICAL_ERR;
2057
+ goto quick_exit;
2058
+ }
2059
+ cPAIR(i,j) |= ACCEPTOR_PAIR;
2060
+ /*cPAIR(j,i) |= ACCEPTOR_PAIR;*/
2061
+ nCurAcceptorPairs += !bAlreadyTested;
2062
+ if ( !(s_candidate[i].subtype & SALT_SELECTED) ) {
2063
+ s_candidate[i].subtype |= SALT_SELECTED;
2064
+ nNumMarkedCandidates ++;
2065
+ if ( !s_candidate[i].endpoint && s_candidate[i].type ) {
2066
+ nNumOtherChanges ++;
2067
+ } else {
2068
+ nNumAcidicChanges ++;
2069
+ }
2070
+ }
2071
+ if ( !(s_candidate[j].subtype & SALT_SELECTED) ) {
2072
+ s_candidate[j].subtype |= SALT_SELECTED;
2073
+ nNumMarkedCandidates ++;
2074
+ if ( !s_candidate[j].endpoint && s_candidate[j].type ) {
2075
+ nNumOtherChanges ++;
2076
+ } else {
2077
+ nNumAcidicChanges ++;
2078
+ }
2079
+ }
2080
+ }
2081
+ }
2082
+ }
2083
+ if ( bAlreadyTested && (cPAIR(i,j) & DONOR_PAIR) || !bAlreadyTested ) {
2084
+ /* checking for donor pair */
2085
+ if ( (s_candidate[i].subtype & SALT_DONOR) && (s_candidate[j].subtype & SALT_DONOR) &&
2086
+ (ret = bExistsAltPath( pBNS, pBD, NULL, at, num_atoms, s_candidate[i].atnumber,
2087
+ s_candidate[j].atnumber, ALT_PATH_MODE_REM2H_CHG ))) {
2088
+ if ( IS_BNS_ERROR( ret ) ) {
2089
+ nTotNumChanges = ret;
2090
+ goto quick_exit;
2091
+ }
2092
+ if ( ret & 1 ) {
2093
+ nDelta = (ret & ~3) >> 2;
2094
+ nNumChanges += (ret & 2);
2095
+ if ( nDelta ) {
2096
+ /* alt path unleashed previously localized radicals and they annihilated */
2097
+ nNumChanges = 0;
2098
+ nTotNumChanges = BNS_RADICAL_ERR;
2099
+ goto quick_exit;
2100
+ }
2101
+ cPAIR(i,j) |= DONOR_PAIR;
2102
+ /*cPAIR(j,i) |= ACCEPTOR_PAIR;*/
2103
+ nCurDonorPairs += !bAlreadyTested;
2104
+ if ( !(s_candidate[i].subtype & SALT_SELECTED) ) {
2105
+ s_candidate[i].subtype |= SALT_SELECTED;
2106
+ nNumMarkedCandidates ++;
2107
+ if ( !s_candidate[i].endpoint && s_candidate[i].type ) {
2108
+ nNumOtherChanges ++;
2109
+ } else {
2110
+ nNumAcidicChanges ++;
2111
+ }
2112
+ }
2113
+ if ( !(s_candidate[j].subtype & SALT_SELECTED) ) {
2114
+ s_candidate[j].subtype |= SALT_SELECTED;
2115
+ nNumMarkedCandidates ++;
2116
+ if ( !s_candidate[j].endpoint && s_candidate[j].type ) {
2117
+ nNumOtherChanges ++;
2118
+ } else {
2119
+ nNumAcidicChanges ++;
2120
+ }
2121
+ }
2122
+ }
2123
+ }
2124
+ }
2125
+ }
2126
+ nDonorPairs += nCurDonorPairs;
2127
+ nAcceptorPairs += nCurAcceptorPairs;
2128
+ }
2129
+ inchi_free( cPair );
2130
+ cPair = NULL;
2131
+
2132
+ if ( nNumMarkedCandidates ) {
2133
+ EndPoint = (T_ENDPOINT *)inchi_calloc( nNumMarkedCandidates, sizeof(EndPoint[0]));
2134
+ if ( !EndPoint ) {
2135
+ /*printf("BNS_OUT_OF_RAM-7\n");*/
2136
+ nTotNumChanges = BNS_OUT_OF_RAM;
2137
+ goto quick_exit;
2138
+ }
2139
+ for ( i = 0, j = 0; i < nNumLeftCandidates; i ++ ) {
2140
+ if ( s_candidate[i].subtype & SALT_SELECTED ) {
2141
+ s_candidate[i].subtype ^= SALT_SELECTED; /* remove the flag */
2142
+ if ( j < nNumMarkedCandidates ) {
2143
+ i1 = s_candidate[i].atnumber; /* save a representative of the t-group to be created */
2144
+ AddEndPoint( EndPoint+j, at, i1 );
2145
+ }
2146
+ j ++;
2147
+ }
2148
+ }
2149
+ if ( j != nNumMarkedCandidates ) {
2150
+ nTotNumChanges = BNS_PROGRAM_ERR;
2151
+ goto quick_exit;
2152
+ }
2153
+ /* merge all marked atoms and their t-groups into one t-group */
2154
+ ret = RegisterEndPoints( t_group_info, EndPoint, nNumMarkedCandidates, at, num_atoms, c_group_info, pBNS );
2155
+ if ( ret == -1 ) {
2156
+ ret = BNS_PROGRAM_ERR;
2157
+ }
2158
+ if ( ret < 0 ) {
2159
+ nTotNumChanges = ret;
2160
+ goto quick_exit;
2161
+ }
2162
+ nTotNumChanges += (ret > 0);
2163
+ inchi_free( EndPoint );
2164
+ EndPoint = NULL;
2165
+
2166
+ if ( nNumMarkedCandidates ) {
2167
+ for ( i = nNumLeftCandidates; i < nNumCandidates; i ++ ) {
2168
+ s_candidate[i].type += DISABLE_CANDIDATE;
2169
+ j1 = s_candidate[i].atnumber;
2170
+ if ( at[j1].endpoint == at[i1].endpoint ) {
2171
+ if ( !s_candidate[i].endpoint && s_candidate[i].type ) {
2172
+ nNumOtherChanges ++;
2173
+ } else {
2174
+ nNumAcidicChanges ++;
2175
+ }
2176
+ }
2177
+ }
2178
+ } else {
2179
+ for ( i = nNumLeftCandidates; i < nNumCandidates; i ++ ) {
2180
+ s_candidate[i].type += DISABLE_CANDIDATE;
2181
+ }
2182
+ }
2183
+
2184
+ /* find whether the new t-group have any movable H */
2185
+ for ( i = 0, bTGroupHasNegativeChargesOnly = 0; i < t_group_info->num_t_groups; i ++ ) {
2186
+ if ( t_group_info->t_group[i].nGroupNumber == at[i1].endpoint &&
2187
+ t_group_info->t_group[i].num[0] == t_group_info->t_group[i].num[1] ) {
2188
+ bTGroupHasNegativeChargesOnly = 1;
2189
+ break;
2190
+ }
2191
+ }
2192
+ }
2193
+ nTotNumChanges = ( nTotNumChanges > 0);
2194
+
2195
+ #if( IGNORE_TGROUP_WITHOUT_H == 1 )
2196
+ if ( nTotNumChanges && bTGroupHasNegativeChargesOnly ) {
2197
+ nTotNumChanges = 2; /* means no moveable H has been affected */
2198
+ }
2199
+ #endif
2200
+ }
2201
+
2202
+ quick_exit:
2203
+ if ( nNumOtherChanges && nTotNumChanges == 1 ) {
2204
+ nTotNumChanges = 5; /* not only acidic atoms merged */
2205
+ }
2206
+ if ( cPair ) {
2207
+ inchi_free( cPair );
2208
+ /*cPair = NULL;*/
2209
+ }
2210
+ if ( EndPoint ) {
2211
+ inchi_free ( EndPoint );
2212
+ /*EndPoint = NULL;*/
2213
+ }
2214
+ return nTotNumChanges; /* 0=>no changes, 1=>new salt tautomerism found, 2=>only new charge tautomerism found */
2215
+ #undef ALT_PATH_FOUND
2216
+ #undef NO_ENDPOINT
2217
+ }
2218
+ /********************************************************************************************************/
2219
+ /* regular one-path version: find alt paths then merge */
2220
+ /* Check for oxygen negative charge-H tautomerism (Salts)
2221
+ allowed long-range tautomerism; only one H or (-) can be moved, for example:
2222
+ HO-C=X-Y=Z-...-C=O => O=C-X=Y-Z=...=C-OH
2223
+ */
2224
+
2225
+ #if ( SALT_WITH_PROTONS == 1 )
2226
+
2227
+ #define MAX_LOCAL_TGNUM 0 /* was 32; disable since it has not been used */
2228
+
2229
+ #if ( MAX_LOCAL_TGNUM > 0 )
2230
+ typedef struct tagTGroupData {
2231
+ S_SHORT nGroupNumber; /* t-group number from t_group_info->t_group->nGroupNumber */
2232
+ S_SHORT nGroupIndex; /* TGroupData[nGroupNumber]nGroupIndex = index of t_group in t_group_info */
2233
+ S_SHORT nDonorM; /* number of endpoint-donors that have negative charge (Minus) */
2234
+ S_SHORT nDonorH; /* number of endpoint-donors that have only H */
2235
+ S_SHORT nAccepM; /* number of endpoint-acceptors that have negative charge (Minus) */
2236
+ S_SHORT nAccepH; /* number of endpoint-acceptors that have H and no negative charge */
2237
+ S_SHORT nAccep0; /* number of endpoint-acceptors that have no H and no negative charge */
2238
+ S_SHORT nDonorA; /* number of acidic endpoint-donors */
2239
+ S_SHORT nAccepS; /* number of acidic endpoint-acceptors */
2240
+ } TGroupData;
2241
+ #endif
2242
+ /********************************************************************************************************/
2243
+ int MarkSaltChargeGroups ( inp_ATOM *at, int num_atoms, S_GROUP_INFO *s_group_info,
2244
+ T_GROUP_INFO *t_group_info, C_GROUP_INFO *c_group_info,
2245
+ struct BalancedNetworkStructure *pBNS, struct BalancedNetworkData *pBD )
2246
+ {
2247
+
2248
+ int nNumChanges = 0, nTotNumChanges = 0;
2249
+ if ( s_group_info && s_group_info->s_candidate && s_group_info->max_num_candidates > 0 ) {
2250
+ int i, i1, i2, j, j1, j2, jj, ii1, ii2, jj1, jj2, /*k,*/ num_tested;
2251
+ S_CANDIDATE *s_candidate = s_group_info->s_candidate;
2252
+ int nMaxNumCandidates = s_group_info->max_num_candidates;
2253
+ int nNumCandidates = s_group_info->num_candidates;
2254
+ int nNumOtherCandidates = s_group_info->num_other_candidates;
2255
+ int nNumPOnlyCandidates = s_group_info->num_p_only_candidates;
2256
+ int s_type, s_subtype;
2257
+ int ret, nDelta, /*nMobile,*/ err = 0;
2258
+ int s_subtype_all = 0;
2259
+ int nGroupNumber;
2260
+ T_ENDPOINT EndPoint[2];
2261
+ #if ( MAX_LOCAL_TGNUM > 0 )
2262
+ TGroupData tgData[MAX_LOCAL_TGNUM];
2263
+ TGroupData *ptgData = tgData;
2264
+ #endif
2265
+ if ( nNumCandidates <= -1 || !t_group_info || !t_group_info->t_group ) {
2266
+ return 0;
2267
+ }
2268
+
2269
+ /* count t-groups */
2270
+ for ( i = 0, nGroupNumber = 0; i < t_group_info->num_t_groups; i ++ ) {
2271
+ if ( nGroupNumber < t_group_info->t_group[i].nGroupNumber ) {
2272
+ nGroupNumber = t_group_info->t_group[i].nGroupNumber; /* max. t-group number */
2273
+ }
2274
+ }
2275
+ #if ( MAX_LOCAL_TGNUM > 0 )
2276
+ /* prepare memory */
2277
+ if ( nGroupNumber >= MAX_LOCAL_TGNUM ) {
2278
+ if ( !( ptgData = (TGroupData*)inchi_calloc( nGroupNumber+1, sizeof(TGroupData) ) ) ) {
2279
+ err = BNS_OUT_OF_RAM;
2280
+ goto quick_exit;
2281
+ }
2282
+ } else {
2283
+ memset( ptgData, 0, sizeof(tgData) );
2284
+ }
2285
+ ptgData[0].nGroupIndex = -1; /* data for non-tautomeric atoms */
2286
+ for ( i = 0, nGroupNumber = 0; i < t_group_info->num_t_groups; i ++ ) {
2287
+ if ( nGroupNumber = t_group_info->t_group[i].nGroupNumber ) {
2288
+ ptgData[nGroupNumber].nGroupIndex = i;
2289
+ ptgData[i].nGroupNumber = nGroupNumber;
2290
+ }
2291
+ }
2292
+ #endif
2293
+ nNumCandidates = 0; /* always recalculate 2004-03-22 */
2294
+
2295
+ if ( nNumCandidates == 0 ) {
2296
+ for ( i = 0, nNumCandidates = nNumOtherCandidates = nNumPOnlyCandidates = 0; i < num_atoms; i ++ ) {
2297
+ if ( 0 == (s_type = GetSaltChargeType( at, i, t_group_info, &s_subtype )) ||
2298
+ /* -C=O or =C-OH, O = S, Se, Te */
2299
+ #if( INCL_NON_SALT_CANDIDATATES == 1 )
2300
+ 1 == (s_type = GetOtherSaltChargeType( at, i, t_group_info, &s_subtype, 1 )) ||
2301
+ /* =Z-MH or -Z=M, Z = centerpoint, M = endpoint, other than above */
2302
+ #endif
2303
+ 2 == (s_type = GetOtherSaltType( at, i, &s_subtype ) )
2304
+ /* >C-SH, >C-S(-); S=S,Se,Te */
2305
+ ) {
2306
+
2307
+ if ( nNumCandidates >= nMaxNumCandidates ) {
2308
+ err = BNS_VERT_EDGE_OVFL;
2309
+ goto quick_exit;
2310
+ }
2311
+ s_candidate[nNumCandidates].atnumber = i;
2312
+ s_candidate[nNumCandidates].type = s_type;
2313
+ s_candidate[nNumCandidates].subtype = s_subtype;
2314
+ s_candidate[nNumCandidates].endpoint = at[i].endpoint;
2315
+ nNumCandidates ++;
2316
+ nNumOtherCandidates += (1 == s_type);
2317
+ nNumPOnlyCandidates += (2 == s_type);
2318
+ s_subtype_all |= s_subtype;
2319
+ /*i1 = i;*/ /* save a representative of a tautomeric group */
2320
+ }
2321
+ }
2322
+
2323
+ /* changes: TG_FLAG_ALLOW_NO_NEGTV_O replaced CHARGED_SALTS_ONLY==0 */
2324
+ if ( nNumCandidates <= 1 ||
2325
+ !(s_subtype_all & SALT_ACCEPTOR) ||
2326
+ (((t_group_info->bTautFlags & TG_FLAG_ALLOW_NO_NEGTV_O)||
2327
+ (t_group_info->bTautFlagsDone & TG_FLAG_FOUND_SALT_CHARGES_DONE) ||
2328
+ (t_group_info->tni.bNormalizationFlags & FLAG_FORCE_SALT_TAUT)) ?
2329
+ !(s_subtype_all & (SALT_DONOR_Neg | SALT_DONOR_H)):
2330
+ (!(s_subtype_all & SALT_DONOR_Neg) || nNumOtherCandidates==nNumCandidates))
2331
+ ) {
2332
+ s_group_info->num_candidates = -1; /* no candidate exists */
2333
+ goto quick_exit;
2334
+ }
2335
+ if ( !(s_subtype_all & (SALT_DONOR_Neg) ) ) {
2336
+ t_group_info->bTautFlagsDone |= TG_FLAG_ALLOW_NO_NEGTV_O_DONE;
2337
+ }
2338
+ } else {
2339
+ for ( i = 0; i < nNumCandidates; i ++ ) {
2340
+ i1 = s_candidate[i].atnumber;
2341
+ if ( 0 <= (s_type = GetSaltChargeType( at, i1, t_group_info, &s_subtype ))
2342
+ #if( INCL_NON_SALT_CANDIDATATES == 1 )
2343
+ || 0 < (s_type = GetOtherSaltChargeType( at, i1, t_group_info, &s_subtype, 1 /* bAccept_O*/ ))
2344
+ #endif
2345
+ ) {
2346
+ s_candidate[nNumCandidates].type = s_type;
2347
+ s_candidate[nNumCandidates].subtype = s_subtype;
2348
+ s_candidate[nNumCandidates].endpoint = at[i1].endpoint;
2349
+ }
2350
+ }
2351
+ }
2352
+ /* Look for alt paths connecting:
2353
+ SALT_DONOR_Neg to SALT_ACCEPTOR : long distance migration of negative charges
2354
+ SALT_DONOR_H to SALT_ACCEPTOR : long distance migration of H-atoms
2355
+ */
2356
+ num_tested = 0;
2357
+ do {
2358
+ nNumChanges = 0;
2359
+ for ( i1 = 0; i1 < nNumCandidates; i1 ++ ) {
2360
+ j1 = s_candidate[i1].atnumber;
2361
+ for ( i2 = i1+1; i2 < nNumCandidates; i2 ++ ) {
2362
+ /* prev. approach: do not test if both candidates are not "salt-type". Disabled 2004-03-18
2363
+ if ( s_candidate[i1].type && s_candidate[i2].type )
2364
+ continue;
2365
+ */
2366
+ j2 = s_candidate[i2].atnumber;
2367
+ if ( at[j1].endpoint && at[j1].endpoint == at[j2].endpoint ) {
2368
+ continue;
2369
+ }
2370
+ for ( j = 0; j < 2; j ++ ) {
2371
+ if ( j ) {
2372
+ ii1 = i2; /* candidate 1 (donor) ordering number */
2373
+ ii2 = i1; /* candidate 2 (acceptor) ordering number */
2374
+ jj1 = j2; /* candidate 1 (donor) atom number */
2375
+ jj2 = j1; /* candidate 2 (acceptor) atom number */
2376
+ } else { /* transposition */
2377
+ ii1 = i1; /* candidate 1 (donor) ordering number */
2378
+ ii2 = i2; /* candidate 2 (acceptor) ordering number */
2379
+ jj1 = j1; /* candidate 1 (donor) atom number */
2380
+ jj2 = j2; /* candidate 2 (acceptor) atom number */
2381
+ }
2382
+
2383
+ if ( ( s_candidate[ii1].subtype & (SALT_DONOR_Neg | SALT_DONOR_H) ) &&
2384
+ ( s_candidate[ii2].subtype & SALT_ACCEPTOR ) ) {
2385
+ ret = bExistsAltPath( pBNS, pBD, NULL, at, num_atoms, jj2, jj1, ALT_PATH_MODE_4_SALT );
2386
+ num_tested ++;
2387
+ if ( IS_BNS_ERROR( ret ) ) {
2388
+ err = ret;
2389
+ goto quick_exit;
2390
+ }
2391
+ if ( ret & 1 ) {
2392
+ nDelta = (ret & ~3) >> 2;
2393
+ nNumChanges += (ret & 2);
2394
+ for ( i = 0; i < 2; i ++ ) {
2395
+ jj = i? jj2 : jj1;
2396
+ AddEndPoint( EndPoint+i, at, jj );
2397
+ }
2398
+ /* add/merge taut groups and reinit pBNS in the fly */
2399
+ ret = RegisterEndPoints( t_group_info,
2400
+ EndPoint, 2, at, num_atoms, c_group_info, pBNS );
2401
+ if ( ret == -1 ) {
2402
+ ret = BNS_PROGRAM_ERR;
2403
+ }
2404
+ if ( ret < 0 ) {
2405
+ err = ret;
2406
+ goto quick_exit;
2407
+ }
2408
+ if ( nDelta ) {
2409
+ err = BNS_RADICAL_ERR;
2410
+ goto quick_exit;
2411
+ }
2412
+ nNumChanges += (ret > 0);
2413
+ break; /* avoid redundant repetition */
2414
+ }
2415
+ }
2416
+ }
2417
+ }
2418
+ }
2419
+ nTotNumChanges += nNumChanges;
2420
+ } while ( num_tested && nNumChanges );
2421
+
2422
+ quick_exit:
2423
+ if ( !err ) {
2424
+ nTotNumChanges += nNumChanges; /* nNumChanges != 0 only in case of 'goto quick_exit' */
2425
+ if ( s_group_info->num_candidates == 0 ) {
2426
+ /* first time: initialize */
2427
+ s_group_info->num_candidates = num_tested? nNumCandidates : -1; /* no candidate exists */
2428
+ }
2429
+ } else {
2430
+ nTotNumChanges = err;
2431
+ }
2432
+ #if ( MAX_LOCAL_TGNUM > 0 )
2433
+ if ( ptgData != tgData ) {
2434
+ inchi_free( ptgData );
2435
+ }
2436
+ #endif
2437
+ }
2438
+ return nTotNumChanges;
2439
+ }
2440
+ #else
2441
+ /********************************************************************************************************/
2442
+ int MarkSaltChargeGroups ( inp_ATOM *at, int num_atoms, S_GROUP_INFO *s_group_info,
2443
+ T_GROUP_INFO *t_group_info, C_GROUP_INFO *c_group_info,
2444
+ struct BalancedNetworkStructure *pBNS, struct BalancedNetworkData *pBD )
2445
+ {
2446
+
2447
+ int nNumChanges = 0, nTotNumChanges = 0;
2448
+ if ( s_group_info && s_group_info->s_candidate && s_group_info->max_num_candidates > 0 ) {
2449
+ int i, i1, i2, j, j1, j2, jj, ii1, ii2, jj1, jj2, k, num_tested;
2450
+ S_CANDIDATE *s_candidate = s_group_info->s_candidate;
2451
+ int nMaxNumCandidates = s_group_info->max_num_candidates;
2452
+ int nNumCandidates = s_group_info->num_candidates;
2453
+ int nNumOtherCandidates = s_group_info->num_other_candidates;
2454
+ int s_type, s_subtype;
2455
+ int ret, nDelta, nMobile;
2456
+ int s_subtype_all = 0;
2457
+ T_ENDPOINT EndPoint[2];
2458
+
2459
+ if ( nNumCandidates <= -1 || !t_group_info || !t_group_info->t_group ) {
2460
+ return 0;
2461
+ } else
2462
+ if ( nNumCandidates == 0 ) {
2463
+ for ( i = 0, nNumCandidates = nNumOtherCandidates = 0; i < num_atoms; i ++ ) {
2464
+ if ( 0 <= (s_type = GetSaltChargeType( at, i, t_group_info, &s_subtype )) ) {
2465
+ if ( nNumCandidates >= nMaxNumCandidates ) {
2466
+ return BNS_VERT_EDGE_OVFL;
2467
+ }
2468
+ s_candidate[nNumCandidates].atnumber = i;
2469
+ s_candidate[nNumCandidates].type = s_type;
2470
+ s_candidate[nNumCandidates].subtype = s_subtype;
2471
+ s_candidate[nNumCandidates].endpoint = at[i].endpoint;
2472
+ nNumCandidates ++;
2473
+ s_subtype_all |= s_subtype;
2474
+ /*i1 = i;*/ /* save a representative of a tautomeric group */
2475
+ }
2476
+ #if( INCL_NON_SALT_CANDIDATATES == 1 )
2477
+ else /* new */
2478
+ if ( 0 < (s_type = GetOtherSaltChargeType( at, i, t_group_info, &s_subtype, 1 /* bAccept_O*/ )) ) {
2479
+ if ( nNumCandidates >= nMaxNumCandidates ) {
2480
+ return BNS_VERT_EDGE_OVFL;
2481
+ }
2482
+ s_candidate[nNumCandidates].atnumber = i;
2483
+ s_candidate[nNumCandidates].type = s_type;
2484
+ s_candidate[nNumCandidates].subtype = s_subtype;
2485
+ s_candidate[nNumCandidates].endpoint = at[i].endpoint;
2486
+ nNumCandidates ++;
2487
+ nNumOtherCandidates ++;
2488
+ s_subtype_all |= s_subtype;
2489
+ }
2490
+ #endif
2491
+ }
2492
+
2493
+ /* changes: TG_FLAG_ALLOW_NO_NEGTV_O replaced CHARGED_SALTS_ONLY==0 */
2494
+ if ( nNumCandidates <= 1 || nNumOtherCandidates == nNumCandidates ||
2495
+ ((t_group_info->bTautFlags & TG_FLAG_ALLOW_NO_NEGTV_O) ?
2496
+ !(s_subtype_all & (SALT_DONOR_Neg | SALT_DONOR_H)):
2497
+ !(s_subtype_all & SALT_DONOR_Neg)) ||
2498
+ !(s_subtype_all & SALT_ACCEPTOR)) {
2499
+ s_group_info->num_candidates = -1; /* no candidate exists */
2500
+ return 0;
2501
+ }
2502
+ if ( !(s_subtype_all & (SALT_DONOR_Neg) ) ) {
2503
+ t_group_info->bTautFlagsDone |= TG_FLAG_ALLOW_NO_NEGTV_O_DONE;
2504
+ }
2505
+ } else {
2506
+ for ( i = 0; i < nNumCandidates; i ++ ) {
2507
+ i1 = s_candidate[i].atnumber;
2508
+ if ( 0 <= (s_type = GetSaltChargeType( at, i1, t_group_info, &s_subtype ))
2509
+ #if( INCL_NON_SALT_CANDIDATATES == 1 )
2510
+ || 0 < (s_type = GetOtherSaltChargeType( at, i1, t_group_info, &s_subtype, 1 /* bAccept_O*/ ))
2511
+ #endif
2512
+ ) {
2513
+ s_candidate[nNumCandidates].type = s_type;
2514
+ s_candidate[nNumCandidates].subtype = s_subtype;
2515
+ s_candidate[nNumCandidates].endpoint = at[i1].endpoint;
2516
+ }
2517
+ }
2518
+ }
2519
+ /* Look for alt paths connecting:
2520
+ SALT_DONOR_Neg to SALT_ACCEPTOR : long distance migration of negative charges
2521
+ SALT_DONOR_H to SALT_ACCEPTOR : long distance migration of H-atoms
2522
+ */
2523
+ num_tested = 0;
2524
+ do {
2525
+ nNumChanges = 0;
2526
+ for ( i1 = 0; i1 < nNumCandidates; i1 ++ ) {
2527
+ j1 = s_candidate[i1].atnumber;
2528
+ for ( i2 = i1+1; i2 < nNumCandidates; i2 ++ ) {
2529
+ if ( s_candidate[i1].type && s_candidate[i2].type )
2530
+ continue; /* both candidates are not "salt-type" */
2531
+ j2 = s_candidate[i2].atnumber;
2532
+ if ( at[j1].endpoint && at[j1].endpoint == at[j2].endpoint ) {
2533
+ continue;
2534
+ }
2535
+ for ( j = 0; j < 2; j ++ ) {
2536
+ if ( j ) {
2537
+ ii1 = i2; /* candidate 1 (donor) ordering number */
2538
+ ii2 = i1; /* candidate 2 (acceptor) ordering number */
2539
+ jj1 = j2; /* candidate 1 (donor) atom number */
2540
+ jj2 = j1; /* candidate 2 (acceptor) atom number */
2541
+ } else { /* transposition */
2542
+ ii1 = i1; /* candidate 1 (donor) ordering number */
2543
+ ii2 = i2; /* candidate 2 (acceptor) ordering number */
2544
+ jj1 = j1; /* candidate 1 (donor) atom number */
2545
+ jj2 = j2; /* candidate 2 (acceptor) atom number */
2546
+ }
2547
+
2548
+ if ( ( s_candidate[ii1].subtype & (SALT_DONOR_Neg | SALT_DONOR_H) ) &&
2549
+ ( s_candidate[ii2].subtype & SALT_ACCEPTOR ) ) {
2550
+ ret = bExistsAltPath( pBNS, pBD, NULL, at, num_atoms, jj2, jj1, ALT_PATH_MODE_4_SALT );
2551
+ num_tested ++;
2552
+ if ( IS_BNS_ERROR( ret ) ) {
2553
+ return ret;
2554
+ }
2555
+ if ( ret & 1 ) {
2556
+ nDelta = (ret & ~3) >> 2;
2557
+ nNumChanges += (ret & 2);
2558
+ for ( i = 0; i < 2; i ++ ) {
2559
+ jj = i? jj2 : jj1;
2560
+ EndPoint[i].nAtomNumber = jj;
2561
+ EndPoint[i].nEquNumber = 0;
2562
+ EndPoint[i].nGroupNumber = at[jj].endpoint;
2563
+ if ( at[jj].endpoint ) {
2564
+ memset( EndPoint[i].num, 0, sizeof(EndPoint[i].num) );
2565
+ } else {
2566
+ AddAtom2num( EndPoint[i].num, at, jj, 2 ); /* fill out */
2567
+ AddAtom2DA( EndPoint[i].num_DA, at, jj, 2 );
2568
+ /*
2569
+ nMobile = EndPoint[i].num[1] = (at[jj].charge == -1);
2570
+ nMobile = EndPoint[i].num[0] = at[jj].num_H + nMobile;
2571
+ for ( k = 0; k < T_NUM_ISOTOPIC; k ++ ) {
2572
+ EndPoint[i].num[T_NUM_NO_ISOTOPIC+k] = at[jj].num_iso_H[NUM_H_ISOTOPES-k-1];
2573
+ }
2574
+ */
2575
+ }
2576
+ }
2577
+ /* add/merge taut groups and reinit pBNS */
2578
+ ret = RegisterEndPoints( t_group_info,
2579
+ EndPoint, 2, at, num_atoms, c_group_info, pBNS );
2580
+ if ( ret < 0 ) {
2581
+ return ret;
2582
+ }
2583
+ nNumChanges += (ret > 0);
2584
+ if ( nDelta ) {
2585
+ goto quick_exit;
2586
+ }
2587
+ break; /* avoid redundant repetition */
2588
+ }
2589
+ }
2590
+ }
2591
+ }
2592
+ }
2593
+ nTotNumChanges += nNumChanges;
2594
+ } while ( num_tested && nNumChanges );
2595
+
2596
+ quick_exit:
2597
+ nTotNumChanges += nNumChanges; /* nNumChanges != 0 only in case of 'goto quick_exit' */
2598
+ if ( s_group_info->num_candidates == 0 ) {
2599
+ /* first time: initialize */
2600
+ s_group_info->num_candidates = num_tested? nNumCandidates : -1; /* no candidate exists */
2601
+ }
2602
+
2603
+ }
2604
+ return nTotNumChanges;
2605
+ }
2606
+ #endif
2607
+
2608
+ /*****************************************************************************/
2609
+ int MergeSaltTautGroups( inp_ATOM *at, int num_atoms, S_GROUP_INFO *s_group_info,
2610
+ T_GROUP_INFO *t_group_info, C_GROUP_INFO *c_group_info,
2611
+ struct BalancedNetworkStructure *pBNS )
2612
+ {
2613
+ /* count candidates to be connected: exclude pure donors that do not belong to any t-group */
2614
+ AT_NUMB nCurTGroupNumber;
2615
+ int i, j, /*k,*/ ret, iat, /*nMobile,*/ nMinNumEndpoints;
2616
+ int s_subtype_all, s_subtype_taut;
2617
+ int nMaxNumCandidates, nNumCandidates, nNumCandidates2;
2618
+ T_ENDPOINT EndPointStackArray[MAX_STACK_ARRAY_LEN]; /* will be reallocated if too short */
2619
+ T_ENDPOINT *EndPoint = EndPointStackArray;
2620
+
2621
+
2622
+ if ( !s_group_info || !s_group_info->s_candidate || /*s_group_info->num_candidates <= 0 ||*/
2623
+ !t_group_info || !t_group_info->t_group || !c_group_info ) {
2624
+ return 0;
2625
+ }
2626
+ nMinNumEndpoints = 0;
2627
+ nMaxNumCandidates = s_group_info->max_num_candidates;
2628
+ nCurTGroupNumber = MAX_ATOMS; /* impossible t-group number */
2629
+ s_subtype_all = s_subtype_taut = 0;
2630
+ /* collect tautomeric acidic O and previously non-tautomeric C-OH, C-SH, C-O(-), C-S(-) */
2631
+ /* find whether previously found tautomeric atoms have both mobile H and (-) */
2632
+ if ( 1 || (s_group_info->num_candidates < 0) ) {
2633
+ /* can be only -O(-) and -OH */
2634
+ int s_type, s_subtype;
2635
+ S_CANDIDATE *s_candidate = s_group_info->s_candidate;
2636
+ for ( i = 0, nNumCandidates = nNumCandidates2 = 0; i < num_atoms; i ++ ) {
2637
+ s_subtype = 0;
2638
+ if ( 0 == (s_type = GetSaltChargeType( at, i, t_group_info, &s_subtype )) ||
2639
+ /* -C=O or =C-OH, O = S, Se, Te */
2640
+
2641
+ /*(t_group_info->tni.bNormalizationFlags & FLAG_FORCE_SALT_TAUT) &&*/
2642
+ 1 == (s_type = GetOtherSaltChargeType( at, i, t_group_info, &s_subtype, 1/* bAccept_O*/ )) ||
2643
+ /* =Z-MH or -Z=M, Z = centerpoint, M = endpoint, other than above. M may be N */
2644
+
2645
+ 2 == (s_type = GetOtherSaltType( at, i, &s_subtype )) ||
2646
+ /* >C-SH, >C-S(-); S=S,Se,Te */
2647
+
2648
+ /* other proton donor or acceptor */
2649
+ bHasAcidicHydrogen( at, i) && ((s_type=3), (s_subtype = SALT_p_DONOR)) ||
2650
+ bHasAcidicMinus( at, i) && ((s_type=3), (s_subtype = SALT_p_ACCEPTOR))
2651
+ ) {
2652
+
2653
+ if ( nNumCandidates >= nMaxNumCandidates ) {
2654
+ return BNS_VERT_EDGE_OVFL;
2655
+ }
2656
+ if ( at[i].endpoint ) {
2657
+ s_subtype_taut |= s_subtype;
2658
+ } else
2659
+ if ( bDoNotMergeNonTautAtom(at, i) ) {
2660
+ continue; /* ignore non-tautomeric N */
2661
+ }
2662
+ if ( !( s_subtype & SALT_DONOR_ALL ) ||
2663
+ (s_subtype & SALT_ACCEPTOR) && !at[i].endpoint ) {
2664
+ continue; /* do not include non-taut acceptors like -C=O */
2665
+ }
2666
+ s_candidate[nNumCandidates].atnumber = i;
2667
+ s_candidate[nNumCandidates].type = s_type;
2668
+ s_candidate[nNumCandidates].subtype = s_subtype;
2669
+ s_candidate[nNumCandidates].endpoint = at[i].endpoint;
2670
+ nNumCandidates ++;
2671
+ s_subtype_all |= s_subtype;
2672
+ }
2673
+ }
2674
+ /*
2675
+ Forced merging occurs upon:
2676
+ ===========================
2677
+ (t_group_info->bTautFlags & TG_FLAG_ALLOW_NO_NEGTV_O) or
2678
+ (t_group_info->tni.bNormalizationFlags & FLAG_FORCE_SALT_TAUT)
2679
+
2680
+
2681
+ Allow forced merging in cases:
2682
+ {t-groups} (H, (-)} {H, (-), t-groups}
2683
+
2684
+
2685
+ Normal salt merging in cases:
2686
+ (H, (-)} {H, (-), t-groups},
2687
+
2688
+ Cannot merge H into t-groups if no (-) is present
2689
+ */
2690
+
2691
+
2692
+ if ( (t_group_info->bTautFlags & TG_FLAG_ALLOW_NO_NEGTV_O) ||
2693
+ (t_group_info->bTautFlagsDone & TG_FLAG_FOUND_SALT_CHARGES_DONE) ||
2694
+ (t_group_info->tni.bNormalizationFlags & FLAG_FORCE_SALT_TAUT) ) {
2695
+ /* force merge even though no negative charges are present */
2696
+ if ( nNumCandidates <= 1 ||
2697
+ (!(s_subtype_all & SALT_DONOR_Neg2) || !(s_subtype_all & SALT_DONOR_H2)) &&
2698
+ !t_group_info->num_t_groups ) {
2699
+ s_group_info->num_candidates = -1; /* no candidate exists */
2700
+ return 0;
2701
+ }
2702
+ } else {
2703
+ /* normal salt mode: merge if both -XH and -X(-) are present */
2704
+ if ( nNumCandidates <= 1 ||
2705
+ (!(s_subtype_all & SALT_DONOR_Neg2) || !(s_subtype_all & SALT_DONOR_H2)) ) {
2706
+ s_group_info->num_candidates = -1; /* no candidate exists */
2707
+ return 0;
2708
+ }
2709
+ }
2710
+ /* -- old code --
2711
+ if ( nNumCandidates <= 1 ||
2712
+ (((t_group_info->bTautFlags & TG_FLAG_ALLOW_NO_NEGTV_O) ||
2713
+ (t_group_info->tni.bNormalizationFlags & FLAG_FORCE_SALT_TAUT)) ?
2714
+ !(s_subtype_all & SALT_DONOR_ALL):
2715
+ !(s_subtype_all & SALT_DONOR_Neg2)
2716
+ )
2717
+ ) {
2718
+ s_group_info->num_candidates = -1;
2719
+ return 0;
2720
+ }
2721
+ */
2722
+ if ( !(s_subtype_all & (SALT_DONOR_Neg2) ) ) {
2723
+ t_group_info->bTautFlagsDone |= TG_FLAG_ALLOW_NO_NEGTV_O_DONE;
2724
+ }
2725
+ s_group_info->num_candidates = nNumCandidates;
2726
+ }
2727
+
2728
+ for ( i = 0; i < s_group_info->num_candidates; i ++ ) {
2729
+ iat = s_group_info->s_candidate[i].atnumber;
2730
+ if ( (s_group_info->s_candidate[i].subtype & SALT_ACCEPTOR) && !at[iat].endpoint ) {
2731
+ continue; /* should not happen */
2732
+ }
2733
+ s_subtype_all |= s_group_info->s_candidate[i].subtype;
2734
+ if ( at[iat].endpoint != nCurTGroupNumber || !at[iat].endpoint ) {
2735
+ nMinNumEndpoints ++;
2736
+ }
2737
+ nCurTGroupNumber = (int)at[iat].endpoint;
2738
+ }
2739
+ if ( nMinNumEndpoints <= 1 ) {
2740
+ return 0; /* too few endpoints */
2741
+ }
2742
+
2743
+ /* make sure we have enough memory */
2744
+ if ( nMinNumEndpoints > MAX_STACK_ARRAY_LEN ) {
2745
+ if ( !(EndPoint = (T_ENDPOINT *)inchi_calloc( nMinNumEndpoints, sizeof(EndPoint[0]) ) ) ) {
2746
+ /*printf("BNS_OUT_OF_RAM-8\n");*/
2747
+ return BNS_OUT_OF_RAM;
2748
+ }
2749
+ }
2750
+
2751
+ nCurTGroupNumber = MAX_ATOMS; /* impossible t-group number */
2752
+ for ( i = j = 0; i < s_group_info->num_candidates; i ++ ) {
2753
+ iat = s_group_info->s_candidate[i].atnumber;
2754
+ if ( s_group_info->s_candidate[i].subtype == SALT_ACCEPTOR && !at[iat].endpoint ) {
2755
+ continue;
2756
+ }
2757
+ if ( at[iat].endpoint != nCurTGroupNumber || !at[iat].endpoint ) {
2758
+ AddEndPoint( EndPoint+j, at, iat );
2759
+ j ++;
2760
+ }
2761
+ nCurTGroupNumber = (int)at[iat].endpoint;
2762
+ }
2763
+
2764
+ ret = RegisterEndPoints( t_group_info,
2765
+ EndPoint, j, at, num_atoms, c_group_info, pBNS );
2766
+ if ( ret == -1 ) {
2767
+ ret = BNS_PROGRAM_ERR;
2768
+ }
2769
+
2770
+ if ( EndPoint != EndPointStackArray ) {
2771
+ inchi_free( EndPoint );
2772
+ }
2773
+
2774
+ return ret;
2775
+ }
2776
+
2777
+ /*****************************************************************************/
2778
+ int MakeIsotopicHGroup( inp_ATOM *at, int num_atoms, S_GROUP_INFO *s_group_info,
2779
+ T_GROUP_INFO *t_group_info )
2780
+ {
2781
+ /* all tautomeric atoms and all possible H+ donors and acceptors that have H */
2782
+ int i, j, k, n, bHasH, tg, nError=0;
2783
+ int s_subtype_all, s_subtype_taut;
2784
+ int nMaxNumCandidates, nNumCandidates, nNumNonTautCandidates;
2785
+
2786
+
2787
+ if ( !s_group_info || !s_group_info->s_candidate || /*s_group_info->num_candidates <= 0 ||*/
2788
+ !t_group_info || !t_group_info->t_group ) {
2789
+ return 0;
2790
+ }
2791
+ nMaxNumCandidates = s_group_info->max_num_candidates;
2792
+ s_subtype_all = s_subtype_taut = 0;
2793
+ memset( t_group_info->num_iso_H, 0, sizeof(t_group_info->num_iso_H) );
2794
+ if ( 1 || (s_group_info->num_candidates < 0) ) {
2795
+ int s_type, s_subtype;
2796
+ S_CANDIDATE *s_candidate = s_group_info->s_candidate;
2797
+ for ( i = 0, nNumCandidates = nNumNonTautCandidates = 0; i < num_atoms; i ++ ) {
2798
+ s_subtype = 0;
2799
+ s_type = 0;
2800
+ if ( at[i].endpoint ) {
2801
+ if ( (tg = t_group_info->tGroupNumber[at[i].endpoint]) &&
2802
+ at[i].endpoint == t_group_info->t_group[tg-=1].nGroupNumber ) {
2803
+ bHasH = (int)t_group_info->t_group[tg].num[0] - (int)t_group_info->t_group[tg].num[1];
2804
+ } else {
2805
+ nError = BNS_PROGRAM_ERR;
2806
+ break;
2807
+ }
2808
+ } else {
2809
+ bHasH = (int)at[i].num_H;
2810
+ }
2811
+ if ( bHasH && at[i].endpoint || /* tautomeric atoms */
2812
+
2813
+ /* non-tautomeric heteroatoms that
2814
+ (a) have H and
2815
+ (b) may be donors of H
2816
+ therefore may exchange isotopic-non-isotopic H */
2817
+ bHasH &&
2818
+ (0 == (s_type = GetSaltChargeType( at, i, t_group_info, &s_subtype )) ||
2819
+ /* -C=O or =C-OH, O = S, Se, Te */
2820
+
2821
+ /*(t_group_info->tni.bNormalizationFlags & FLAG_FORCE_SALT_TAUT) &&*/
2822
+ 1 == (s_type = GetOtherSaltChargeType( at, i, t_group_info, &s_subtype, 1/* bAccept_O*/ )) ||
2823
+ /* =Z-MH or -Z=M, Z = centerpoint, M = endpoint, other than above. M may be N */
2824
+
2825
+ 2 == (s_type = GetOtherSaltType( at, i, &s_subtype )) ||
2826
+ /* >C-SH, >C-S(-); S=S,Se,Te */
2827
+
2828
+ /* other proton donor or acceptor */
2829
+ bHasAcidicHydrogen( at, i) && ((s_type=3), (s_subtype = SALT_p_DONOR)) ||
2830
+ bHasAcidicMinus( at, i) && ((s_type=3), (s_subtype = SALT_p_ACCEPTOR)) ||
2831
+ bHasOtherExchangableH (at, i) && ((s_type=3), (s_subtype = SALT_DONOR_H)) )
2832
+
2833
+ ) {
2834
+
2835
+ if ( nNumCandidates >= nMaxNumCandidates ) {
2836
+ return BNS_VERT_EDGE_OVFL;
2837
+ }
2838
+ s_candidate[nNumCandidates].atnumber = i;
2839
+ s_candidate[nNumCandidates].type = s_type;
2840
+ s_candidate[nNumCandidates].subtype = s_subtype;
2841
+ s_candidate[nNumCandidates].endpoint = at[i].endpoint;
2842
+ nNumCandidates ++;
2843
+ nNumNonTautCandidates += !at[i].endpoint;
2844
+ s_subtype_all |= s_subtype;
2845
+ }
2846
+ }
2847
+ if ( nError ) {
2848
+ return nError;
2849
+ }
2850
+ if ( nNumCandidates > 0 ) {
2851
+ t_group_info->nIsotopicEndpointAtomNumber = (AT_NUMB *)inchi_calloc( nNumNonTautCandidates+1, sizeof(t_group_info->nIsotopicEndpointAtomNumber[0]));
2852
+ t_group_info->nIsotopicEndpointAtomNumber[0] = nNumNonTautCandidates;
2853
+ for ( i = 0, n = 1; i < nNumCandidates; i ++ ) {
2854
+ k = s_candidate[i].atnumber;
2855
+ if ( !at[k].endpoint ) {
2856
+ t_group_info->nIsotopicEndpointAtomNumber[n++] = k;
2857
+ }
2858
+ for ( j = 0; j < NUM_H_ISOTOPES; j ++ ) {
2859
+ t_group_info->num_iso_H[j] += at[k].num_iso_H[j];
2860
+ }
2861
+ at[k].cFlags |= AT_FLAG_ISO_H_POINT;
2862
+ }
2863
+ t_group_info->nNumIsotopicEndpoints = nNumNonTautCandidates+1;
2864
+ }
2865
+ }
2866
+ return nNumCandidates;
2867
+ }
2868
+
2869
+ /*#else*/ /* } DISCONNECT_SALTS == 0 */
2870
+
2871
+ /**********************************************************************************
2872
+ Charges and tautomeric endpoints (N only)
2873
+ **********************************************************************************
2874
+
2875
+ H = number of possibly moveable hydrogen atoms
2876
+ C = possibly moveable positive charge
2877
+
2878
+ - = single bond
2879
+ = = double bond
2880
+ # = triple bond
2881
+
2882
+ +-----------------------------------------------------------------------------+
2883
+ |ca-| H | edges to t- | 1 bond | 2 bonds | 3 bonds *) |
2884
+ |se | C | and c-groups | (valence) | (valence) | (valence) |
2885
+ | # | | (edges flow) | | | |
2886
+ +---|------+---------------+----------------+----------------+----------------|
2887
+ | 1 | H=0 | -- (1) | =NH (3) | =N- (3) | >N- (3) |
2888
+ | | C=0 | == | | | |
2889
+ +---|------+---------------+----------------+----------------+----------------|
2890
+ | 2 | H=1 | == (2) | -NH2 (3) | -NH- (3) | none |
2891
+ | | C=0 | == | | | |
2892
+ +---|------+---------------+----------------+----------------+----------------|
2893
+ | 3 | H=0 | -- (0) | #NH(+) (4) | =N(+)= (4) +)| >N(+)= (4) |
2894
+ | | C=1 | -- | (prohibited | | |
2895
+ | | | | by edge cap) | | |
2896
+ +---|------+---------------+----------------+----------------+----------------|
2897
+ | 4 | H=1 | == (1) | =NH2(+) (4) +)| =NH(+)- (4) +)| >NH(+)- (4) |
2898
+ | | C=1 | -- | | | |
2899
+ +---+-------------------------------------------------------------------------+
2900
+
2901
+ *) Cannot be a tautomeric endpoint
2902
+
2903
+ +) The three charged types of atoms [=N(+)=, =NH(+)-, =NH2(+)] should be
2904
+ checked for possible H-tautomerism. Other types in the column marked
2904
+ by *) should not be checked as long as H(+) exchange is not considered
2906
+ tautomeric.
2907
+
2908
+ Other possibilities: -NH3(+) >NH2(+) >N(+)< cannot be H-tautomeric endpoints.
2909
+
2910
+ Cases #1 (H=0, C=0) and #4 (H=1, C=1) are indistinguishable from the
2911
+ viewpoint of edge flows and capacities except for the flow from N to the (+) vertex.
2912
+
2913
+ Without taking precautions H(+) can be transferred
2914
+
2915
+ from =NH2(+) to =NH,
2916
+ from =NH(+)- to =N-,
2917
+ from >NH(+)- to >N-
2918
+
2919
+ or to any other appropriate atom that has a lone electron pair and bonds
2920
+ will not change. In this case no bond must be marked as tautomeric.
2921
+
2922
+ For this reason before attempting to transfer H from one endpoint to
2923
+ another the charges on the two atoms should be set to zero by
2924
+ forcing zero flow from each of atoms to the (+)-vertices if the
2925
+ atoms belong to a c-group.
2926
+
2927
+ **********************************************************************************/
2928
+
2929
+
2930
+ /********************************************************************************************************/
2931
+ /* MarkTautomerGroups: do not identify positively charged N as endpoints for now */
2932
+ int MarkTautomerGroups( inp_ATOM *at, int num_atoms, T_GROUP_INFO *t_group_info, C_GROUP_INFO *c_group_info
2933
+ , struct BalancedNetworkStructure *pBNS, struct BalancedNetworkData *pBD )
2934
+ {
2935
+ int i, j, k, m, endpoint_valence, centerpoint, endpoint, bond_type, nMobile, num_changes=0, tot_changes=0;
2936
+ T_ENDPOINT EndPoint[MAXVAL];
2937
+ T_BONDPOS BondPos[MAXVAL];
2938
+ AT_NUMB nGroupNumber;
2939
+ int bDiffGroups;
2940
+ int nNumEndPoints, nNumBondPos, nNumPossibleMobile;
2941
+ int bTautBond, bNonTautBond, bAltBond;
2942
+ int nNumDonor, nNumAcceptor, bPossiblyEndpoint;
2943
+ T_GROUP *t_group;
2944
+ int *pnum_t, max_num_t, bIgnoreIsotopic;
2945
+ ENDPOINT_INFO eif1, eif2;
2946
+ int nErr = 0;
2947
+ #define ALLOWED_EDGE(PBNS, IAT,IBOND) ( !PBNS || !PBNS->edge || !PBNS->vert || !PBNS->edge[PBNS->vert[IAT].iedge[IBOND]].forbidden)
2948
+
2949
+ if ( !t_group_info || !(t_group_info->bTautFlags & TG_FLAG_TEST_TAUT__ATOMS) )
2950
+ return 0;
2951
+ /* initial t_group allocation */
2952
+ if ( !t_group_info->t_group && !t_group_info->max_num_t_groups ) {
2953
+ INCHI_MODE bTautFlags = t_group_info->bTautFlags; /* save initial setting */
2954
+ INCHI_MODE bTautFlagsDone = t_group_info->bTautFlagsDone; /* save previous findings, if any */
2955
+ TNI tni = t_group_info->tni;
2956
+ AT_NUMB *tGroupNumber = t_group_info->tGroupNumber;
2957
+ bIgnoreIsotopic = t_group_info->bIgnoreIsotopic;
2958
+ memset( t_group_info, 0, sizeof(*t_group_info) );
2959
+ t_group_info->bIgnoreIsotopic = bIgnoreIsotopic; /* restore initial setting */
2960
+ t_group_info->bTautFlags = bTautFlags;
2961
+ t_group_info->bTautFlagsDone = bTautFlagsDone;
2962
+ t_group_info->tni = tni;
2963
+ t_group_info->tGroupNumber = tGroupNumber;
2964
+ t_group_info->max_num_t_groups = num_atoms/2+1; /* upper limit */
2965
+ if (!(t_group_info->t_group = (T_GROUP*)inchi_calloc(t_group_info->max_num_t_groups, sizeof(t_group[0])))) {
2966
+ return (t_group_info->max_num_t_groups = -1); /* failed, out of RAM */
2967
+ }
2968
+ }
2969
+ /* check if t_group_info exists */
2970
+ if ( !t_group_info->t_group || !t_group_info->max_num_t_groups )
2971
+ return 0;
2972
+
2973
+ if ( 0 > t_group_info->max_num_t_groups )
2974
+ return t_group_info->max_num_t_groups;
2975
+
2976
+ pnum_t = &t_group_info->num_t_groups; /* number of found tautomer endpoint groups */
2977
+ t_group = t_group_info->t_group;
2978
+ max_num_t = t_group_info->max_num_t_groups;
2979
+ bIgnoreIsotopic = t_group_info->bIgnoreIsotopic;
2980
+ /* 1-3 tautomers */
2981
+ for ( i = 0; i < num_atoms; i ++ ) {
2982
+ /* find possible endpoint Z = at[i] */
2983
+ if ( endpoint_valence = nGetEndpointInfo( at, i, &eif1 ) ) {
2984
+ /* 1st endpoint candidate found. Find centerpoint candidate */
2985
+ for ( j = 0; j < at[i].valence; j ++ ) {
2986
+ bond_type = (int)at[i].bond_type[j] & ~BOND_MARK_ALL;
2987
+ centerpoint = (int)at[i].neighbor[j]; /* a centerpoint candidate */
2988
+ if ( (bond_type == BOND_DOUBLE ||
2989
+ bond_type == BOND_ALTERN ||
2990
+ bond_type == BOND_ALT12NS ||
2991
+ bond_type == BOND_TAUTOM) && is_centerpoint_elem( at[centerpoint].el_number )
2992
+ && ALLOWED_EDGE(pBNS, i, j)
2993
+ ) {
2994
+ /* test a centerpoint candidate. */
2995
+ /* find all endpoints including at[i] and store them into EndPoint[] */
2996
+ nNumPossibleMobile = 0;
2997
+ nGroupNumber = (AT_NUMB)num_atoms; /* greater than any tautomeric group number */
2998
+ bDiffGroups = -1; /* ignore the first difference */
2999
+ nNumDonor = nNumAcceptor = 0;
3000
+ for ( k = 0, nNumEndPoints = 0, nNumBondPos = 0; k < at[centerpoint].valence; k ++ ) {
3001
+ endpoint = at[centerpoint].neighbor[k]; /* endpoint candidate */
3002
+ bond_type = (int)at[centerpoint].bond_type[k] & ~BOND_MARK_ALL;
3003
+ bTautBond =
3004
+ bNonTautBond =
3005
+ bAltBond =
3006
+ bPossiblyEndpoint = 0;
3007
+ if ( !( !pBNS || !pBNS->edge || !pBNS->vert || !pBNS->edge[pBNS->vert[centerpoint].iedge[k]].forbidden) ) {
3008
+ continue;
3009
+ }
3010
+ if ( !ALLOWED_EDGE(pBNS, centerpoint, k) ) {
3011
+ continue;
3012
+ } else
3013
+ if ( bond_type == BOND_ALTERN || bond_type == BOND_ALT12NS || bond_type == BOND_TAUTOM ) {
3014
+ bTautBond = 1;
3015
+ #if( REPLACE_ALT_WITH_TAUT == 1 )
3016
+ bAltBond = (bond_type == BOND_ALTERN || bond_type == BOND_ALT12NS);
3017
+ #endif
3018
+ } else
3019
+ if ( bond_type == BOND_SINGLE || bond_type == BOND_DOUBLE )
3020
+ bNonTautBond = 1;
3021
+ else
3022
+ continue;
3023
+
3024
+ if ( !(endpoint_valence = nGetEndpointInfo( at, endpoint, &eif1 )) )
3025
+ continue; /* not an endpoint element or can't have mobile groups */
3026
+ /* save information about the found possible tautomeric endpoint */
3027
+ /* 2 = T_NUM_NO_ISOTOPIC non-isotopic values */
3028
+ nMobile =
3029
+ AddAtom2num( EndPoint[nNumEndPoints].num, at, endpoint, 2 ); /* fill out */
3030
+ AddAtom2DA( EndPoint[nNumEndPoints].num_DA, at, endpoint, 2 );
3031
+ /* --- why is isitopic info missing ? -- see below
3032
+ nMobile = EndPoint[nNumEndPoints].num[1] = (at[endpoint].charge == -1);
3033
+ nMobile = EndPoint[nNumEndPoints].num[0] = at[endpoint].num_H + nMobile;
3034
+ */
3035
+ if ( bNonTautBond ) {
3036
+ m = (bond_type == BOND_SINGLE && (nMobile || at[endpoint].endpoint));
3037
+ nNumDonor += m;
3038
+ bPossiblyEndpoint += m;
3039
+ m = (bond_type == BOND_DOUBLE );
3040
+ nNumAcceptor += m;
3041
+ bPossiblyEndpoint += m;
3042
+ } else {
3043
+ /* tautomeric or alternating bond */
3044
+ m = (0 != at[endpoint].endpoint || eif1.cDonor );
3045
+ nNumDonor += m;
3046
+ bPossiblyEndpoint += m;
3047
+ m = ( at[endpoint].endpoint ||
3048
+ eif1.cNeutralBondsValence > at[endpoint].valence );
3049
+ nNumAcceptor += m;
3050
+ bPossiblyEndpoint += m;
3051
+ }
3052
+ if ( !bPossiblyEndpoint )
3053
+ continue;
3054
+ EndPoint[nNumEndPoints].nGroupNumber = at[endpoint].endpoint; /* =0 if it is an endpoint for the 1st time */
3055
+ EndPoint[nNumEndPoints].nEquNumber = 0;
3056
+ EndPoint[nNumEndPoints].nAtomNumber = (AT_NUMB)endpoint;
3057
+ if ( nGroupNumber != at[endpoint].endpoint ) {
3058
+ bDiffGroups ++;
3059
+ nGroupNumber = at[endpoint].endpoint;
3060
+ }
3061
+
3062
+ /* save positions of all, not only possibly tautomeric bonds */
3063
+ #if( REPLACE_ALT_WITH_TAUT != 1 )
3064
+ if ( bNonTautBond || bAltBond ) {
3065
+ #endif
3066
+ BondPos[nNumBondPos].nAtomNumber = (AT_NUMB)centerpoint;
3067
+ BondPos[nNumBondPos].neighbor_index = (AT_NUMB)k; /* bond ordering number; used to change bonds to tautomeric only */
3068
+ nNumBondPos ++;
3069
+ #if( REPLACE_ALT_WITH_TAUT != 1 )
3070
+ }
3071
+ #endif
3072
+ /* mobile group is possible if (a) the endpoint has a mobile group or */
3073
+ /* (b) the centerpoint is adjacent to another endpoint */
3074
+ nNumPossibleMobile += (nMobile>0 || at[endpoint].endpoint);
3075
+ nNumEndPoints ++;
3076
+ }
3077
+ if ( nNumEndPoints > 1 && nNumPossibleMobile && nNumDonor && nNumAcceptor ) {
3078
+ /*
3079
+ * a tautomeric group has been found
3080
+ *
3081
+ * at this point:
3082
+ * nGroupNumber = 0 if all endpoints belong to a newly discovered tautomeric group
3083
+ * bDiffGroups > 0 if at least 2 tautomeric groups are to be merged (one of them can be new)
3084
+ * case (nGroupNumber != 0 && bDiffGroups = 0 ) ignored because all endpoints belong to the same known t-group
3085
+ * case (nGroupNumber != 0 && bDiffGroups < 0 ) cannot happen
3086
+ */
3087
+
3088
+ nErr=FindAccessibleEndPoints( EndPoint, &nNumEndPoints, BondPos, &nNumBondPos,
3089
+ pBNS, pBD, at, num_atoms, c_group_info );
3090
+ if ( IS_BNS_ERROR(nErr) ) {
3091
+ return nErr;
3092
+ }
3093
+ nErr = 0;
3094
+
3095
+ if ( nNumEndPoints > 0 ) {
3096
+ if ( !nGroupNumber || bDiffGroups > 0 ) {
3097
+ num_changes = RegisterEndPoints( t_group_info, EndPoint, nNumEndPoints, at, num_atoms, c_group_info, pBNS );
3098
+ if ( num_changes == -1 ) {
3099
+ nErr = CT_TAUCOUNT_ERR;
3100
+ }
3101
+ if ( num_changes < 0 ) {
3102
+ nErr = num_changes;
3103
+ }
3104
+ if ( nErr )
3105
+ goto exit_function;
3106
+ tot_changes += (num_changes>0);
3107
+ }
3108
+ if ( nNumBondPos > 0 ) {
3109
+ /* some of the bonds have not been marked as tautomeric yet */
3110
+ num_changes = SetTautomericBonds( at, nNumBondPos, BondPos );
3111
+ tot_changes += (num_changes>0);
3112
+ }
3113
+ }
3114
+ }
3115
+ }
3116
+ }
3117
+ }
3118
+ }
3119
+
3120
+ #if( TAUT_OTHER == 1 ) /* { */
3121
+ if ( !tot_changes ) {
3122
+ #define MAX_ALT_PATH_LEN 8
3123
+ int nMaxLenDfsPath = MAX_ALT_PATH_LEN;
3124
+ int i1, i2;
3125
+ AT_RANK *nDfsPathPos = (AT_RANK *)inchi_calloc( num_atoms, sizeof(nDfsPathPos[0]) );
3126
+ DFS_PATH DfsPath[MAX_ALT_PATH_LEN];
3127
+ int ret;
3128
+ if ( !nDfsPathPos || !DfsPath ) {
3129
+ tot_changes = CT_OUT_OF_RAM; /* <BRKPT> */
3130
+ goto free_memory;
3131
+ }
3132
+ #if( TAUT_4PYRIDINOL_RINGS == 1 )
3133
+ /* 6-member rings */
3134
+ /*
3135
+ O OH OH
3136
+ || | |
3137
+ / \ // \ / \\
3138
+ || || <--> | || <--> || |
3139
+ \ / \\ / \ //
3140
+ NH N N
3141
+ */
3142
+ for ( i1 = 0; i1 < num_atoms; i1 ++ ) {
3143
+ /* find possible endpoint Z = at[i1] */
3144
+ if ( 3 != (endpoint_valence = nGetEndpointInfo( at, i1, &eif1 ) ) ||
3145
+ 2 != at[i1].valence ) {
3146
+ continue; /* not a nitrogen atom or a wrong valence */
3147
+ }
3148
+
3149
+ if ( at[i1].nNumAtInRingSystem >= 6 ) {
3150
+ nNumEndPoints = 0;
3151
+ nNumBondPos = 0;
3152
+
3153
+ ret = nGet15TautIn6MembAltRing( at, i1, nDfsPathPos,
3154
+ DfsPath, nMaxLenDfsPath,
3155
+ EndPoint, sizeof(EndPoint)/sizeof(EndPoint[0]),
3156
+ BondPos, sizeof(BondPos)/sizeof(BondPos[0]),
3157
+ &nNumEndPoints, &nNumBondPos,
3158
+ pBNS, pBD, num_atoms);
3159
+ if ( ret > 0 ) {
3160
+ if ( nNumEndPoints ) {
3161
+ num_changes = RegisterEndPoints( t_group_info, EndPoint, nNumEndPoints, at, num_atoms, c_group_info, pBNS);
3162
+ if ( num_changes == -1 ) {
3163
+ nErr = CT_TAUCOUNT_ERR;
3164
+ }
3165
+ if ( num_changes < 0 ) {
3166
+ nErr = num_changes;
3167
+ }
3168
+ if ( nErr )
3169
+ goto free_memory;
3170
+ tot_changes += (num_changes > 0);
3171
+ }
3172
+ if ( nNumBondPos ) {
3173
+ tot_changes += ( 0 < SetTautomericBonds( at, nNumBondPos, BondPos ) );
3174
+ }
3175
+ } else
3176
+ if ( IS_BNS_ERROR( ret ) ) {
3177
+ nErr = ret;
3178
+ goto free_memory;
3179
+ }
3180
+ }
3181
+ }
3182
+ #endif /* TAUT_4PYRIDINOL_RINGS */
3183
+ #if( TAUT_PYRAZOLE_RINGS == 1 )
3184
+ /* 5-member rings:
3185
+
3186
+ Z Z
3187
+ / \\ // \
3188
+ X Y <--> X Y
3189
+ \\ / \ //
3190
+ N--NH HN--N
3191
+
3192
+ ^ ^
3193
+ search for these NH
3194
+ */
3195
+ /* 5-member rings (pyrazole derivatives): look for the neighboring N */
3196
+ for ( i1 = 0; i1 < num_atoms; i1 ++ ) {
3197
+ if ( 2 == at[i1].valence &&
3198
+ at[i1].nNumAtInRingSystem >= 5 &&
3199
+ 3 == (endpoint_valence = nGetEndpointInfo( at, i1, &eif1 ))
3200
+ ) {
3201
+ nMobile = at[i1].num_H + (at[i1].charge == -1);
3202
+ for ( j = 0; j < at[i1].valence; j ++ ) {
3203
+ int nMobile2, endpoint_valence2;
3204
+ i2 = at[i1].neighbor[j];
3205
+
3206
+ /* may be important */
3207
+ if ( i2 >= i1 )
3208
+ continue; /* do not try same pair 2 times */
3209
+
3210
+ if ( at[i2].nRingSystem != at[i1].nRingSystem )
3211
+ continue;
3212
+
3213
+ bond_type = (at[i1].bond_type[j] & ~BOND_MARK_ALL);
3214
+ if ( bond_type != BOND_SINGLE &&
3215
+ bond_type != BOND_TAUTOM &&
3216
+ bond_type != BOND_ALT12NS &&
3217
+ bond_type != BOND_ALTERN || /* added 1-15-2002 */
3218
+ 2 != at[i2].valence ||
3219
+ 3 != (endpoint_valence2 = nGetEndpointInfo( at, i2, &eif2 ) ) ) {
3220
+ continue; /* not a nitrogen atom or a wrong valence or not a single bond */
3221
+ }
3222
+ nMobile2 = at[i2].num_H + (at[i2].charge == -1); /* number of mobile groups */
3223
+ #if( TAUT_IGNORE_EQL_ENDPOINTS == 1 )
3224
+ if ( at[i1].endpoint && at[i1].endpoint == at[i2].endpoint )
3225
+ continue; /* atoms already belong to the same t-group */
3226
+ #endif
3227
+ if ( !at[i1].endpoint && !at[i2].endpoint && 1!=nMobile + nMobile2 )
3228
+ continue;
3229
+
3230
+ ret = nGet12TautIn5MembAltRing( at, i1, j, nDfsPathPos,
3231
+ DfsPath, nMaxLenDfsPath,
3232
+ EndPoint, sizeof(EndPoint)/sizeof(EndPoint[0]),
3233
+ BondPos, sizeof(BondPos)/sizeof(BondPos[0]),
3234
+ &nNumEndPoints, &nNumBondPos
3235
+ , pBNS, pBD, num_atoms);
3236
+ if ( ret > 0 ) {
3237
+ if ( nNumEndPoints ) {
3238
+ num_changes = RegisterEndPoints( t_group_info, EndPoint, nNumEndPoints, at, num_atoms, c_group_info, pBNS);
3239
+ if ( num_changes == -1 ) {
3240
+ nErr = CT_TAUCOUNT_ERR;
3241
+ }
3242
+ if ( num_changes < 0 ) {
3243
+ nErr = num_changes;
3244
+ }
3245
+ if ( nErr )
3246
+ goto free_memory;
3247
+ tot_changes += (num_changes > 0);
3248
+ }
3249
+ if ( nNumBondPos ) {
3250
+ tot_changes += ( 0 < SetTautomericBonds( at, nNumBondPos, BondPos ) );
3251
+ }
3252
+ } else
3253
+ if ( IS_BNS_ERROR( ret ) ) {
3254
+ nErr = ret;
3255
+ goto free_memory;
3256
+ }
3257
+ }
3258
+ }
3259
+ }
3260
+ #endif /* TAUT_PYRAZOLE_RINGS */
3261
+ #if ( TAUT_TROPOLONE_7 == 1 || TAUT_TROPOLONE_5 == 1 ) /* { */
3262
+ /********************************************************
3263
+ * A B
3264
+ * | ||
3265
+ * 7-member rings (tropolones): look for M=Q--R--ZH,
3266
+ * ^ ^ ^ ^
3267
+ * endpoint1 i1 i2 endpoint2
3268
+ * where A-Q-R=B belong to a 7-member alt. (except Q-R bond) ring: ..=A-(Q-R)=B-..
3269
+ * Bond Q-R should be single or tautomeric or alternating
3270
+ * M=Q and R-ZH should be chain (non-ring) bonds
3271
+ * Same for 5-member rings
3272
+ */
3273
+ for ( i1 = 0; i1 < num_atoms; i1 ++ ) {
3274
+ if ( at[i1].nNumAtInRingSystem >=
3275
+ #if( TAUT_TROPOLONE_5 == 1 )
3276
+ 5
3277
+ #else
3278
+ 7
3279
+ #endif
3280
+ &&
3281
+ bIsCenterPointStrict( at, i1 ) &&
3282
+ #if( TAUT_RINGS_ATTACH_CHAIN == 1 )
3283
+ at[i1].bCutVertex &&
3284
+ #endif
3285
+ at[i1].valence == 3 && !at[i1].endpoint ) {
3286
+ int nMobile1, endpoint1, endpoint1_valence, bond_type1;
3287
+ int nMobile2, endpoint2, endpoint2_valence, bond_type2;
3288
+ for ( j = 0; j < at[i1].valence; j ++ ) {
3289
+ i2 = at[i1].neighbor[j];
3290
+ /*
3291
+ // may be important
3292
+ if ( i2 > i1 )
3293
+ continue; // do not try same pair 2 times
3294
+ */
3295
+ if ( at[i2].nRingSystem != at[i1].nRingSystem ||
3296
+ !bIsCenterPointStrict( at, i2 ) ||
3297
+ #if( TAUT_RINGS_ATTACH_CHAIN == 1 )
3298
+ !at[i2].bCutVertex ||
3299
+ #endif
3300
+ at[i2].valence != 3 || at[i2].endpoint )
3301
+ continue;
3302
+ bond_type = (at[i1].bond_type[j] & ~BOND_MARK_ALL);
3303
+ if ( bond_type != BOND_SINGLE &&
3304
+ bond_type != BOND_TAUTOM &&
3305
+ bond_type != BOND_ALT12NS &&
3306
+ bond_type != BOND_ALTERN ) {
3307
+ continue; /* not a single bond between Q-R */
3308
+ }
3309
+ /* find endpoints */
3310
+ for ( k = 0; k < at[i1].valence; k ++ ) {
3311
+ endpoint1 = at[i1].neighbor[k];
3312
+ if ( endpoint1 == i2 )
3313
+ continue; /* j == k */
3314
+ if ( !(endpoint1_valence = nGetEndpointInfo( at, endpoint1, &eif1 ) ) )
3315
+ continue; /* not an endpoint1 element or can't have mobile groups */
3316
+ #if( TAUT_RINGS_ATTACH_CHAIN == 1 )
3317
+ if ( at[endpoint1].nRingSystem == at[i1].nRingSystem )
3318
+ continue;
3319
+ #endif
3320
+ nMobile1 = at[endpoint1].num_H + (at[endpoint1].charge == -1); /* number of mobile groups */
3321
+ if ( nMobile1 + at[endpoint1].chem_bonds_valence != endpoint1_valence )
3322
+ continue; /* abnormal endpoint1 valence; ignore. */
3323
+ bond_type1 = (at[i1].bond_type[k] & ~BOND_MARK_ALL);
3324
+
3325
+ if ( bond_type1 != BOND_SINGLE &&
3326
+ bond_type1 != BOND_DOUBLE &&
3327
+ bond_type1 != BOND_TAUTOM &&
3328
+ bond_type1 != BOND_ALT12NS &&
3329
+ bond_type1 != BOND_ALTERN )
3330
+ continue;
3331
+
3332
+ for ( m = 0; m < at[i2].valence; m ++ ) {
3333
+ endpoint2 = at[i2].neighbor[m];
3334
+ if ( endpoint2 == i1 )
3335
+ continue;
3336
+ if ( !(endpoint2_valence = nGetEndpointInfo( at, endpoint2, &eif2 )) )
3337
+ continue; /* not an endpoint2 element or can't have mobile groups */
3338
+ #if( TAUT_RINGS_ATTACH_CHAIN == 1 )
3339
+ if ( at[endpoint2].nRingSystem == at[i2].nRingSystem )
3340
+ continue;
3341
+ #endif
3342
+ nMobile2 = at[endpoint2].num_H + (at[endpoint2].charge == -1); /* number of mobile groups */
3343
+ bond_type2 = (at[i2].bond_type[m] & ~BOND_MARK_ALL);
3344
+
3345
+ if ( bond_type2 != BOND_SINGLE &&
3346
+ bond_type2 != BOND_DOUBLE &&
3347
+ bond_type2 != BOND_TAUTOM &&
3348
+ bond_type2 != BOND_ALT12NS &&
3349
+ bond_type2 != BOND_ALTERN )
3350
+ continue;
3351
+
3352
+ /* final test for possible tautomerism */
3353
+ nMobile = 0;
3354
+
3355
+ if ( ALLOWED_EDGE(pBNS, i1, k) && ALLOWED_EDGE(pBNS, i2, m) ) {
3356
+
3357
+ /* can mobile group move from 1 to 2? */
3358
+ nMobile += (at[endpoint1].endpoint || nMobile1) && /* from endpoint1 */
3359
+ (bond_type1 != BOND_DOUBLE) &&
3360
+
3361
+ (at[endpoint2].endpoint || /* to endpoint2 */
3362
+ eif2.cNeutralBondsValence > at[endpoint2].valence ) &&
3363
+ (bond_type2 != BOND_SINGLE);
3364
+
3365
+
3366
+ /* can mobile group move from 2 to 1? */
3367
+ nMobile += (at[endpoint2].endpoint || nMobile2) && /* from endpoint2 */
3368
+ (bond_type2 != BOND_DOUBLE) && /*changed from BOND_SINGLE 2004-02-26 */
3369
+
3370
+ (at[endpoint1].endpoint || /* to endpoint1 */
3371
+ eif1.cNeutralBondsValence > at[endpoint1].valence ) &&
3372
+ (bond_type1 != BOND_SINGLE);
3373
+ }
3374
+ if ( !nMobile )
3375
+ continue;
3376
+
3377
+ if ( bond_type1 == bond_type2 &&
3378
+ (bond_type1 == BOND_SINGLE || bond_type1 == BOND_DOUBLE) )
3379
+ continue;
3380
+ /* -- old --
3381
+ if ( !at[endpoint1].endpoint && !at[endpoint2].endpoint && 1 != nMobile1 + nMobile2 )
3382
+ continue;
3383
+ */
3384
+ /* -- new --
3385
+
3386
+ if ( !at[endpoint1].endpoint && !at[endpoint2].endpoint ) {
3387
+ if ( !(bond_type1 == BOND_SINGLE || bond_type1 == BOND_DOUBLE) ||
3388
+ !(bond_type2 == BOND_SINGLE || bond_type2 == BOND_DOUBLE) ) {
3389
+ // at this point bond_type1 != bond_type2
3390
+ continue;
3391
+ }
3392
+ if ( bond_type1 == BOND_SINGLE && !nMobile1 ||
3393
+ bond_type2 == BOND_SINGLE && !nMobile2 ||
3394
+ 0 == nMobile1 + nMobile2 ) {
3395
+ continue;
3396
+ }
3397
+ }
3398
+ */
3399
+ #if ( TAUT_TROPOLONE_7 == 1 )
3400
+ if ( at[i1].nNumAtInRingSystem >= 7 ) {
3401
+ ret = nGet14TautIn7MembAltRing( at, i1, j, k, m, nDfsPathPos,
3402
+ DfsPath, nMaxLenDfsPath,
3403
+ EndPoint, sizeof(EndPoint)/sizeof(EndPoint[0]),
3404
+ BondPos, sizeof(BondPos)/sizeof(BondPos[0]),
3405
+ &nNumEndPoints, &nNumBondPos,
3406
+ pBNS, pBD, num_atoms);
3407
+ if ( ret > 0 ) {
3408
+ if ( nNumEndPoints ) {
3409
+ num_changes = RegisterEndPoints( t_group_info, EndPoint, nNumEndPoints, at, num_atoms, c_group_info, pBNS);
3410
+ if ( num_changes == -1 ) {
3411
+ nErr = CT_TAUCOUNT_ERR;
3412
+ }
3413
+ if ( num_changes < 0 ) {
3414
+ nErr = num_changes;
3415
+ }
3416
+ if ( nErr )
3417
+ goto free_memory;
3418
+ tot_changes += (num_changes > 0);
3419
+ }
3420
+ if ( nNumBondPos ) {
3421
+ tot_changes += ( 0 < SetTautomericBonds( at, nNumBondPos, BondPos ) );
3422
+ }
3423
+ } else
3424
+ if ( IS_BNS_ERROR( ret ) ) {
3425
+ nErr = ret;
3426
+ goto free_memory;
3427
+ }
3428
+ }
3429
+ #endif
3430
+
3431
+ #if ( TAUT_TROPOLONE_5 == 1 )
3432
+ if ( at[i1].nNumAtInRingSystem >= 5 ) {
3433
+ ret = nGet14TautIn5MembAltRing( at, i1, j, k, m, nDfsPathPos,
3434
+ DfsPath, nMaxLenDfsPath,
3435
+ EndPoint, sizeof(EndPoint)/sizeof(EndPoint[0]),
3436
+ BondPos, sizeof(BondPos)/sizeof(BondPos[0]),
3437
+ &nNumEndPoints, &nNumBondPos,
3438
+ pBNS, pBD, num_atoms);
3439
+ if ( ret > 0 ) {
3440
+ if ( nNumEndPoints ) {
3441
+ num_changes = RegisterEndPoints( t_group_info, EndPoint, nNumEndPoints, at, num_atoms, c_group_info, pBNS);
3442
+ if ( num_changes == -1 ) {
3443
+ nErr = CT_TAUCOUNT_ERR;
3444
+ }
3445
+ if ( num_changes < 0 ) {
3446
+ nErr = num_changes;
3447
+ }
3448
+ if ( nErr )
3449
+ goto free_memory;
3450
+ tot_changes += (num_changes > 0);
3451
+ }
3452
+ if ( nNumBondPos ) {
3453
+ tot_changes += ( 0 < SetTautomericBonds( at, nNumBondPos, BondPos ) );
3454
+ }
3455
+ } else
3456
+ if ( IS_BNS_ERROR( ret ) ) {
3457
+ nErr = ret;
3458
+ goto free_memory;
3459
+ }
3460
+ }
3461
+ #endif
3462
+ }
3463
+ }
3464
+ }
3465
+ }
3466
+ }
3467
+ #endif /* } TAUT_TROPOLONE */
3468
+ free_memory:
3469
+ if ( nDfsPathPos ) {
3470
+ inchi_free( nDfsPathPos );
3471
+ }
3472
+ #undef MAX_ALT_PATH_LEN
3473
+ }
3474
+ #endif /* } FIND_RING_SYSTEMS */
3475
+ exit_function:
3476
+ return nErr < 0? nErr : tot_changes;
3477
+ }
3478
+
3479
+ /******************************************************************************/
3480
+ int free_t_group_info( T_GROUP_INFO *t_group_info )
3481
+ {
3482
+ if ( t_group_info ) {
3483
+ if ( t_group_info->t_group ) {
3484
+ inchi_free( t_group_info->t_group );
3485
+ }
3486
+ if ( t_group_info->nEndpointAtomNumber ) {
3487
+ inchi_free( t_group_info->nEndpointAtomNumber );
3488
+ }
3489
+ if ( t_group_info->tGroupNumber ) {
3490
+ inchi_free( t_group_info->tGroupNumber );
3491
+ }
3492
+ if ( t_group_info->nIsotopicEndpointAtomNumber ) {
3493
+ inchi_free( t_group_info->nIsotopicEndpointAtomNumber );
3494
+ }
3495
+ memset( t_group_info, 0, sizeof(*t_group_info));
3496
+ }
3497
+ return 0;
3498
+ }
3499
+
3500
+ /*******************************************************************************/
3501
+ /**/
3502
+ int make_a_copy_of_t_group_info( T_GROUP_INFO *t_group_info, T_GROUP_INFO *t_group_info_orig )
3503
+ {
3504
+ int err = 0, len;
3505
+ free_t_group_info( t_group_info );
3506
+ if ( t_group_info_orig && t_group_info ) {
3507
+ if ( (len=t_group_info_orig->max_num_t_groups) > 0 ) {
3508
+ if (t_group_info->t_group =
3509
+ (T_GROUP*)inchi_malloc( len * sizeof(t_group_info->t_group[0]))) {
3510
+ memcpy(t_group_info->t_group,
3511
+ t_group_info_orig->t_group,
3512
+ len * sizeof(t_group_info->t_group[0]));
3513
+ } else {
3514
+ err ++;
3515
+ }
3516
+ }
3517
+ if ( (len = t_group_info_orig->nNumEndpoints) > 0 ) {
3518
+ if (t_group_info->nEndpointAtomNumber =
3519
+ (AT_NUMB*)inchi_malloc( len * sizeof(t_group_info->nEndpointAtomNumber[0]))) {
3520
+ memcpy(t_group_info->nEndpointAtomNumber,
3521
+ t_group_info_orig->nEndpointAtomNumber,
3522
+ len * sizeof(t_group_info->nEndpointAtomNumber[0]));
3523
+ } else {
3524
+ err ++;
3525
+ }
3526
+ }
3527
+ if ( (len = t_group_info_orig->num_t_groups) > 0 ) {
3528
+ if (t_group_info->tGroupNumber =
3529
+ (AT_NUMB*)inchi_malloc( len * TGSO_TOTAL_LEN * sizeof(t_group_info->tGroupNumber[0]))) {
3530
+ memcpy(t_group_info->tGroupNumber,
3531
+ t_group_info_orig->tGroupNumber,
3532
+ len * TGSO_TOTAL_LEN * sizeof(t_group_info->tGroupNumber[0]));
3533
+ } else {
3534
+ err ++;
3535
+ }
3536
+ }
3537
+ if ( (len = t_group_info_orig->nNumIsotopicEndpoints) > 0 ) {
3538
+ if (t_group_info->nIsotopicEndpointAtomNumber =
3539
+ (AT_NUMB*)inchi_malloc( len * sizeof(t_group_info->nIsotopicEndpointAtomNumber[0]))) {
3540
+ memcpy(t_group_info->nIsotopicEndpointAtomNumber,
3541
+ t_group_info_orig->nIsotopicEndpointAtomNumber,
3542
+ len * sizeof(t_group_info->nIsotopicEndpointAtomNumber[0]));
3543
+ } else {
3544
+ err ++;
3545
+ }
3546
+ }
3547
+ if ( !err ) {
3548
+ t_group_info->nNumEndpoints = t_group_info_orig->nNumEndpoints;
3549
+ t_group_info->num_t_groups = t_group_info_orig->num_t_groups;
3550
+ t_group_info->max_num_t_groups = t_group_info_orig->max_num_t_groups;
3551
+ t_group_info->bIgnoreIsotopic = t_group_info_orig->bIgnoreIsotopic;
3552
+ t_group_info->nNumIsotopicEndpoints = t_group_info_orig->nNumIsotopicEndpoints;
3553
+ t_group_info->tni = t_group_info_orig->tni;
3554
+ /*
3555
+ t_group_info->nNumRemovedExplicitH = t_group_info_orig->nNumRemovedExplicitH;
3556
+ t_group_info->nNumRemovedProtons = t_group_info_orig->nNumRemovedProtons;
3557
+ t_group_info->bNormalizationFlags = t_group_info_orig->bNormalizationFlags;
3558
+ */
3559
+ /*
3560
+ t_group_info->bHardAddedRemovedProtons = t_group_info_orig->bHardAddedRemovedProtons;
3561
+ t_group_info->bSimpleAddedRemovedProtons = t_group_info_orig->bSimpleAddedRemovedProtons;
3562
+ t_group_info->nNumCanceledCharges = t_group_info_orig->nNumCanceledCharges;
3563
+ */
3564
+ }
3565
+ t_group_info->bTautFlags = t_group_info_orig->bTautFlags;
3566
+ t_group_info->bTautFlagsDone = t_group_info_orig->bTautFlagsDone;
3567
+ }
3568
+ return err;
3569
+ }
3570
+ /*******************************************************************************/
3571
+ /* set tautomer group isotopic sort keys */
3572
+ int set_tautomer_iso_sort_keys( T_GROUP_INFO *t_group_info )
3573
+ {
3574
+ T_GROUP *t_group;
3575
+ T_GROUP_ISOWT Mult = 1;
3576
+ int i, j, num_t_groups, num_iso_t_groups = 0;
3577
+ if ( !t_group_info || !(t_group = t_group_info->t_group) ||
3578
+ 0 >= (num_t_groups = t_group_info->num_t_groups) || t_group_info->nNumIsotopicEndpoints )
3579
+ return 0;
3580
+ for ( i = 0; i < num_t_groups; i ++ ) {
3581
+ t_group[i].iWeight = 0;
3582
+ j = T_NUM_ISOTOPIC - 1;
3583
+ Mult = 1;
3584
+ do {
3585
+ t_group[i].iWeight += Mult * (T_GROUP_ISOWT)t_group[i].num[T_NUM_NO_ISOTOPIC+j];
3586
+ } while ( --j >= 0 && (Mult *= T_GROUP_ISOWT_MULT) );
3587
+ num_iso_t_groups += (t_group[i].iWeight != 0);
3588
+ }
3589
+ return num_iso_t_groups;
3590
+ }
3591
+
3592
+ /******************************************************************************
3593
+ *
3594
+ * Fill t_group_info with information necessary to fill out tautomer part
3595
+ * of the linear connection table record.
3596
+ * Note: on input, t_group_info should contain information created by MarkTautomerGroups()
3597
+ * No previous t_group_info adjustment due to throwing out disconnected parts of
3598
+ * the chemical structure is needed.
3599
+ *
3600
+ * Note2: throw out t_groups containing negative charges only (IGNORE_TGROUP_WITHOUT_H==1)
3601
+ * (leave their tautomeric bonds unchanged)
3602
+ * Note3: remove negative charges from other tautomeric groups
3603
+ * and adjust counts of mobile atoms if permitted (REMOVE_TGROUP_CHARGE==1)
+ *
+ * Parameters:
+ *   at           - atoms; at[i].endpoint (old t-group number, 0 = not an endpoint)
+ *                  is read and, on the successful path, overwritten with the new number
+ *   num_atoms    - number of entries of at[] to scan
+ *   t_group_info - t-group data; may be NULL
+ *
+ * Side effects on t_group_info:
+ *   - nEndpointAtomNumber and tGroupNumber are freed at the start; on the
+ *     successful path they are replaced by newly allocated arrays
+ *   - t_group[] is compacted in place (removed groups are squeezed out with memmove)
+ *     and the remaining groups are renumbered 1, 2, ... in their array order
+ *   - on the successful path num_t_groups and nNumEndpoints receive the new counts;
+ *     on every path through exit_function they are both set to 0
+ *
+ * Return value:
+ *   0                 t_group_info is NULL, has no t_group[] or max_num_t_groups <= 0
+ *   > 0               success: nNumEndpoints + T_GROUP_HDR_LEN * (number of groups kept) + 1
+ *   CT_TAUCOUNT_ERR   allocation failure or an inconsistency in the input data
+ *   0 or 1            no endpoints / no groups left; 1 is returned when
+ *                     tni.bNormalizationFlags has FLAG_NORM_CONSIDER_TAUT set or when
+ *                     nNumIsotopicEndpoints > 1 and one of the "found isotopic" bits
+ *                     is set in bTautFlagsDone
3604
+ */
3605
+ int CountTautomerGroups( sp_ATOM *at, int num_atoms, T_GROUP_INFO *t_group_info )
3606
+ {
3607
+ int i, j, ret = 0, nNumEndpoints, max_t_group, num_groups_noH;
3608
+
3609
+ AT_NUMB nGroupNumber, nNewGroupNumber, *nCurrEndpointAtNoPos = NULL;
3610
+
3611
+ T_GROUP *t_group;
3612
+ int num_t;
3613
+ /* int bIgnoreIsotopic, max_num_t; */
3614
+ AT_NUMB *nTautomerGroupNumber = NULL;
3615
+ AT_NUMB *nEndpointAtomNumber = NULL;
3616
+ AT_NUMB *tGroupNumber = NULL;
3617
+
3618
+ if ( !t_group_info || !t_group_info->t_group || 0 >= t_group_info->max_num_t_groups ) {
3619
+ return 0; /* empty t-groups */
3620
+ }
3621
+ num_t = t_group_info->num_t_groups; /* working count; decremented below each time a group is removed */
3622
+ t_group = t_group_info->t_group;
3623
+ /*
3624
+ max_num_t = t_group_info->max_num_t_groups;
3625
+ bIgnoreIsotopic = t_group_info->bIgnoreIsotopic;
3626
+ */
3627
+ num_groups_noH = 0;
3628
+
3629
+ /* the following 2 arrays are to be rebuilt here */
3630
+ if ( t_group_info->nEndpointAtomNumber ) {
3631
+ inchi_free ( t_group_info->nEndpointAtomNumber );
3632
+ t_group_info->nEndpointAtomNumber = NULL;
3633
+ }
3634
+ if ( t_group_info->tGroupNumber ) {
3635
+ inchi_free ( t_group_info->tGroupNumber );
3636
+ t_group_info->tGroupNumber = NULL;
3637
+ }
3638
+ /* find max_t_group: the largest nGroupNumber currently stored in t_group[] */
3639
+ for ( i = 0, max_t_group = 0; i < t_group_info->num_t_groups; i ++ ) {
3640
+ if ( max_t_group < t_group[i].nGroupNumber )
3641
+ max_t_group = t_group[i].nGroupNumber;
3642
+ }
3643
+ /* allocate memory for temp storage of numbers of endpoints; */
+ /* the array is indexed by the old group number, hence max_t_group+1 elements; */
+ /* it stays NULL when max_t_group is 0 */
3644
+ if ( max_t_group &&
3645
+ !(nTautomerGroupNumber = (AT_NUMB*) inchi_calloc( max_t_group+1, sizeof(nTautomerGroupNumber[0]) ) /*temp*/ ) ) {
3646
+ goto err_exit_function; /* program error: out of RAM */ /* <BRKPT> */
3647
+ }
3648
+
3649
+ /* count endpoints for each tautomer group */
3650
+ for ( i = 0, nNumEndpoints = 0; i < num_atoms; i ++ ) {
3651
+ if ( (j = at[i].endpoint) == 0 )
3652
+ continue;
3653
+ if ( j > max_t_group ) /* debug only; also keeps a NULL nTautomerGroupNumber from being indexed when max_t_group is 0 */
3654
+ goto err_exit_function; /* program error */ /* <BRKPT> */
3655
+ nTautomerGroupNumber[j] ++; /* at this stage: number of endpoint atoms found for old group number j */
3656
+ nNumEndpoints ++;
3657
+ }
3658
+
3659
+ if ( !nNumEndpoints ) {
3660
+ goto exit_function; /* not a tautomer */
3661
+ }
3662
+
3663
+ /* allocate temporary array */
3664
+ if ( !(nEndpointAtomNumber = (AT_NUMB*) inchi_calloc( nNumEndpoints, sizeof(nEndpointAtomNumber[0]) ) ) ||
3665
+ !(nCurrEndpointAtNoPos = (AT_NUMB*) inchi_calloc( num_t, sizeof(nCurrEndpointAtNoPos[0]) ) /*temp*/ ) ) {
3666
+ goto err_exit_function; /* program error: out of RAM */ /* <BRKPT> */
3667
+ }
3668
+ /*
3669
+ * Remove missing endpoints from t_group. Since only one
3670
+ * disconnected part is processed, some endpoints groups may have disappeared.
3671
+ * Mark t_groups containing charges only for subsequent removal
+ *
+ * The index i is advanced only when the group at position i is kept;
+ * after a removal the following groups have been shifted down so the
+ * same position must be examined again.
3672
+ */
3673
+ for ( i = 0, nNewGroupNumber = 0; i < num_t; /*i ++*/ ) {
3674
+ int bNoH = 0, nNumH;
3675
+ nGroupNumber = t_group[i].nGroupNumber;
3676
+ for ( j = 1, nNumH = t_group[i].num[0]; j < T_NUM_NO_ISOTOPIC; j ++ ) {
3677
+ nNumH -= (int)t_group[i].num[j]; /* nNumH = num[0] minus the sum of num[1..T_NUM_NO_ISOTOPIC-1] */
3678
+ }
3679
+ if ( t_group[i].nNumEndpoints != nTautomerGroupNumber[(int)nGroupNumber]
3680
+ #if( IGNORE_TGROUP_WITHOUT_H == 1 )
3681
+ || (bNoH = (t_group[i].num[0]==t_group[i].num[1])) /* only for (H,-) t-groups; (+) t-groups are not removed */
3682
+ #endif
3683
+ ) {
3684
+ if ( !nTautomerGroupNumber[(int)nGroupNumber] || bNoH ) {
3685
+ /* the group belongs to another disconnected part of the structure or has only charges */
3686
+ /* Remove the group */
3687
+ num_t --;
3688
+ if ( i < num_t )
3689
+ memmove( t_group+i, t_group+i+1, (num_t-i)*sizeof(t_group[0]) ); /* close the gap left by the removed group */
3690
+ if ( bNoH ) {
3691
+ /* group contains no mobile hydrogen atoms, only charges. Prepare to remove it. */
3692
+ nTautomerGroupNumber[(int)nGroupNumber] = 0; /* 0 makes the renumbering loop below reset at[].endpoint of its atoms to 0 */
3693
+ num_groups_noH ++;
3694
+ }
3695
+ /*i --;*/
3696
+ } else {
3697
+ /* different number of endpoints */
3698
+ goto err_exit_function; /* program error */ /* <BRKPT> */
3699
+ }
3700
+ } else {
3701
+ /* renumber t_group and prepare to renumber at[i].endpoint: */
+ /* from here on nTautomerGroupNumber[old number] holds the new group number instead of a count */
3702
+ nTautomerGroupNumber[(int)nGroupNumber] =
3703
+ t_group[i].nGroupNumber = ++nNewGroupNumber; /* = i+1 */
3704
+ /* get first group atom orig. number position in the nEndpointAtomNumber[] */
3705
+ /* and in the tautomer endpoint canon numbers part of the connection table */
3706
+ t_group[i].nFirstEndpointAtNoPos = nCurrEndpointAtNoPos[i] =
3707
+ i? (t_group[i-1].nFirstEndpointAtNoPos+t_group[i-1].nNumEndpoints) : 0;
3708
+ t_group[i].num[0] = nNumH; /* store the reduced count computed at the top of this iteration */
3709
+ #if( REMOVE_TGROUP_CHARGE == 1 )
3710
+ t_group[i].num[1] = 0; /* remove only (-) charges */
3711
+ #endif
3712
+ /* -- wrong condition. Disabled.
3713
+ if ( t_group[i].nGroupNumber != i + 1 ) { // for debug only
3714
+ goto err_exit_function; // program error
3715
+ }
3716
+ */
3717
+ i ++;
3718
+ }
3719
+ }
3720
+ if ( num_t != nNewGroupNumber ) { /* for debug only */
3721
+ goto err_exit_function; /* program error */ /* <BRKPT> */
3722
+ }
3723
+
3724
+ /* check if any tautomer group was left */
3725
+ if ( !nNewGroupNumber ) {
3726
+ if ( !num_groups_noH )
3727
+ goto err_exit_function; /* program error: not a tautomer */ /* <BRKPT> */
3728
+ else
3729
+ goto exit_function; /* every group was removed because it had charges only; not an error */
3730
+ }
3731
+ /*
3732
+ * an array for tautomer group sorting later, at the time of storing Connection Table
3733
+ * Later the sorting consists out of 2 steps:
3734
+ * 1) Sort t_group[i].nNumEndpoints endpoint atom ranks within each endpoint group
3735
+ * starting from t_group[i].nFirstEndpointAtNoPos; i = 0..t_group_info->num_t_groups-1
3736
+ * 2) Sort the groups indexes t_group_info->tGroupNumber[]
+ *
+ * The array is allocated with nNewGroupNumber*TGSO_TOTAL_LEN zeroed elements;
+ * only its first nNewGroupNumber elements are initialized here.
3737
+ */
3738
+ if ( !(tGroupNumber=
3739
+ (AT_NUMB*)inchi_calloc(nNewGroupNumber*TGSO_TOTAL_LEN, sizeof(tGroupNumber[0])))) {
3740
+ goto err_exit_function; /* out of RAM */
3741
+ }
3742
+ for ( i = 0; i < nNewGroupNumber; i ++ ) {
3743
+ tGroupNumber[i] = (AT_NUMB)i; /* initialization: original t_group number = (at[i]->endpoint-1) */
3744
+ }
3745
+ /*
3746
+ * renumber endpoint atoms and save their orig. atom
3747
+ * numbers for filling out the tautomer part of the LinearCT.
3748
+ * nCurrEndpointAtNoPos[j] is an index of the atom number in the nEndpointAtomNumber[]
3749
+ */
3750
+ for ( i = 0; i < num_atoms; i ++ ) {
3751
+ if ( j = (int)at[i].endpoint ) {
3752
+ j = (int)(at[i].endpoint = nTautomerGroupNumber[j])-1; /* new t_group number */
3753
+ if ( j >= 0 ) { /* j=-1 in case of no mobile hydrogen atoms (charges only), group being removed */
3754
+ if ( nCurrEndpointAtNoPos[j] >= /* debug only */
3755
+ t_group[j].nFirstEndpointAtNoPos+t_group[j].nNumEndpoints ) {
3756
+ goto err_exit_function; /* program error */ /* <BRKPT> */
3757
+ }
3758
+ nEndpointAtomNumber[(int)nCurrEndpointAtNoPos[j] ++] = (AT_NUMB)i; /* append atom i to the segment of group j */
3759
+ } else {
3760
+ nNumEndpoints --; /* endpoint has been removed */
3761
+ }
3762
+ }
3763
+ }
3764
+ t_group_info->num_t_groups = nNewGroupNumber;
3765
+ t_group_info->nNumEndpoints = nNumEndpoints;
3766
+ t_group_info->nEndpointAtomNumber = nEndpointAtomNumber; /* the new array is now referenced by t_group_info and is not freed here */
3767
+ t_group_info->tGroupNumber = tGroupNumber; /* only the 1st segment filled */
3768
+ inchi_free ( nTautomerGroupNumber ); /* the two temporary arrays are no longer needed */
3769
+ inchi_free ( nCurrEndpointAtNoPos );
3770
+ return nNumEndpoints + T_GROUP_HDR_LEN * nNewGroupNumber + 1; /* nLenLinearCTTautomer */
3771
+
3772
+ err_exit_function:
3773
+ ret = CT_TAUCOUNT_ERR;
3774
+ exit_function:
3775
+ /* release allocated memory; set "no tautomeric group" */
3776
+ if ( nEndpointAtomNumber )
3777
+ inchi_free ( nEndpointAtomNumber );
3778
+ if ( nTautomerGroupNumber )
3779
+ inchi_free ( nTautomerGroupNumber );
3780
+ if ( tGroupNumber )
3781
+ inchi_free ( tGroupNumber );
3782
+ if ( nCurrEndpointAtNoPos )
3783
+ inchi_free ( nCurrEndpointAtNoPos );
3784
+ t_group_info->nNumEndpoints = 0;
3785
+ t_group_info->num_t_groups = 0;
3786
+ if ( !ret && ((t_group_info->tni.bNormalizationFlags & FLAG_NORM_CONSIDER_TAUT) ||
3787
+ t_group_info->nNumIsotopicEndpoints>1 && (t_group_info->bTautFlagsDone & (TG_FLAG_FOUND_ISOTOPIC_H_DONE | TG_FLAG_FOUND_ISOTOPIC_ATOM_DONE))) ) {
3788
+ ret = 1; /* only protons have been (re)moved or neutralization happened */
3789
+ }
3790
+ return ret;
3791
+ }
3792
+ /**************************************************************
3793
+ * tautomers: Compare for sorting
3794
+ ******************************************************************/
3795
+ /* Compare for sorting Ranks only */
3796
+ /* Globals: pn_tRankForSort */
3797
+ int CompRankTautomer(const void* a1, const void* a2 )
3798
+ {
3799
+ int ret = (int)pn_tRankForSort[(int)(*(const AT_RANK*)a1)] -
3800
+ (int)pn_tRankForSort[(int)(*(const AT_RANK*)a2)];
3801
+ return ret;
3802
+ }
3803
+ /*********************************************************************/
3804
+ int SortTautomerGroupsAndEndpoints( T_GROUP_INFO *t_group_info, int num_atoms, int num_at_tg, AT_RANK *nRank )
3805
+ {
3806
+ int i, nFirstEndpointAtNoPos, nNumEndpoints;
3807
+ AT_NUMB *nEndpointAtomNumber;
3808
+ int num_t_groups = num_at_tg - num_atoms;
3809
+ T_GROUP *t_group = NULL;
3810
+ /* check if sorting is required */
3811
+
3812
+ if ( num_t_groups <= 0 || t_group_info->nNumEndpoints < 2 ) {
3813
+ return 0; /* no tautomer data */
3814
+ }
3815
+ t_group = t_group_info->t_group;
3816
+ /* sort endpoints within the groups */
3817
+ for ( i = 0; i < num_t_groups; i ++ ) {
3818
+ if ( t_group[i].nNumEndpoints < 2 )
3819
+ continue; /* program error; should not happen */ /* <BRKPT> */
3820
+ /* set globals for sorting */
3821
+ nFirstEndpointAtNoPos = t_group[i].nFirstEndpointAtNoPos;
3822
+ nNumEndpoints = t_group[i].nNumEndpoints;
3823
+ if ( nNumEndpoints + nFirstEndpointAtNoPos > t_group_info->nNumEndpoints ) { /* for debug only */
3824
+ return CT_TAUCOUNT_ERR; /* program error */ /* <BRKPT> */
3825
+ }
3826
+ nEndpointAtomNumber = t_group_info->nEndpointAtomNumber+(int)nFirstEndpointAtNoPos;
3827
+ pn_tRankForSort = nRank;
3828
+ insertions_sort( nEndpointAtomNumber, nNumEndpoints, sizeof(nEndpointAtomNumber[0]), CompRankTautomer);
3829
+ }
3830
+ /* sort the tautomeric groups according to their ranks only
3831
+ (that is, ignoring the isotopic composition of the mobile groups and ranks of the endpoints) */
3832
+ if ( t_group_info->num_t_groups > 1 ) {
3833
+ /* set globals for sorting */
3834
+ /* a hack: the ranks of all tautomeric groups are */
3835
+ /* located at nRank[num_atoms..num_at_tg-1] */
3836
+ pn_tRankForSort = nRank+num_atoms;
3837
+ /* sort */
3838
+ /* ordering numbers to sort : t_group_info->tGroupNumber; */
3839
+ insertions_sort( t_group_info->tGroupNumber, num_t_groups,
3840
+ sizeof(t_group_info->tGroupNumber[0]), CompRankTautomer);
3841
+ }
3842
+ return t_group_info->num_t_groups;
3843
+ }