rino 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (87) hide show
  1. data/README +44 -0
  2. data/Rakefile +123 -0
  3. data/ext/extconf.rb +26 -0
  4. data/ext/ruby_inchi_main.so +0 -0
  5. data/ext/src/aux2atom.h +2786 -0
  6. data/ext/src/comdef.h +148 -0
  7. data/ext/src/e_0dstereo.c +3014 -0
  8. data/ext/src/e_0dstereo.h +31 -0
  9. data/ext/src/e_comdef.h +57 -0
  10. data/ext/src/e_ctl_data.h +147 -0
  11. data/ext/src/e_ichi_io.c +498 -0
  12. data/ext/src/e_ichi_io.h +40 -0
  13. data/ext/src/e_ichi_parms.c +37 -0
  14. data/ext/src/e_ichi_parms.h +41 -0
  15. data/ext/src/e_ichicomp.h +50 -0
  16. data/ext/src/e_ichierr.h +40 -0
  17. data/ext/src/e_ichimain.c +593 -0
  18. data/ext/src/e_ichisize.h +43 -0
  19. data/ext/src/e_inchi_atom.c +75 -0
  20. data/ext/src/e_inchi_atom.h +33 -0
  21. data/ext/src/e_inpdef.h +41 -0
  22. data/ext/src/e_mode.h +706 -0
  23. data/ext/src/e_mol2atom.c +649 -0
  24. data/ext/src/e_readinch.c +58 -0
  25. data/ext/src/e_readmol.c +54 -0
  26. data/ext/src/e_readmol.h +180 -0
  27. data/ext/src/e_readstru.c +251 -0
  28. data/ext/src/e_readstru.h +33 -0
  29. data/ext/src/e_util.c +284 -0
  30. data/ext/src/e_util.h +61 -0
  31. data/ext/src/extr_ct.h +251 -0
  32. data/ext/src/ichi.h +206 -0
  33. data/ext/src/ichi_bns.c +7999 -0
  34. data/ext/src/ichi_bns.h +231 -0
  35. data/ext/src/ichican2.c +5000 -0
  36. data/ext/src/ichicano.c +2195 -0
  37. data/ext/src/ichicano.h +49 -0
  38. data/ext/src/ichicans.c +1625 -0
  39. data/ext/src/ichicant.h +379 -0
  40. data/ext/src/ichicomn.h +260 -0
  41. data/ext/src/ichicomp.h +50 -0
  42. data/ext/src/ichidrp.h +119 -0
  43. data/ext/src/ichierr.h +124 -0
  44. data/ext/src/ichiisot.c +101 -0
  45. data/ext/src/ichilnct.c +286 -0
  46. data/ext/src/ichimain.h +132 -0
  47. data/ext/src/ichimak2.c +1189 -0
  48. data/ext/src/ichimake.c +3812 -0
  49. data/ext/src/ichimake.h +205 -0
  50. data/ext/src/ichimap1.c +851 -0
  51. data/ext/src/ichimap2.c +2856 -0
  52. data/ext/src/ichimap4.c +1609 -0
  53. data/ext/src/ichinorm.c +741 -0
  54. data/ext/src/ichinorm.h +67 -0
  55. data/ext/src/ichiparm.c +45 -0
  56. data/ext/src/ichiparm.h +1441 -0
  57. data/ext/src/ichiprt1.c +3612 -0
  58. data/ext/src/ichiprt2.c +1511 -0
  59. data/ext/src/ichiprt3.c +3011 -0
  60. data/ext/src/ichiqueu.c +1003 -0
  61. data/ext/src/ichiring.c +326 -0
  62. data/ext/src/ichiring.h +49 -0
  63. data/ext/src/ichisize.h +35 -0
  64. data/ext/src/ichisort.c +539 -0
  65. data/ext/src/ichister.c +3538 -0
  66. data/ext/src/ichister.h +35 -0
  67. data/ext/src/ichitaut.c +3843 -0
  68. data/ext/src/ichitaut.h +387 -0
  69. data/ext/src/ichitime.h +74 -0
  70. data/ext/src/inchi_api.h +670 -0
  71. data/ext/src/inchi_dll.c +1480 -0
  72. data/ext/src/inchi_dll.h +34 -0
  73. data/ext/src/inchi_dll_main.c +23 -0
  74. data/ext/src/inchi_dll_main.h +31 -0
  75. data/ext/src/inpdef.h +328 -0
  76. data/ext/src/lreadmol.h +1246 -0
  77. data/ext/src/mode.h +706 -0
  78. data/ext/src/ruby_inchi_main.c +558 -0
  79. data/ext/src/runichi.c +4179 -0
  80. data/ext/src/strutil.c +3861 -0
  81. data/ext/src/strutil.h +182 -0
  82. data/ext/src/util.c +1130 -0
  83. data/ext/src/util.h +85 -0
  84. data/lib/clean_tempfile.rb +220 -0
  85. data/lib/rino.rb +111 -0
  86. data/test/test.rb +386 -0
  87. metadata +130 -0
@@ -0,0 +1,3843 @@
1
+ /*
2
+ * International Union of Pure and Applied Chemistry (IUPAC)
3
+ * International Chemical Identifier (InChI)
4
+ * Version 1
5
+ * Software version 1.00
6
+ * April 13, 2005
7
+ * Developed at NIST
8
+ */
9
+
10
+ #include <stdio.h>
11
+ #include <stdlib.h>
12
+ #include <string.h>
13
+
14
+ #include "mode.h"
15
+
16
+ #include "inpdef.h"
17
+ #include "extr_ct.h"
18
+ #include "inpdef.h"
19
+ #include "ichitaut.h"
20
+ #include "ichinorm.h"
21
+ #include "ichicant.h"
22
+ #include "ichicomn.h"
23
+
24
+ #include "ichicomp.h"
25
+
26
+ #include "util.h"
27
+
28
+ #include "ichi_bns.h"
29
+ /* local prototypes: functions declared here ahead of their first use in this file.
     RegisterEndPoints and SetTautomericBonds are defined further down in this file;
     NOTE(review): the remaining ones are presumably defined later in this file as
     well -- their definitions are not visible in this part of the source. */
30
+ int SetTautomericBonds( inp_ATOM *at, int nNumBondPos, T_BONDPOS *BondPos );
31
+ int CompRankTautomer(const void* a1, const void* a2 );
32
+ int RegisterEndPoints( T_GROUP_INFO *t_group_info, /* T_GROUP *t_group, int *pnum_t, int max_num_t,*/
33
+ T_ENDPOINT *EndPoint, int nNumEndPoints, inp_ATOM *at, int num_atoms, C_GROUP_INFO *cgi
34
+ , struct BalancedNetworkStructure *pBNS );
35
+ int cmpTGroupNumber( const void *a1, const void *a2 );
36
+ int comp_candidates( const void *a1, const void *a2 );
37
+ int MoveEndpoint( inp_ATOM *at, S_CANDIDATE *s_candidate, AT_NUMB endpoint, AT_NUMB *nTGroupNewNumbers,
38
+ AT_NUMB *nTGroupPosition, int nNewTGroupOrd, T_GROUP_INFO *t_group_info);
39
+
40
+ int FindAccessibleEndPoints( T_ENDPOINT *EndPoint, int *nNumEndPoints, T_BONDPOS *BondPos, int *nNumBondPos,
41
+ struct BalancedNetworkStructure *pBNS, struct BalancedNetworkData *pBD,
42
+ inp_ATOM *at, int num_atoms, C_GROUP_INFO *cgi );
43
+
44
+ /* bits for GetChargeType: values reported through its cChargeSubtype output argument.
     nGetEndpointInfo() tests the H_ACCEPT and H_DONOR bits of that value. */
45
+
46
+ #define C_SUBTYPE_CHARGED 0 /* value 0: sets no bit, so it cannot be tested with '&' */
47
+ #define C_SUBTYPE_p_DONOR 1 /* new */
48
+ #define C_SUBTYPE_p_ACCEPT 2 /* new */
49
+ #define C_SUBTYPE_H_ACCEPT 4
50
+ #define C_SUBTYPE_H_DONOR 8
51
+ #define C_SUBTYPE_NEUTRAL 16
52
+
53
+
54
+ /* internal stack array size: RegisterEndPoints() declares its work arrays with
     MAX_STACK_ARRAY_LEN+1 elements on the stack and switches to heap allocation
     when more entries are needed */
55
+ #define MAX_STACK_ARRAY_LEN 127
56
+ #define MAX_TGROUP_ARRAY_LEN 127 /* NOTE(review): not referenced in the visible part of this file */
57
+
58
+ /* local prototypes (charge-point and salt handling helpers) */
59
+ int GetChargeType( inp_ATOM *atom, int iat, S_CHAR *cChargeSubtype );
60
+ int GetNeutralRepsIfNeeded( AT_NUMB *pri, AT_NUMB *prj, inp_ATOM *at, int num_atoms, T_ENDPOINT *EndPoint, int nNumEndPoints, C_GROUP_INFO *cgi );
61
+ int bCanBeACPoint( inp_ATOM *at, S_CHAR cCharge, S_CHAR cChangeValence, S_CHAR neutral_bonds_valence,
62
+ S_CHAR neutral_valence, S_CHAR nEndpointValence, S_CHAR *cChargeSubtype );
63
+ int CmpCCandidates( const void *a1, const void *a2 );
64
+ int RegisterCPoints( C_GROUP *c_group, int *pnum_c, int max_num_c, T_GROUP_INFO *t_group_info,
65
+ int point1, int point2, int ctype, inp_ATOM *at, int num_atoms );
66
+ int GetSaltChargeType( inp_ATOM *at, int at_no, T_GROUP_INFO *t_group_info, int *s_subtype );
67
+ int GetOtherSaltChargeType( inp_ATOM *at, int at_no, T_GROUP_INFO *t_group_info, int *s_subtype, int bAccept_O );
68
+ int MergeSaltTautGroupsBlind( inp_ATOM *at, int s_type, int num_atoms, S_GROUP_INFO *s_group_info, int nNumCandidates,
69
+ T_GROUP_INFO *t_group_info, C_GROUP_INFO *c_group_info,
70
+ struct BalancedNetworkStructure *pBNS );
71
+ int ConnectSaltTGroups2SuperTGroup( inp_ATOM *at, int num_atoms, S_GROUP_INFO *s_group_info, int nNumCandidates,
72
+ T_GROUP_INFO *t_group_info, C_GROUP_INFO *c_group_info,
73
+ struct BalancedNetworkStructure *pBNS, int *nNewTGroupNumber, int *vertSuperTGroup );
74
+ int bDoNotMergeNonTautAtom(inp_ATOM *at, int at_no);
75
+ int GetOtherSaltType( inp_ATOM *at, int at_no, int *s_subtype );
76
+
77
+
78
+ /****************************************************************/
79
+ /* tautomers: Sorting globals */
     /* File-scope pointer with external linkage (not static).
        NOTE(review): presumably set before sorting and read by a comparison
        callback such as CompRankTautomer -- confirm against the rest of the file. */
80
+ AT_RANK *pn_tRankForSort;
81
+
82
+ /*************************************************************************************/
83
+ int is_centerpoint_elem( U_CHAR el_number )
84
+ {
85
+ static U_CHAR el_numb[12];
86
+ static int len;
87
+ int i;
88
+ if ( !el_numb[0] && !len ) {
89
+ el_numb[len++] = (U_CHAR)get_periodic_table_number( "C" );
90
+ el_numb[len++] = (U_CHAR)get_periodic_table_number( "N" );
91
+ el_numb[len++] = (U_CHAR)get_periodic_table_number( "P" );
92
+ el_numb[len++] = (U_CHAR)get_periodic_table_number( "S" );
93
+ el_numb[len++] = (U_CHAR)get_periodic_table_number( "I" );
94
+ el_numb[len++] = (U_CHAR)get_periodic_table_number( "As" );
95
+ el_numb[len++] = (U_CHAR)get_periodic_table_number( "Sb" );
96
+ el_numb[len++] = (U_CHAR)get_periodic_table_number( "Se" );
97
+ el_numb[len++] = (U_CHAR)get_periodic_table_number( "Te" );
98
+ el_numb[len++] = (U_CHAR)get_periodic_table_number( "Cl" );
99
+ el_numb[len++] = (U_CHAR)get_periodic_table_number( "Br" );
100
+ }
101
+ for ( i = 0; i < len; i ++ ) {
102
+ if ( el_numb[i] == el_number ) {
103
+ return 1;
104
+ }
105
+ }
106
+ return 0;
107
+ }
108
+ /*************************************************************************************/
109
+ int is_centerpoint_elem_strict( U_CHAR el_number )
110
+ {
111
+ static U_CHAR el_numb[6];
112
+ static int len;
113
+ int i;
114
+ if ( !el_numb[0] && !len ) {
115
+ el_numb[len++] = (U_CHAR)get_periodic_table_number( "C" );
116
+ el_numb[len++] = (U_CHAR)get_periodic_table_number( "N" );
117
+ el_numb[len++] = (U_CHAR)get_periodic_table_number( "P" );
118
+ el_numb[len++] = (U_CHAR)get_periodic_table_number( "As" );
119
+ el_numb[len++] = (U_CHAR)get_periodic_table_number( "Sb" );
120
+ }
121
+ for ( i = 0; i < len; i ++ ) {
122
+ if ( el_numb[i] == el_number ) {
123
+ return 1;
124
+ }
125
+ }
126
+ return 0;
127
+ }
128
+ /*************************************************************************************/
129
+ int get_endpoint_valence( U_CHAR el_number )
130
+ {
131
+ static U_CHAR el_numb[6];
132
+ static int len, len2;
133
+ int i;
134
+ if ( !el_numb[0] && !len ) {
135
+ el_numb[len++] = (U_CHAR)get_periodic_table_number( "O" );
136
+ el_numb[len++] = (U_CHAR)get_periodic_table_number( "S" );
137
+ el_numb[len++] = (U_CHAR)get_periodic_table_number( "Se" );
138
+ el_numb[len++] = (U_CHAR)get_periodic_table_number( "Te" );
139
+ len2 = len;
140
+ el_numb[len++] = (U_CHAR)get_periodic_table_number( "N" );
141
+ }
142
+ for ( i = 0; i < len; i ++ ) {
143
+ if ( el_numb[i] == el_number ) {
144
+ return i < len2? 2 : 3;
145
+ }
146
+ }
147
+ return 0;
148
+ }
149
+ /********************************************************************************************************/
150
+ int AddAtom2num( AT_RANK num[], inp_ATOM *atom, int at_no, int bSubtract )
151
+ { /* bSubtract: 0=> add, 1=>subtract, 2=> fill */
152
+ inp_ATOM *at = atom + at_no;
153
+ int k;
154
+ int nMobile = (at->charge == -1);
155
+ if ( bSubtract == 1 ) {
156
+ /* 1: subtract */
157
+ num[1] -= nMobile;
158
+ nMobile += at->num_H;
159
+ num[0] -= nMobile;
160
+ for ( k = 0; k < T_NUM_ISOTOPIC; k ++ ) {
161
+ /* T (3H isotope) first because it has higher weight */
162
+ num[T_NUM_NO_ISOTOPIC+k] -= at->num_iso_H[NUM_H_ISOTOPES-k-1];
163
+ }
164
+ } else {
165
+ if ( bSubtract == 2 ) {
166
+ /* fill */
167
+ memset( num, 0, (T_NUM_NO_ISOTOPIC + T_NUM_ISOTOPIC)*sizeof(num[0]) );
168
+ }
169
+ /* else (0): add */
170
+ num[1] += nMobile;
171
+ nMobile += at->num_H;
172
+ num[0] += nMobile;
173
+ for ( k = 0; k < T_NUM_ISOTOPIC; k ++ ) {
174
+ /* T (3H isotope) first because it has higher weight */
175
+ num[T_NUM_NO_ISOTOPIC+k] += at->num_iso_H[NUM_H_ISOTOPES-k-1];
176
+ }
177
+ }
178
+ return nMobile;
179
+ }
180
+ /********************************************************************************************************/
181
+ void AddAtom2DA( AT_RANK num_DA[], inp_ATOM *atom, int at_no, int bSubtract )
182
+ { /* bSubtract: 0=> add, 1=>subtract, 2=> fill */
183
+ inp_ATOM *at = atom + at_no;
184
+ int nDelta, nAcidic_O;
185
+
186
+ if (at->charge < -1 || at->charge == 1 && !at->c_point || at->charge > 1 )
187
+ return;
188
+
189
+ nDelta = ( bSubtract == 1 )? -1 : 1;
190
+
191
+ /* "Acidic" O, S, Se, Te recognition */
192
+ if ( at->at_type & ATT_ACIDIC_CO ) {
193
+ nAcidic_O = nDelta;
194
+ } else {
195
+ nAcidic_O = 0;
196
+ }
197
+
198
+ if ( bSubtract == 2 ) { /* 2: fill, otherwise add */
199
+ memset( num_DA, 0, TG_NUM_DA * sizeof(num_DA[0]) );
200
+ }
201
+ if ( at->charge <= 0 && at->valence == at->chem_bonds_valence ||
202
+ /* neutral or negative donor */
203
+ at->charge > 0 && at->valence + 1 == at->chem_bonds_valence
204
+ /* positively charged donor */
205
+ ) {
206
+ if ( at->charge < 0 ) {
207
+ num_DA[TG_Num_dM] += nDelta;
208
+ num_DA[TG_Num_dO] += nAcidic_O;
209
+ } else
210
+ if ( at->num_H ) {
211
+ num_DA[TG_Num_dH] += nDelta;
212
+ num_DA[TG_Num_dO] += nAcidic_O;
213
+ }
214
+ } else
215
+ if ( at->charge <= 0 && at->valence + 1 == at->chem_bonds_valence ||
216
+ at->charge > 0 && at->valence + 2 == at->chem_bonds_valence ) {
217
+ /* acceptor */
218
+ if ( at->charge < 0 ) {
219
+ num_DA[TG_Num_aM] += nDelta;
220
+ } else
221
+ if ( at->num_H ) {
222
+ num_DA[TG_Num_aH] += nDelta;
223
+ } else {
224
+ num_DA[TG_Num_aO] += nAcidic_O; /* acidic O-acceptor has no H or charge */
225
+ }
226
+ }
227
+ return;
228
+ }
229
+ /********************************************************************************************************/
230
+ int AddEndPoint( T_ENDPOINT *pEndPoint, inp_ATOM *at, int iat )
231
+ {
232
+ pEndPoint->nAtomNumber = iat;
233
+ pEndPoint->nEquNumber = 0;
234
+ pEndPoint->nGroupNumber = at[iat].endpoint;
235
+ if ( at[iat].endpoint ) {
236
+ /* already an endpoint */
237
+ memset( pEndPoint->num, 0, sizeof(pEndPoint->num) );
238
+ } else {
239
+ /* not an endpoint yet, make it an endpoint */
240
+ AddAtom2num( pEndPoint->num, at, iat, 2 ); /* fill */
241
+ AddAtom2DA( pEndPoint->num_DA, at, iat, 2 );
242
+ /*
243
+ nMobile = pEndPoint->num[1] = (at[iat].charge == -1);
244
+ nMobile = pEndPoint->num[0] = at[iat].num_H + nMobile;
245
+ for ( k = 0; k < T_NUM_ISOTOPIC; k ++ ) {
246
+ pEndPoint->num[T_NUM_NO_ISOTOPIC+k] = at[iat].num_iso_H[NUM_H_ISOTOPES-k-1];
247
+ }
248
+ */
249
+ }
250
+ return 0;
251
+ }
252
+ /********************************************************************************************************/
253
+ int nGetEndpointInfo( inp_ATOM *atom, int iat, ENDPOINT_INFO *eif )
254
+ {
255
+ int nEndpointValence;
256
+ int nMobile;
257
+ S_CHAR cChargeSubtype;
258
+
259
+ if ( atom[iat].radical && atom[iat].radical != RADICAL_SINGLET )
260
+ return 0; /* a radical */
261
+ if ( !(nEndpointValence = get_endpoint_valence( atom[iat].el_number )) )
262
+ return 0; /* not an endpoint */
263
+ if ( nEndpointValence <= atom[iat].valence )
264
+ return 0; /* not an endpoint, for example >N(+)< or >N< or >O(+)- or >O- or >N- or -O- */
265
+
266
+ if ( atom[iat].charge == -1 || atom[iat].charge == 0 ) {
267
+ /* not a positive charge-point */
268
+ if ( nEndpointValence < atom[iat].chem_bonds_valence )
269
+ return 0; /* abnormal valence > standard endpoint valence */
270
+ nMobile = atom[iat].num_H + (atom[iat].charge == -1);
271
+ if ( nMobile + atom[iat].chem_bonds_valence != nEndpointValence )
272
+ return 0; /* non-standard endpoint valence */
273
+ switch ( atom[iat].chem_bonds_valence - atom[iat].valence ) {
274
+ case 0:
275
+ eif->cDonor = 1;
276
+ eif->cAcceptor = 0;
277
+ break;
278
+ case 1:
279
+ eif->cDonor = 0;
280
+ eif->cAcceptor = 1;
281
+ break;
282
+ default:
283
+ return 0;
284
+ }
285
+ eif->cMobile = nMobile;
286
+ eif->cNeutralBondsValence = nEndpointValence-nMobile;
287
+ eif->cMoveableCharge = 0;
288
+ return nEndpointValence;
289
+ } else
290
+ if ( atom[iat].c_point &&
291
+ 0 <= GetChargeType( atom, iat, &cChargeSubtype ) &&
292
+ ((int)cChargeSubtype & (C_SUBTYPE_H_ACCEPT|C_SUBTYPE_H_DONOR))
293
+ ) {
294
+ /* charge-point */
295
+ if ( cChargeSubtype & C_SUBTYPE_H_ACCEPT ) {
296
+ eif->cDonor = 0;
297
+ eif->cAcceptor = 1;
298
+ } else
299
+ if ( cChargeSubtype & C_SUBTYPE_H_DONOR ) {
300
+ eif->cDonor = 1;
301
+ eif->cAcceptor = 0;
302
+ } else {
303
+ return 0;
304
+ }
305
+ eif->cMobile = atom[iat].num_H;
306
+ eif->cNeutralBondsValence = nEndpointValence-atom[iat].num_H;
307
+ eif->cMoveableCharge = atom[iat].charge;
308
+ return nEndpointValence;
309
+ }
310
+ return 0;
311
+ }
312
+ /********************************************************************************************************/
313
+ /* RegisterEndPoints ret>0 => new registration happened, 0 => no changes, -1 => program error (debug)
   *
   * Registers the atoms listed in EndPoint[] as tautomeric endpoints: creates new
   * t-groups and/or merges existing ones in t_group_info->t_group[], marks
   * at[].endpoint, renumbers the t-groups after a merge, and rebuilds the
   * t_group_info->tGroupNumber[] index (allocated here on first use).
   *
   * Inputs per endpoint (see the "At this point" comment in the body):
   *   EndPoint[i].nGroupNumber  current t-group of the atom, 0 if none
   *   EndPoint[i].nEquNumber    either all zero (merge everything into one t-group)
   *                             or all non-zero (IDs assigned by FindAccessibleEndPoints)
   *   EndPoint[i].nAtomNumber   index of the atom in at[]
   *
   * Return value:
   *   > 0  number of changes = atoms newly marked as endpoints + t-groups merged away
   *     0  nothing to do
   *    -1  inconsistent input, too many t-groups, or out of memory
   *   or the error code returned by ReInitBnStruct / AddCGroups2BnStruct /
   *   AddTGroups2BnStruct; on that path t_group_info->num_t_groups is NOT updated
   *   although t_group[] and at[].endpoint have already been changed.
   *
   * The three work arrays live on the stack (MAX_STACK_ARRAY_LEN+1 entries) and
   * are replaced by heap blocks only when they would overflow; every exit path
   * that can be reached while a heap block is owned frees it.
   */
314
+ int RegisterEndPoints( T_GROUP_INFO *t_group_info, /* T_GROUP *t_group, int *pnum_t, int max_num_t,*/
315
+ T_ENDPOINT *EndPoint, int nNumEndPoints, inp_ATOM *at, int num_atoms, C_GROUP_INFO *cgi
316
+ , struct BalancedNetworkStructure *pBNS )
317
+ {
318
+ T_GROUP *t_group = t_group_info->t_group;
319
+ int *pnum_t = &t_group_info->num_t_groups;
320
+ int max_num_t = t_group_info->max_num_t_groups;
321
+ int nNumZeroEqu, nNumNewTGroups;
322
+ AT_NUMB group, prev_group, prev_eqnum, nNextGroupNumber, nLeastGroupNumber;
323
+ int nNumGroups, num_t, difference;
324
+ int i, j, k, ret;
325
+ AT_NUMB nNewTgNumberStackArray[MAX_STACK_ARRAY_LEN+1];
326
+ AT_NUMB nGroupNumberStackArray[MAX_STACK_ARRAY_LEN+1];
327
+ AT_NUMB nGroupNewNumberStackArray[MAX_STACK_ARRAY_LEN+1];
328
+ AT_NUMB *nNewTgNumber = nNewTgNumberStackArray; /* old t-group number -> number after renumbering */
329
+ AT_NUMB *nGroupNumber = nGroupNumberStackArray; /* old IDs of the t-groups to be merged away */
330
+ AT_NUMB *nGroupNewNumber = nGroupNewNumberStackArray; /* first: fict. IDs seen; later: merge target IDs */
331
+
332
+ if ( nNumEndPoints <= 0 )
333
+ return 0; /* nothing to do */
334
+ num_t = *pnum_t;
335
+ difference = 0;
336
+ nNextGroupNumber = 0;
337
+ nNumZeroEqu = 0;
338
+ ret = 0;
339
+ /* find max group number; increment it to obtain next available group number */
340
+ for ( i = 0; i < num_t; i ++ ) {
341
+ if ( nNextGroupNumber < t_group[i].nGroupNumber )
342
+ nNextGroupNumber = t_group[i].nGroupNumber;
343
+ }
344
+ nNextGroupNumber ++;
345
+
346
+ /* find min non-zero group number nLeastGroupNumber;
347
+ count zero EndPoint[i].nEquNumber
348
+ if all EndPoint[i].nGroupNumber are equal and non-zero then exit: nothing to do.
349
+ */
350
+ nLeastGroupNumber = nNextGroupNumber;
351
+ prev_group = EndPoint[0].nGroupNumber;
352
+ prev_eqnum = EndPoint[0].nEquNumber;
353
+ for ( i = j = k = 0; i < nNumEndPoints; i ++ ) {
354
+ if ( group = EndPoint[i].nGroupNumber ) { /* assignment intended: non-zero => already in a t-group */
355
+ if ( group < nLeastGroupNumber ) {
356
+ nLeastGroupNumber = group;
357
+ }
358
+ }
359
+ j += (prev_group == EndPoint[i].nGroupNumber); /* count endpoints that belong to the 1st group */
360
+ k += (prev_eqnum == EndPoint[i].nEquNumber); /* count endpoints that belong to a group equivalent to the 1st group */
361
+ nNumZeroEqu += !EndPoint[i].nEquNumber; /* count endpoints that have NOT been processed by FindAccessibleEndPoints() */
362
+ }
363
+ if ( j == nNumEndPoints && prev_group && k == nNumEndPoints ) {
364
+ /* all endpoints already belong to one t-group;
365
+ the last comparison is not needed for now
366
+ because EndPoint[i].nEquNumber cannot make
367
+ endpoint partitioning finer
368
+ */
369
+ return 0;
370
+ }
371
+
372
+ nNumNewTGroups = 0;
373
+
374
+ if ( !nNumZeroEqu ) {
375
+ /* EndPoint[] has been processed by FindAccessibleEndPoints;
376
+ * equal EndPoint[i].nEquNumber mark endpoints belonging to
377
+ * the same t-group
378
+ * Since now the next available t-group number, nNextGroupNumber,
379
+ * is known, replace fict. IDs assigned by FindAccessibleEndPoints
380
+ * with correct new t-group numbers.
381
+ */
382
+ for ( i = 0; i < nNumEndPoints; i ++ ) {
383
+ if ( (group = EndPoint[i].nEquNumber) >= nNextGroupNumber ) {
384
+ /* replace fict. IDs assigned by FindAccessibleEndPoints() with new t-group numbers */
385
+ /* these fict. IDs have values = (num_atoms+1), (num_atoms+2),...; they may be non-contiguous */
386
+ for ( j = 0; j < nNumNewTGroups; j ++ ) {
387
+ if ( group == nGroupNewNumber[j] )
388
+ break;
389
+ }
390
+ if ( j == nNumNewTGroups ) {
391
+ /* found new fict. ID = group */
392
+ if ( j == MAX_STACK_ARRAY_LEN && nGroupNewNumber == nGroupNewNumberStackArray ) {
393
+ /* stack array overflow; allocate more memory than may be needed */
394
+ nGroupNewNumber = (AT_NUMB *)inchi_malloc(nNumEndPoints*sizeof(nGroupNewNumber[0]));
395
+ if ( !nGroupNewNumber ) {
396
+ ret = -1;
397
+ goto exit_function;
398
+ }
399
+ memcpy( nGroupNewNumber, nGroupNewNumberStackArray, nNumNewTGroups*sizeof(nGroupNewNumber[0]));
400
+ }
401
+ /* save newly found fict. t-group ID to compare to the next values of EndPoint[].nEquNumber */
402
+ nGroupNewNumber[j] = group;
403
+ nNumNewTGroups ++;
404
+ }
405
+ EndPoint[i].nEquNumber = nNextGroupNumber + j;
406
+ }
407
+ } /* after this point the values just stored in nGroupNewNumber[] will not
408
+ be used. However, the obtained nNumNewTGroups value will be used */
409
+ } else
410
+ if ( nNumZeroEqu == nNumEndPoints ) {
411
+ /* EndPoint[] has NOT been processed by FindAccessibleEndPoints;
412
+ all atoms and t-groups to which endpoints belong should be merged into a single t-group
413
+ */
414
+ if ( nLeastGroupNumber == nNextGroupNumber ) {
415
+ /* flag to create a new t-group: none of the found
416
+ * endpoints belong to an already known t-group
417
+ */
418
+ nNumNewTGroups = 1; /* otherwise 0 */
419
+ }
420
+ /* All EndPoint[*].nEquNumber are zeroes. All endpoints will
421
+ * belong to one new or old t-group; its ID is nLeastGroupNumber.
422
+ * Set EndPoint[i].nEquNumber = nLeastGroupNumber;
423
+ */
424
+ for ( i = 0; i < nNumEndPoints; i ++ ) {
425
+ EndPoint[i].nEquNumber = nLeastGroupNumber;
426
+ }
427
+ } else {
428
+ ret = -1; /* program error: only some of EndPoint[i].nEquNumber are zero */ /* <BRKPT> */
429
+ goto exit_function;
430
+ }
431
+
432
+ if ( nNumNewTGroups ) {
433
+ /* create new nNumNewTGroups t-group(s) */
434
+ if ( num_t + nNumNewTGroups > max_num_t ) {
435
+ ret = -1; /* found too many t-groups */ /* <BRKPT> */
436
+ goto exit_function;
437
+ }
438
+ /* initialize new t-group(s) */
439
+ memset( t_group + num_t, 0, nNumNewTGroups * sizeof(t_group[0]) );
440
+ for ( i = 0; i < nNumNewTGroups; i ++ ) {
441
+ t_group[num_t+i].nGroupNumber = nNextGroupNumber + i;
442
+ }
443
+ }
444
+
445
+ /* At this point:
446
+ * EndPoint[i].nGroupNumber == 0 => the endpoint atom does not belong to a t-group yet
447
+ * EndPoint[i].nGroupNumber > 0 => current t-group ID of the endpoint atom
448
+ * EndPoint[i].nEquNumber --> new ID of a tautomeric group of this endpoint atom
449
+ * EndPoint[i].nAtomNumber --> number of the endpoint atom
450
+ */
451
+
452
+ nNumGroups = 0; /* counts the groups to be renumbered */
453
+ for ( i = j = 0; i < nNumEndPoints; i ++ ) {
454
+ if ( group = EndPoint[i].nGroupNumber ) { /* assignment intended */
455
+ if ( group == EndPoint[i].nEquNumber ) {
456
+ continue; /* ignore: the endpoint belongs to the same t-group as before */
457
+ }
458
+ /* save information for renumbering of the existing t-groups */
459
+ for ( j = 0; j < nNumGroups; j ++ ) {
460
+ if ( group == nGroupNumber[j] ) {
461
+ if ( EndPoint[i].nEquNumber != nGroupNewNumber[j] ) {
462
+ ret = -1; /* program error */ /* <BRKPT> */
463
+ goto exit_function;
464
+ }
465
+ break;
466
+ }
467
+ }
468
+ if ( j == nNumGroups ) {
469
+ /* discovered a new t-group number; store it together with its nEquNumber */
470
+ if ( j == MAX_STACK_ARRAY_LEN ) {
                  /* stack arrays are full: move whichever array is still on the stack to the heap */
471
+ if ( nGroupNewNumber == nGroupNewNumberStackArray ) {
472
+ nGroupNewNumber = (AT_NUMB *)inchi_malloc(nNumEndPoints*sizeof(nGroupNewNumber[0]));
473
+ if ( !nGroupNewNumber ) {
474
+ ret = -1;
475
+ goto exit_function;
476
+ }
477
+ memcpy( nGroupNewNumber, nGroupNewNumberStackArray, nNumGroups*sizeof(nGroupNewNumber[0]));
478
+ }
479
+ if ( nGroupNumber == nGroupNumberStackArray ) {
480
+ nGroupNumber = (AT_NUMB *)inchi_malloc(nNumEndPoints*sizeof(nGroupNumber[0]));
481
+ if ( !nGroupNumber ) {
482
+ ret = -1;
483
+ goto exit_function;
484
+ }
485
+ memcpy( nGroupNumber, nGroupNumberStackArray, nNumGroups*sizeof(nGroupNumber[0]));
486
+ }
487
+ }
488
+
489
+ nGroupNumber[j] = group; /* old t-group ID */
490
+ nGroupNewNumber[j] = EndPoint[i].nEquNumber; /* new t-group ID */
491
+ nNumGroups ++;
492
+ }
493
+ } else {
494
+ /* add a new endpoint to the newly created or previously existing t-groups */
495
+ group = EndPoint[i].nEquNumber;
496
+ if ( group >= nNextGroupNumber ) {
497
+ /* get index of a new t-group from equ number */
498
+ j = num_t + group - nNextGroupNumber; /* newly assigned IDs are contiguous */
499
+ } else {
500
+ /* old t-group */
501
+ if ( j >= num_t || group != t_group[j].nGroupNumber ) {
502
+ /* search only if j is not a needed group index */
503
+ for ( j = 0; j < num_t; j ++ ) {
504
+ if ( group == t_group[j].nGroupNumber )
505
+ break;
506
+ }
507
+ if ( j == num_t ) {
508
+ ret = -1; /* program error: t-group not found */ /* <BRKPT> */
509
+ goto exit_function;
510
+ }
511
+ }
512
+ }
513
+ /* add atom to existing or new t-group */
514
+ t_group[j].nNumEndpoints ++;
515
+ for ( k = 0; k < (int)(sizeof(t_group->num)/sizeof(t_group->num[0])); k ++ )
516
+ t_group[j].num[k] += EndPoint[i].num[k];
517
+ for ( k = 0; k < (int)(sizeof(t_group->num_DA)/sizeof(t_group->num_DA[0])); k ++ )
518
+ t_group[j].num_DA[k] += EndPoint[i].num_DA[k];
519
+ /* mark endpoint */
520
+ at[EndPoint[i].nAtomNumber].endpoint = group;
521
+ difference ++;
522
+ }
523
+ }
524
+
525
+ difference += nNumGroups;
526
+ num_t += nNumNewTGroups;
527
+ if ( !difference ) {
528
+ ret = 0; /* nothing to do. Not necessarily a program error: happens if all EndPoint[i].nGroupNumber==EndPoint[i].nEquNumber */
529
+ goto exit_function;
530
+ }
531
+
532
+ if ( nNumGroups ) {
533
+ /* prepare for renumbering: find max t-group number */
534
+ for ( i = 0, nNextGroupNumber = 0; i < num_t; i ++ ) {
535
+ if ( nNextGroupNumber < t_group[i].nGroupNumber ) {
536
+ nNextGroupNumber = t_group[i].nGroupNumber;
537
+ }
538
+ }
539
+ }
540
+ /* renumber and merge t-groups */
541
+ for ( i = 0; i < nNumGroups; i ++ ) {
542
+ int i1, i2;
543
+ AT_NUMB group1 = nGroupNumber[i];
544
+ AT_NUMB group2 = nGroupNewNumber[i];
545
+ /* add group1 to group2, then delete group1. */
546
+ for ( j = 0, i1 = i2 = -1; j < num_t && (i1 < 0 || i2 < 0); j ++ ) {
547
+ if ( i1 < 0 && group1 == t_group[j].nGroupNumber )
548
+ i1 = j;
549
+ if ( i2 < 0 && group2 == t_group[j].nGroupNumber )
550
+ i2 = j;
551
+ }
552
+ if ( i1 < 0 || i2 < 0 ) {
553
+ ret = -1; /* program error */ /* <BRKPT> */
554
+ goto exit_function;
555
+ }
556
+ /* add t_group[i1] to t_group[i2] and remove t_group[i1] */
557
+ for ( k = 0; k < (int)(sizeof(t_group->num)/sizeof(t_group->num[0])); k ++ )
558
+ t_group[i2].num[k] += t_group[i1].num[k];
559
+ for ( k = 0; k < (int)(sizeof(t_group->num_DA)/sizeof(t_group->num_DA[0])); k ++ )
560
+ t_group[i2].num_DA[k] += t_group[i1].num_DA[k];
561
+ t_group[i2].nNumEndpoints += t_group[i1].nNumEndpoints;
562
+ num_t --;
563
+ if ( num_t > i1 ) {
              /* close the gap left by the removed t_group[i1] */
564
+ memmove( t_group+i1, t_group+i1+1, ( num_t - i1)*sizeof(t_group[0]) );
565
+ }
566
+ }
567
+
568
+ if ( nNumGroups ) {
569
+ /* there are groups to merge */
570
+ if ( nNextGroupNumber >= MAX_STACK_ARRAY_LEN ) {
571
+ nNewTgNumber = (AT_NUMB *)inchi_malloc((nNextGroupNumber+1)*sizeof(*nNewTgNumber));
572
+ if ( !nNewTgNumber ) {
573
+ ret = -1;
574
+ goto exit_function; /* error: out of RAM */
575
+ }
576
+ }
577
+ memset( nNewTgNumber, 0, (nNextGroupNumber+1)*sizeof(*nNewTgNumber) );
578
+ for ( i = 0; i < num_t; i ++ ) {
579
+ nNewTgNumber[t_group[i].nGroupNumber] = i+1; /* new t-group numbers */
580
+ }
581
+ for ( j = 0; j < nNumGroups; j ++ ) {
582
+ if ( !nNewTgNumber[nGroupNumber[j]] && nNewTgNumber[nGroupNewNumber[j]] ) {
              /* a removed group gets the new number of the group it was merged into */
583
+ nNewTgNumber[nGroupNumber[j]] = nNewTgNumber[nGroupNewNumber[j]];
584
+ } else {
585
+ ret = -1; /* program error: all new numbers must have been marked */
586
+ goto exit_function;
587
+ }
588
+ }
589
+ /* renumber t-groups */
590
+ for ( i = 0; i < num_t; i ++ ) {
591
+ t_group[i].nGroupNumber = nNewTgNumber[t_group[i].nGroupNumber];
592
+ }
593
+ #if( bRELEASE_VERSION != 1 )
594
+ /* Check: debug only; after renumbering the group numbers must be consecutive */
595
+ for ( i = 1; i < num_t; i ++ ) {
596
+ if ( 1 != t_group[i].nGroupNumber - t_group[i-1].nGroupNumber ) {
597
+ ret = -1; /* debug */
598
+ goto exit_function;
599
+ }
600
+ }
601
+ #endif
602
+ /* renumber endpoints */
603
+ for ( i = 0; i < num_atoms; i ++ ) {
604
+ if ( group = at[i].endpoint ) { /* assignment intended */
605
+ if ( !(at[i].endpoint = nNewTgNumber[group]) || nNextGroupNumber <= nNewTgNumber[group] ) {
606
+ ret = -1; /* program error */
607
+ goto exit_function;
608
+ }
609
+ }
610
+ }
611
+ }
      /* release heap work arrays; pointers are reset so that the exit_function code cannot free them twice */
612
+ if ( nNewTgNumber != nNewTgNumberStackArray ) {
613
+ inchi_free( nNewTgNumber );
614
+ nNewTgNumber = nNewTgNumberStackArray;
615
+ }
616
+ if ( nGroupNumber != nGroupNumberStackArray ) {
617
+ inchi_free(nGroupNumber);
618
+ nGroupNumber = nGroupNumberStackArray;
619
+ }
620
+ if ( nGroupNewNumber != nGroupNewNumberStackArray ) {
621
+ inchi_free( nGroupNewNumber );
622
+ nGroupNewNumber = nGroupNewNumberStackArray;
623
+ }
624
+ if ( !t_group_info->tGroupNumber ) {
          /* first use: allocate the index; it stays owned by t_group_info */
625
+ t_group_info->tGroupNumber = (AT_NUMB *)inchi_malloc(2*max_num_t*sizeof(t_group_info->tGroupNumber[0]));
626
+ if ( !t_group_info->tGroupNumber ) {
627
+ ret = -1;
628
+ goto exit_function;
629
+ }
630
+ }
631
+ /* fill out t-group index 2004-02-27: tGroupNumber[t-group number] = position in t_group[] + 1 */
632
+ memset( t_group_info->tGroupNumber, 0, 2*max_num_t*sizeof(t_group_info->tGroupNumber[0]) );
633
+ for ( i = 0; i < num_t; i ++ ) {
634
+ if ( t_group[i].nNumEndpoints && t_group[i].nGroupNumber )
635
+ t_group_info->tGroupNumber[t_group[i].nGroupNumber] = i+1;
636
+ }
637
+
638
+ if ( pBNS && (pBNS->tot_st_cap == pBNS->tot_st_flow || ALWAYS_ADD_TG_ON_THE_FLY) ) {
          /* rebuild the balanced network with the updated t-groups (and c-groups if charges may move) */
639
+ T_GROUP_INFO tgi;
640
+ int ret_bns;
641
+ memset( &tgi, 0, sizeof(tgi) );
642
+ tgi.num_t_groups = num_t;
643
+ tgi.t_group = t_group;
644
+ /* reinitialize BN Structure */
645
+ ret_bns = ReInitBnStruct( pBNS, at, num_atoms, 0 );
646
+ if ( IS_BNS_ERROR( ret_bns ) ) {
647
+ return ret_bns; /* NOTE: *pnum_t has not been updated on this path */
648
+ }
649
+ if ( *pBNS->pbTautFlags & TG_FLAG_MOVE_POS_CHARGES ) {
650
+ /* set new charge groups */
651
+ ret_bns = AddCGroups2BnStruct( pBNS, at, num_atoms, cgi );
652
+ if ( IS_BNS_ERROR( ret_bns ) ) {
653
+ return ret_bns;
654
+ }
655
+ }
656
+ /* set new tautomeric groups */
657
+ ret_bns = AddTGroups2BnStruct( pBNS, at, num_atoms, &tgi );
658
+ if ( IS_BNS_ERROR( ret_bns ) ) {
659
+ return ret_bns;
660
+ }
661
+ }
662
+
663
+ *pnum_t = num_t;
664
+ return difference;
665
+
666
+ exit_function:
      /* common exit for early-outs and errors: free any heap work arrays and return ret */
667
+ if ( nNewTgNumber != nNewTgNumberStackArray ) {
668
+ inchi_free( nNewTgNumber );
669
+ }
670
+ if ( nGroupNumber != nGroupNumberStackArray ) {
671
+ inchi_free(nGroupNumber);
672
+ }
673
+ if ( nGroupNewNumber != nGroupNewNumberStackArray ) {
674
+ inchi_free( nGroupNewNumber );
675
+ }
676
+ return ret;
677
+ }
678
+ /*******************************************************************************************************
679
+ * change non-alternating and non-tautomeric bonds
680
+ * (that is, single and double bonds) to tautomeric
681
+ */
682
+ int SetTautomericBonds( inp_ATOM *at, int nNumBondPos, T_BONDPOS *BondPos )
683
+ {
684
+ int k, n;
685
+ for ( k = n = 0; k < nNumBondPos; k ++ ) {
686
+ int neighbor_index = BondPos[k].neighbor_index;
687
+ int center = BondPos[k].nAtomNumber;
688
+ int bond_mark = at[center].bond_type[neighbor_index];
689
+ int bond_type = bond_mark & ~BOND_MARK_ALL;
690
+ int neighbor;
691
+ #if( REPLACE_ALT_WITH_TAUT == 1 )
692
+ if ( bond_type != BOND_TAUTOM )
693
+ #else
694
+ if ( bond_type != BOND_ALTERN && bond_type != BOND_TAUTOM )
695
+ #endif
696
+ {
697
+ int ii;
698
+ /* change bond type to BOND_TAUTOM presering higher bits marks */
699
+ bond_type = (bond_mark & BOND_MARK_ALL) | BOND_TAUTOM;
700
+ /* change center-neighbor bond */
701
+ at[center].bond_type[neighbor_index] = bond_type;
702
+ neighbor = at[center].neighbor[neighbor_index];
703
+ for ( ii = 0; ii < at[neighbor].valence; ii ++ ) {
704
+ if ( at[neighbor].neighbor[ii] == center ) {
705
+ /* neighbor-center bond found */
706
+ at[neighbor].bond_type[ii] = bond_type;
707
+ break;
708
+ }
709
+ }
710
+ n ++;
711
+ }
712
+ }
713
+ return n;
714
+ }
715
+
716
+ /********************************************************************************************************/
717
+ int GetNeutralRepsIfNeeded( AT_NUMB *pri, AT_NUMB *prj, inp_ATOM *at, int num_atoms, T_ENDPOINT *EndPoint, int nNumEndPoints, C_GROUP_INFO *cgi )
718
+ {
719
+ AT_NUMB ri = *pri;
720
+ AT_NUMB rj = *prj;
721
+ int i, k;
722
+ AT_NUMB c_point, endpoint, r;
723
+
724
+ if ( (c_point = at[ri].c_point) && (c_point == at[rj].c_point) &&
725
+ (at[ri].charge == 1 || at[rj].charge == 1) && cgi && cgi->num_c_groups > 0 ) {
726
+ /* at[ri] and at[rj] belong to the same charge group, at least one is charged */
727
+ for ( k = 0; k < cgi->num_c_groups; k ++ ) {
728
+ if ( cgi->c_group[k].nGroupNumber == c_point ) {
729
+ /* cgi->c_group[k] is found to be this charge group */
730
+ if ( cgi->c_group[k].num_CPoints - cgi->c_group[k].num[0] < 2 ) {
731
+ /* Only one neutral in the c-group: we will not be able to neutralize both
732
+ when looking for the alt path to discover the tautomerism.
733
+ Therefore we need to find a neutral t-group representative */
734
+ /* at[rj] */
735
+ if ( endpoint = at[rj].endpoint ) {
736
+ for ( i = 0; i < nNumEndPoints; i ++ ) {
737
+ if ( (r=EndPoint[i].nAtomNumber) == *prj )
738
+ continue; /* ignore at[*prj] */
739
+ if ( at[r].endpoint != endpoint )
740
+ continue; /* at[r] does not belong to the same t-group as at[*prj]; ignore the atom */
741
+ if ( !at[r].c_point ) {
742
+ rj = r; /* found a neutral t-group representative */
743
+ break;
744
+ }
745
+ if ( at[r].c_point != c_point && c_point == at[rj].c_point ) {
746
+ /* replace only once because of (c_point == at[rj].c_point) condition */
747
+ rj = r;
748
+ }
749
+ }
750
+ if ( rj == *prj /*&& at[ri].endpoint*/ ) {
751
+ /* !!! "&& at[ri].endpoint": only between 2 t-groups 2004-02-27;
752
+ the change disabled due to undiscovered yet possibility of ambiguity*/
753
+ /* no replacement has been found in EndPoint[]; try all atoms in the t-group */
754
+ for ( i = 0; i < num_atoms; i ++ ) {
755
+ if ( at[i].endpoint != endpoint )
756
+ continue;
757
+ if ( i == (int)*prj )
758
+ continue;
759
+ if ( !at[i].c_point ) {
760
+ rj = (AT_NUMB)i; /* found neutral t-group representative */
761
+ break;
762
+ }
763
+ if ( at[i].c_point != c_point && c_point == at[rj].c_point ) {
764
+ /* replace only once */
765
+ rj = (AT_NUMB)i;
766
+ }
767
+ }
768
+ }
769
+ }
770
+ /* at[ri] */
771
+ if ( endpoint = at[ri].endpoint ) {
772
+ for ( i = 0; i < nNumEndPoints; i ++ ) {
773
+ if ( (r=EndPoint[i].nAtomNumber) == *pri )
774
+ continue;
775
+ if ( at[r].endpoint != endpoint )
776
+ continue;
777
+ if ( !at[r].c_point ) {
778
+ ri = r; /* found neutral t-group representative */
779
+ break;
780
+ }
781
+ if ( at[r].c_point != c_point && c_point == at[ri].c_point &&
782
+ at[r].c_point != at[rj].c_point ) {
783
+ /* replace only once */
784
+ ri = r;
785
+ }
786
+ }
787
+ if ( ri == *pri && at[rj].endpoint ) {
788
+ /* !!! "&& at[rj].endpoint": only between 2 t-groups 2004-02-27;
789
+ the change disabled due to undiscovered yet possibility of ambiguity */
790
+ for ( i = 0; i < num_atoms; i ++ ) {
791
+ if ( at[i].endpoint != endpoint )
792
+ continue;
793
+ if ( i == (int)*pri )
794
+ continue;
795
+ if ( !at[i].c_point ) {
796
+ ri = (AT_NUMB)i; /* found neutral t-group representative */
797
+ break;
798
+ }
799
+ if ( at[i].c_point != c_point && c_point == at[ri].c_point &&
800
+ at[i].c_point != at[rj].c_point) {
801
+ /* replace only once */
802
+ ri = (AT_NUMB)i;
803
+ }
804
+ }
805
+ }
806
+ }
807
+
808
+ }
809
+ }
810
+ break;
811
+ }
812
+ *prj = rj;
813
+ *pri = ri;
814
+ }
815
+ return 0;
816
+ }
817
+
818
+ /********************************************************************************************************/
819
+ int FindAccessibleEndPoints( T_ENDPOINT *EndPoint, int *nNumEndPoints, T_BONDPOS *BondPos, int *nNumBondPos,
820
+ struct BalancedNetworkStructure *pBNS, struct BalancedNetworkData *pBD,
821
+ inp_ATOM *at, int num_atoms, C_GROUP_INFO *cgi )
822
+ {
823
+ AT_NUMB nTGroupRepresenative[MAXVAL], nTGroupEqu[MAXVAL], nTGEndPointNo[MAXVAL], ri, rj;
824
+ AT_NUMB nCurTGroupNumber, nMaxTGroupNumber, nNumTgroupNumbers, nMaxEquNumber;
825
+ int i, j, k, nNumDiffTGroupNumbers = 0, nNumFoundEqu, nErr;
826
+
827
+ if ( *nNumEndPoints != *nNumBondPos )
828
+ return 0;
829
+ /* collect all group numbers. Fill EndPoint[i].nEquNumber */
830
+ for ( i = 0; i < *nNumEndPoints; i ++ ) {
831
+ nCurTGroupNumber = EndPoint[i].nEquNumber = EndPoint[i].nGroupNumber; /* initial equivalence */
832
+ if ( nCurTGroupNumber ) {
833
+ /* found endpoint that already belongs to a t-group */
834
+ for ( j = 0; j < nNumDiffTGroupNumbers; j ++ ) {
835
+ if ( nTGroupEqu[j] == nCurTGroupNumber )
836
+ break;
837
+ }
838
+ if ( j == nNumDiffTGroupNumbers ) {
839
+ nTGroupRepresenative[nNumDiffTGroupNumbers] = EndPoint[i].nAtomNumber;
840
+ nTGroupEqu[nNumDiffTGroupNumbers] = EndPoint[i].nGroupNumber;
841
+ nTGEndPointNo[nNumDiffTGroupNumbers] = i;
842
+ nNumDiffTGroupNumbers ++;
843
+ }
844
+ }
845
+ }
846
+
847
+
848
+ /* check whether each pair belongs to the same t-group and establish the equivalence(s) */
849
+ for ( i = 0, nNumFoundEqu=0; i < nNumDiffTGroupNumbers; i ++ ) {
850
+ for ( j = i+1; j < nNumDiffTGroupNumbers; j ++ ) {
851
+ ri = nTGroupRepresenative[i];
852
+ rj = nTGroupRepresenative[j];
853
+ /* both at[ri] and at[rj] are known to belong to tautomeric groups */
854
+ GetNeutralRepsIfNeeded( &ri, &rj, at, num_atoms, EndPoint, *nNumEndPoints, cgi );
855
+ nErr = bExistsAnyAltPath( pBNS, pBD, at, num_atoms, ri, rj, ALT_PATH_MODE_TAUTOM );
856
+ if ( IS_BNS_ERROR(nErr) )
857
+ return nErr;
858
+ if ( 0 == nErr )
859
+ continue; /* alt path between at[ri] and at[rj] not found */
860
+ nCurTGroupNumber = inchi_min( nTGroupEqu[i], nTGroupEqu[j] );
861
+ nMaxTGroupNumber = inchi_max( nTGroupEqu[i], nTGroupEqu[j] );
862
+ for ( k = 0; k < nNumDiffTGroupNumbers; k ++ ) {
863
+ if ( nTGroupEqu[k]==nMaxTGroupNumber ) {
864
+ nTGroupEqu[k] = nCurTGroupNumber;
865
+ nNumFoundEqu ++;
866
+ }
867
+ }
868
+ for ( k = 0; k < *nNumEndPoints; k ++ ) {
869
+ if ( EndPoint[k].nEquNumber == nMaxTGroupNumber ) {
870
+ EndPoint[k].nEquNumber = nCurTGroupNumber;
871
+ }
872
+ }
873
+ }
874
+ }
875
+ if ( nNumFoundEqu ) {
876
+ /* leave in only non-equivalent representatives */
877
+ for ( i = 1, k = 0; i < nNumDiffTGroupNumbers; i ++ ) {
878
+ for ( j = 0; j < i; j ++ ) {
879
+ if ( nTGroupEqu[j] == nTGroupEqu[i] ) {
880
+ nTGroupEqu[i] = 0; /* i > j; mark equivalent for removal*/
881
+ break;
882
+ }
883
+ }
884
+ }
885
+ for ( i = j = 0; i < nNumDiffTGroupNumbers; i ++ ) {
886
+ if ( nTGroupEqu[i] ) {
887
+ if ( i != j ) { /* remove the marked */
888
+ nTGroupEqu[j] = nTGroupEqu[i];
889
+ nTGroupRepresenative[j] = nTGroupRepresenative[i];
890
+ nTGEndPointNo[j] = nTGEndPointNo[i];
891
+ }
892
+ j ++;
893
+ }
894
+ }
895
+ nNumDiffTGroupNumbers = j; /* number of known t-group representatives */
896
+ }
897
+ /* collect endpoints that have not been assigned to t-groups */
898
+ for ( i = 0, j = nNumDiffTGroupNumbers; i < *nNumEndPoints; i ++ ) {
899
+ if ( EndPoint[i].nEquNumber )
900
+ continue;
901
+ nTGroupEqu[j] = 0;
902
+ nTGroupRepresenative[j] = EndPoint[i].nAtomNumber;
903
+ nTGEndPointNo[j] = i;
904
+ j ++;
905
+
906
+ }
907
+ nNumTgroupNumbers = j;
908
+ nMaxEquNumber = num_atoms + 1; /* impossible atom or t-group number */
909
+
910
+ /* check whether each pair belongs to the same group and establish the equivalence(s) */
911
+ for ( i = 0, nNumFoundEqu=0; i < nNumTgroupNumbers; i ++ ) {
912
+ for ( j = i+1; j < nNumTgroupNumbers; j ++ ) {
913
+ if ( nTGroupEqu[i] != nTGroupEqu[j] && (i>=nNumDiffTGroupNumbers || j>=nNumDiffTGroupNumbers) ||
914
+ /* equivalence of a t-group and a non-t-group atom */
915
+ !nTGroupEqu[i] && !nTGroupEqu[j]
916
+ /* equivalence of two non-t-group atoms */
917
+ ) {
918
+ ri = nTGroupRepresenative[i];
919
+ rj = nTGroupRepresenative[j];
920
+ /*------------------------------!!!---------------------------------------------
921
+ Explanation why GetNeutralRepsIfNeeded() may need to be changed 2004-02-27
922
+ The change has been disabled due to undiscovered yet possibility of ambiguity
923
+ to search for neutral only among EndPoint[] in case taut-not_taut pairs
924
+
925
+ Counterexample: O=C-NH(+)=C-NH2
926
+ 1 2 3
927
+
928
+ Has already been found: 2-3 (+)-charge exchange
929
+ 1-2 tautomerism (charge removed to 3)
930
+ Now testing: 2-3 tautomerism. If not commented out,
931
+ GetNeutralRepsIfNeeded() would replace 2-3 test with 1-3 test because:
932
+ o Charge group has only one neutral and both 2 and 3 belong to it,
933
+ therefore we cannot neutralize both; search for neutral representative;
934
+ o Since 1 and 2 belong to the same t-group and 1 is neutral,
935
+ test 1-3 instead of 2-3.
936
+ This breaks our condition:
937
+ Test tautomeric H movement only between neutral atoms.
938
+ -----------------------------------------------------------------------------*/
939
+ GetNeutralRepsIfNeeded( &ri, &rj, at, num_atoms, EndPoint, *nNumEndPoints, cgi );
940
+
941
+ nErr = bExistsAnyAltPath( pBNS, pBD, at, num_atoms, ri, rj, ALT_PATH_MODE_TAUTOM );
942
+ if ( IS_BNS_ERROR(nErr) )
943
+ return nErr;
944
+ if ( nErr <= 0 )
945
+ continue;
946
+ if ( nTGroupEqu[i] && nTGroupEqu[j] ) {
947
+ /* found equivalence of two t-groups; at least one of them must be a new one */
948
+ nCurTGroupNumber = inchi_min( nTGroupEqu[i], nTGroupEqu[j] );
949
+ nMaxTGroupNumber = inchi_max( nTGroupEqu[i], nTGroupEqu[j] );
950
+ for ( k = 0; k < nNumTgroupNumbers; k ++ ) {
951
+ if ( nTGroupEqu[k]==nMaxTGroupNumber ) {
952
+ nTGroupEqu[k] = nCurTGroupNumber;
953
+ nNumFoundEqu ++;
954
+ }
955
+ }
956
+ for ( k = 0; k < *nNumEndPoints; k ++ ) {
957
+ if ( EndPoint[k].nEquNumber == nMaxTGroupNumber ) {
958
+ EndPoint[k].nEquNumber = nCurTGroupNumber;
959
+ }
960
+ }
961
+ } else
962
+ if ( nTGroupEqu[i] ) { /* extend existing t-group */
963
+ nTGroupEqu[j] = nTGroupEqu[i];
964
+ EndPoint[nTGEndPointNo[j]].nEquNumber = nTGroupEqu[i];
965
+
966
+ } else
967
+ if ( nTGroupEqu[j] ) { /* extend existing t-group */
968
+ nTGroupEqu[i] = nTGroupEqu[j];
969
+ EndPoint[nTGEndPointNo[i]].nEquNumber = nTGroupEqu[j];
970
+
971
+ } else { /* establis a new t-group */
972
+ nTGroupEqu[i] =
973
+ nTGroupEqu[j] = nMaxEquNumber; /* assign a fict. ID to establish equivalence */
974
+ EndPoint[nTGEndPointNo[i]].nEquNumber =
975
+ EndPoint[nTGEndPointNo[j]].nEquNumber = nMaxEquNumber;
976
+ nMaxEquNumber ++;
977
+ }
978
+ }
979
+ }
980
+ }
981
+ /* eliminate endpoints and bonds that do not belong to t-group(s)
982
+ (they have not been found connected by an alt path to any other endpoint)
983
+ */
984
+ for ( i = 0, j = 0; i < *nNumEndPoints; i ++ ) {
985
+ if ( EndPoint[i].nEquNumber ) {
986
+ #if( IGNORE_SINGLE_ENDPOINTS == 1 ) /* 1-28-2003 */
987
+ for ( k = 0, nNumFoundEqu = 0; k < *nNumEndPoints; k ++ ) {
988
+ nNumFoundEqu += (EndPoint[i].nEquNumber == EndPoint[k].nEquNumber);
989
+ }
990
+ if ( nNumFoundEqu <= 1 ) { /* one time it is equal to itself when i == k above */
991
+ /* if EndPoint[i] is not "equivalent" to any other EndPoint then ignore it */
992
+ continue;
993
+ }
994
+ #endif
995
+ if ( i != j ) { /* save endpoints that are found to be connected to other endpoints by alt paths */
996
+ EndPoint[j] = EndPoint[i];
997
+ BondPos[j] = BondPos[i];
998
+ }
999
+ j ++;
1000
+ }
1001
+ }
1002
+
1003
+ #if( IGNORE_SINGLE_ENDPOINTS != 1 ) /* 1-28-2003 */
1004
+ /* Do not allow a centerpoint to have only one tautomeric bond */
1005
+ /* Hack: we may have only one centerpoint */
1006
+ /* BondPos[*].nAtomNumber are centerpoints */
1007
+ if ( j == 1 ) {
1008
+ /* check if there exist other centerpoint neighbors
1009
+ * connected to it by another tautomeric-bond
1010
+ */
1011
+ for ( i = 0, k = 0; i < at[BondPos[0].nAtomNumber].valence; i ++ ) {
1012
+ k += ( i != BondPos[0].neighbor_index &&
1013
+ BOND_TAUTOM == (at[BondPos[0].nAtomNumber].bond_type[i] & ~BOND_MARK_ALL));
1014
+ }
1015
+ if ( !k ) {
1016
+ j = 0;
1017
+ }
1018
+ }
1019
+ #endif
1020
+
1021
+ *nNumEndPoints = *nNumBondPos = j;
1022
+ return j;
1023
+
1024
+ }
1025
+
1026
+ /*#if( MOVE_CHARGES == 1 ) */ /* { */
1027
+ /********************************************************************************************************/
1028
+
1029
+ /**********************************************/
1030
+ /* */
1031
+ /* definitions for positive ion recognition */
1032
+ /* */
1033
+ /**********************************************/
1034
+
1035
+ typedef struct tagChargeType { /* meaning see in bCanBeACPoint() */
1036
+ char elname[3];
1037
+ S_CHAR charge;
1038
+ S_CHAR neutral_valence;
1039
+ S_CHAR neutral_bonds_valence; /* valence of a neutral atom */
1040
+ S_CHAR cChangeValence; /* charge increases valence by this value */
1041
+ S_CHAR cChargeType; /* different types are treated separately */
1042
+ S_CHAR num_bonds; /* added 02-06-2005 */
1043
+ } CHARGE_TYPE;
1044
+
1045
+ CHARGE_TYPE CType[] = {
1046
+ { "N\0", 1, 3, 3, 1, 0, 0 },
1047
+ { "P\0", 1, 3, 3, 1, 1, 0 },
1048
+ #if( ADD_MOVEABLE_O_PLUS == 1 )
1049
+ { "O\0", 1, 2, 2, 1, 2, 2 }, /* added 02-06-2005 */
1050
+ { "S\0", 1, 2, 2, 1, 3, 2 }, /* added 03-18-2005 */
1051
+ { "Se", 1, 2, 2, 1, 4, 2 }, /* added 03-18-2005 */
1052
+ { "Te", 1, 2, 2, 1, 5, 2 }, /* added 03-18-2005 */
1053
+ #endif
1054
+ };
1055
+
1056
+ /* bits */
1057
+
1058
/* elementary flags */
#define C_SUBTYPE_CHARGED    0x00
#define C_SUBTYPE_p_DONOR    0x01   /* new */
#define C_SUBTYPE_p_ACCEPT   0x02   /* new */
#define C_SUBTYPE_H_ACCEPT   0x04
#define C_SUBTYPE_H_DONOR    0x08
#define C_SUBTYPE_NEUTRAL    0x10

/* Invariant: every C_SUBTYPE_CHARGED_... value is less than every
   C_SUBTYPE_NEUTRAL_... value. */

/* combinations for charged atoms */
#define C_SUBTYPE_CHARGED_NON_TAUT           (C_SUBTYPE_CHARGED)
#define C_SUBTYPE_CHARGED_p_DONOR            (C_SUBTYPE_CHARGED | C_SUBTYPE_p_DONOR)
#define C_SUBTYPE_CHARGED_H_ACCEPT           (C_SUBTYPE_CHARGED | C_SUBTYPE_H_ACCEPT)
#define C_SUBTYPE_CHARGED_H_ACCEPT_p_DONOR   (C_SUBTYPE_CHARGED | C_SUBTYPE_H_ACCEPT | C_SUBTYPE_p_DONOR)
#define C_SUBTYPE_CHARGED_H_DONOR            (C_SUBTYPE_CHARGED | C_SUBTYPE_H_DONOR | C_SUBTYPE_p_DONOR)

/* combinations for neutral atoms */
#define C_SUBTYPE_NEUTRAL_NON_TAUT           (C_SUBTYPE_NEUTRAL)
#define C_SUBTYPE_NEUTRAL_H_ACCEPT           (C_SUBTYPE_NEUTRAL | C_SUBTYPE_H_ACCEPT)
#define C_SUBTYPE_NEUTRAL_H_ACCEPT_p_ACCEPT  (C_SUBTYPE_NEUTRAL | C_SUBTYPE_H_ACCEPT | C_SUBTYPE_p_ACCEPT)
#define C_SUBTYPE_NEUTRAL_H_DONOR            (C_SUBTYPE_NEUTRAL | C_SUBTYPE_H_DONOR)

/* number of rows in CType[] */
#define NUM_C_TYPES  ((int)(sizeof(CType) / sizeof(CType[0])))
1079
+
1080
+
1081
+ /********************************************************************************************************/
1082
+ int bCanBeACPoint( inp_ATOM *at, S_CHAR cCharge, S_CHAR cChangeValence, S_CHAR neutral_bonds_valence,
1083
+ S_CHAR neutral_valence, S_CHAR nEndpointValence, S_CHAR *cChargeSubtype )
1084
+ {
1085
+ int nChangeValence;
1086
+ int nNumBonds;
1087
+ int nBondsValence;
1088
+ int bNegCharge = (at->charge == -1); /* add fict. bonds to (-) 2004-02-24*/
1089
+ if ( at->charge == cCharge && at->valence == at->chem_bonds_valence && at->num_H ) {
1090
+ /* proton donors candidates >NH(+)-, >NH2(+), -NH3(+), >OH(+), -OH2(+) */
1091
+ /* charged, added p-transfer -- 01-28-2004 */
1092
+ nChangeValence = at->charge * cChangeValence; /* +1 or -1; currently only +1 */
1093
+ nBondsValence = at->chem_bonds_valence + at->num_H;
1094
+ if ( nBondsValence == neutral_bonds_valence + nChangeValence && nEndpointValence ) {
1095
+ *cChargeSubtype = C_SUBTYPE_CHARGED_p_DONOR; /* ignore Phosphorus p-donors for now */
1096
+ }
1097
+ return 0;
1098
+ } else
1099
+ if ( at->charge == cCharge && at->valence < at->chem_bonds_valence ) {
1100
+ /* the requirement at->valence < at->chem_bonds_valence rejects
1101
+ candidates >NH(+)-, >NH2(+), -NH3(+), >N(+)<, >OH(+), -OH2(+), >O(+)-
1102
+ Moveable charge requires double bonds; these ions have no double bonds
1103
+ */
1104
+
1105
+ /* charged */
1106
+ nChangeValence = at->charge * cChangeValence; /* +1 or -1; currently only +1 */
1107
+ nBondsValence = at->chem_bonds_valence + at->num_H;
1108
+ nNumBonds = at->valence + at->num_H;
1109
+ if ( nBondsValence == neutral_bonds_valence + nChangeValence ) { /* known valence */
1110
+ if ( nNumBonds == neutral_valence ) {
1111
+ /* non-tautomeric: >N(+)=, =O(+)-
1112
+ possibly tautomeric donor: =NH(+)-, =NH2(+), =OH(+) */
1113
+ if ( at->valence == neutral_valence || !nEndpointValence ) {
1114
+ /* non-tautomeric: >N(+)=, =O(+)-; any suitable P+: >P(+)=, =PH(+)-, =PH2(+) */
1115
+ *cChargeSubtype = C_SUBTYPE_CHARGED_NON_TAUT;
1116
+ } else {
1117
+ /* possibly tautomeric donor: =NH(+)-, =NH2(+), =OH(+) */
1118
+ *cChargeSubtype = C_SUBTYPE_CHARGED_H_DONOR;
1119
+ }
1120
+ return 1;
1121
+ }
1122
+ if ( nNumBonds == neutral_valence - 1 ) {
1123
+ /* possibly tutomeric acceptor: =N(+)=, #N(+)-, #NH(+), #O(+) */
1124
+ if ( nEndpointValence ) {
1125
+ *cChargeSubtype = at->num_H? C_SUBTYPE_CHARGED_H_ACCEPT_p_DONOR : C_SUBTYPE_CHARGED_H_ACCEPT;
1126
+ } else {
1127
+ /* =P(+)=, #P(+)-, #PH(+) */
1128
+ *cChargeSubtype = C_SUBTYPE_CHARGED_NON_TAUT;
1129
+ }
1130
+ return 1; /* charge type, charged */
1131
+ }
1132
+ }
1133
+
1134
+ } else
1135
+ if ( at->charge == 0 || bNegCharge ) {
1136
+ /* neutral atom or anion, all bonds are single */
1137
+ nBondsValence = at->chem_bonds_valence + at->num_H + bNegCharge; /* add fict. bonds to (-) 2004-02-24*/
1138
+ nNumBonds = at->valence + at->num_H + bNegCharge; /* add fict. bonds to (-) 2004-02-24*/
1139
+ if ( nBondsValence == neutral_bonds_valence ) {
1140
+ if ( nNumBonds == neutral_valence ) {
1141
+ /* only single bonds: >N-, >NH, -NH2, -O-, -OH, >P- >PH -PH2 */
1142
+ /* >N(-), -NH(-), -O(-). >P(-) -PH(-) */
1143
+ if ( at->valence == neutral_valence || !nEndpointValence ) {
1144
+ /* >N-, -O-, any P(3 single bonds): >P- >PH -PH2 */
1145
+ *cChargeSubtype = C_SUBTYPE_NEUTRAL_NON_TAUT;
1146
+ } else
1147
+ if ( at->valence < neutral_valence /*&& nEndpointValence */ ) {
1148
+ /* num_H > 0: >NH -NH2 -OH */
1149
+ /* num_H = 0: none C_SUBTYPE_NEUTRAL_H_ACCEPT for now */
1150
+ *cChargeSubtype = at->num_H? C_SUBTYPE_NEUTRAL_H_DONOR: C_SUBTYPE_NEUTRAL_H_ACCEPT;
1151
+ } else {
1152
+ return 0;
1153
+ }
1154
+ return 1; /* charge type, neutral */
1155
+ }
1156
+ if ( nNumBonds == neutral_valence - 1 ) {
1157
+ /* possibly tautomeric acceptor =N-, =NH, =O or non-taut =P-, =PH */
1158
+ if ( nEndpointValence ) {
1159
+ /* =N-, =NH, =O */
1160
+ *cChargeSubtype = C_SUBTYPE_NEUTRAL_H_ACCEPT_p_ACCEPT;
1161
+ } else {
1162
+ /* =P-, =PH */
1163
+ *cChargeSubtype = C_SUBTYPE_NEUTRAL_NON_TAUT;
1164
+ }
1165
+ return 1; /* charge type, (+) => neutral */
1166
+ }
1167
+ }
1168
+ }
1169
+ return 0;
1170
+ }
1171
+ /********************************************************************************************************/
1172
+ int GetChargeType( inp_ATOM *atom, int iat, S_CHAR *cChargeSubtype )
1173
+ {
1174
+ int i, n;
1175
+ S_CHAR nEndpointValence;
1176
+ inp_ATOM *at = atom + iat;
1177
+
1178
+ *cChargeSubtype = 0;
1179
+ /* ignore ion pairs and charges != 1 */
1180
+ if ( abs(at->charge) == 1 ) {
1181
+ for ( i = 0; i < at->valence; i ++ ) {
1182
+ n = at->neighbor[i];
1183
+ /* allow negatively charged tautomeric neighbors 2004-02-26 */
1184
+ if ( abs(atom[n].charge + at->charge) < abs(atom[n].charge - at->charge) && !atom[n].endpoint ) {
1185
+ return -1; /* charges have different signs */
1186
+ }
1187
+ }
1188
+ } else
1189
+ if ( at->charge ) {
1190
+ return -1; /* abs(charge) != 1 */
1191
+ }
1192
+ /* find candidates */
1193
+ for ( i = 0; i < NUM_C_TYPES; i ++ ) {
1194
+ if ( !strcmp( at->elname, CType[i].elname ) &&
1195
+ (!CType[i].num_bonds || CType[i].num_bonds==at->valence && at->nNumAtInRingSystem >= 5) ) {
1196
+ nEndpointValence = (S_CHAR)get_endpoint_valence(at->el_number );
1197
+ if ( bCanBeACPoint( at, CType[i].charge, CType[i].cChangeValence, CType[i].neutral_bonds_valence,
1198
+ CType[i].neutral_valence, nEndpointValence, cChargeSubtype ) ) {
1199
+ return CType[i].cChargeType;
1200
+ }
1201
+ }
1202
+ }
1203
+ return -1;
1204
+ }
1205
+ /********************************************************************************************************/
1206
+ int CmpCCandidates( const void *a1, const void *a2 )
1207
+ {
1208
+ const C_CANDIDATE *c1 = (const C_CANDIDATE *)a1;
1209
+ const C_CANDIDATE *c2 = (const C_CANDIDATE *)a2;
1210
+ int ret;
1211
+ if ( ret = (int)c1->type - (int)c2->type )
1212
+ return ret;
1213
+ if ( ret = (int)c1->subtype - (int)c2->subtype )
1214
+ return ret;
1215
+ ret = (int)c1->atnumber - (int)c2->atnumber;
1216
+ return ret;
1217
+ }
1218
+ /********************************************************************************************************/
1219
+ int RegisterCPoints( C_GROUP *c_group, int *pnum_c, int max_num_c, T_GROUP_INFO *t_group_info,
1220
+ int point1, int point2, int ctype, inp_ATOM *at, int num_atoms )
1221
+ {
1222
+ int num_c = *pnum_c, i, i1, i2;
1223
+ AT_NUMB nGroupNumber = 0, nNewGroupNumber;
1224
+
1225
+
1226
+ if ( at[point1].c_point == at[point2].c_point ) {
1227
+ if ( at[point1].c_point )
1228
+ return 0;
1229
+ memset( c_group+num_c, 0, sizeof(c_group[0]) );
1230
+ if ( num_c < max_num_c ) {
1231
+ c_group[num_c].num[0] = CHARGED_CPOINT(at,point1) + CHARGED_CPOINT(at, point2);
1232
+ c_group[num_c].num_CPoints += 2;
1233
+ c_group[num_c].cGroupType = ctype;
1234
+ /* get next available c-group number */
1235
+ for ( i = 0; i < num_c; i ++ ) {
1236
+ if ( nGroupNumber < c_group[i].nGroupNumber )
1237
+ nGroupNumber = c_group[i].nGroupNumber;
1238
+ }
1239
+ nGroupNumber ++;
1240
+ c_group[num_c].nGroupNumber =
1241
+ at[point1].c_point =
1242
+ at[point2].c_point = nGroupNumber;
1243
+ *pnum_c = num_c+1;
1244
+ /* count protons */
1245
+ if ( at[point1].num_H ) {
1246
+ c_group[num_c].num[1] ++;
1247
+ } else
1248
+ if ( at[point2].num_H ) {
1249
+ c_group[num_c].num[1] ++;
1250
+ } else
1251
+ if ( (at[point1].endpoint || at[point2].endpoint) && t_group_info && t_group_info->t_group && t_group_info->num_t_groups ) {
1252
+ /* !!! add later !!! */
1253
+ }
1254
+
1255
+
1256
+ return 1;
1257
+ }
1258
+ return BNS_CPOINT_ERR; /* overflow */
1259
+ }
1260
+ if ( at[point1].c_point > at[point2].c_point ) {
1261
+ /* make sure at[point1].c_point < at[point2].c_point */
1262
+ i = point1;
1263
+ point1 = point2;
1264
+ point2 = i;
1265
+ }
1266
+ if ( !at[point1].c_point ) {
1267
+ /* add a new c-endpoint to an existing c-group */
1268
+ nGroupNumber = at[point2].c_point;
1269
+ for ( i = 0; i < num_c; i ++ ) {
1270
+ if ( nGroupNumber == c_group[i].nGroupNumber ) {
1271
+ at[point1].c_point = at[point2].c_point;
1272
+ c_group[i].num_CPoints ++;
1273
+ c_group[i].num[0] += CHARGED_CPOINT(at,point1);
1274
+ return 1;
1275
+ }
1276
+ }
1277
+ return BNS_CPOINT_ERR; /* program error: c-group not found */
1278
+ } else {
1279
+ /* merge two c-groups */
1280
+ nNewGroupNumber = at[point1].c_point;
1281
+ nGroupNumber = at[point2].c_point;
1282
+ for ( i = 0, i1=i2=-1; i < num_c && (i1 < 0 || i2 < 0); i ++ ) {
1283
+ if ( nNewGroupNumber == c_group[i].nGroupNumber ) {
1284
+ i1 = i;
1285
+ continue;
1286
+ }
1287
+ if ( nGroupNumber == c_group[i].nGroupNumber ) {
1288
+ i2 = i;
1289
+ continue;
1290
+ }
1291
+ }
1292
+ if ( i1 < 0 || i2 < 0 ) {
1293
+ return BNS_CPOINT_ERR; /* at least one not found */
1294
+ }
1295
+
1296
+ c_group[i1].num[0] += c_group[i2].num[0];
1297
+ c_group[i1].num_CPoints += c_group[i2].num_CPoints;
1298
+ num_c --;
1299
+ if ( num_c > i2 ) {
1300
+ memmove( c_group+i2, c_group+i2+1, ( num_c - i2)*sizeof(c_group[0]) );
1301
+ }
1302
+ *pnum_c = num_c;
1303
+ /* renumber c-groups */
1304
+ for ( i = 0; i < num_c; i ++ ) {
1305
+ if ( c_group[i].nGroupNumber > nGroupNumber ) {
1306
+ c_group[i].nGroupNumber --;
1307
+ }
1308
+ }
1309
+ /* renumber c-points */
1310
+ for ( i = 0; i < num_atoms; i ++ ) {
1311
+ if ( at[i].c_point > nGroupNumber ) {
1312
+ at[i].c_point --;
1313
+ } else
1314
+ if ( at[i].c_point == nGroupNumber ) {
1315
+ at[i].c_point = nNewGroupNumber;
1316
+ }
1317
+ }
1318
+ return 1;
1319
+ }
1320
+ }
1321
+
1322
+ /********************************************************************************************************/
1323
+ int MarkChargeGroups ( inp_ATOM *at, int num_atoms, C_GROUP_INFO *c_group_info, T_GROUP_INFO *t_group_info,
1324
+ struct BalancedNetworkStructure *pBNS, struct BalancedNetworkData *pBD )
1325
+ {
1326
+
1327
+ int nNumChanges = 0;
1328
+ if ( c_group_info && c_group_info->c_candidate && c_group_info->max_num_candidates > 0 ) {
1329
+ int i, i1, i2, i3, j, num_tested;
1330
+ C_CANDIDATE *c_candidate = c_group_info->c_candidate;
1331
+ int nMaxNumCandidates = c_group_info->max_num_candidates;
1332
+ int nNumCandidates = c_group_info->num_candidates;
1333
+ S_CHAR c_type, c_subtype;
1334
+ int iat1, iat2, ret, nDelta;
1335
+
1336
+ if ( nNumCandidates == -1 ) {
1337
+ nNumCandidates = 0; /* 2004-02-26 they could appear after t-group discovery */
1338
+ /*return 0;*/
1339
+ }
1340
+ if ( nNumCandidates == 0 ) {
1341
+ for ( i = 0, nNumCandidates = 0; i < num_atoms; i ++ ) {
1342
+ if ( 0 <= (c_type = GetChargeType( at, i, &c_subtype )) ) {
1343
+ if ( nNumCandidates >= nMaxNumCandidates ) {
1344
+ return BNS_VERT_EDGE_OVFL;
1345
+ }
1346
+ c_candidate[nNumCandidates].atnumber = i;
1347
+ c_candidate[nNumCandidates].type = c_type;
1348
+ c_candidate[nNumCandidates].subtype = c_subtype;
1349
+ nNumCandidates ++;
1350
+ }
1351
+ }
1352
+ if ( nNumCandidates <= 1 ) {
1353
+ c_group_info->num_candidates = -1; /* no candidate exists */
1354
+ return 0;
1355
+ }
1356
+ }
1357
+ /* sorting keys: (1) atom type (N,P); (2) uncharged=16/charged=0; (3) other;
1358
+ atom-charged-N .... i1
1359
+ ...
1360
+ atom-charged-N
1361
+ atom-neutral-N .... i2
1362
+ ...
1363
+ atom-neutral-N
1364
+ atom-charged-P .... i3 ... i1
1365
+ ...
1366
+ atom-charged-P
1367
+ atom-neutral-P ........... i2
1368
+ ...
1369
+ atom-neutral-P
1370
+ end. ........... i3
1371
+ */
1372
+ qsort( c_candidate, nNumCandidates, sizeof(c_candidate[0]), CmpCCandidates );
1373
+
1374
+ i1 = 0;
1375
+ num_tested = 0;
1376
+ nDelta = 0;
1377
+
1378
+ while ( i1 < nNumCandidates ) {
1379
+
1380
+ /* the the first charged candidate of a new atom type */
1381
+ for ( ; i1 < nNumCandidates && (c_candidate[i1].subtype & C_SUBTYPE_NEUTRAL); i1 ++ )
1382
+ ;
1383
+ if ( i1 == nNumCandidates )
1384
+ break; /* not found */
1385
+
1386
+ /* bypass other charged candidates of the same atom type */
1387
+ for ( i2 = i1+1; i2 < nNumCandidates &&
1388
+ c_candidate[i2].type == c_candidate[i1].type &&
1389
+ !(c_candidate[i2].subtype & C_SUBTYPE_NEUTRAL); i2++ )
1390
+ ;
1391
+ if ( i2 == nNumCandidates )
1392
+ break; /* no neutral candidates */
1393
+
1394
+ /* find next to the last neutral candidate of the same atom type */
1395
+ for ( i3 = i2; i3 < nNumCandidates &&
1396
+ c_candidate[i3].type == c_candidate[i1].type; i3 ++ )
1397
+ ;
1398
+
1399
+ if ( i3 == i2 ) {
1400
+ /* no neutral candidates found */
1401
+ if ( i2 < nNumCandidates ) {
1402
+ i1 = i3;
1403
+ continue; /* move to the next atom type */
1404
+ }
1405
+ break; /* nothing more to do */
1406
+ }
1407
+
1408
+ /* found charged candidates: i1...i2-1; neutral candidates: i2...i3-1 */
1409
+ for ( i = i1; i < i2; i ++ ) {
1410
+ iat1 = c_candidate[i].atnumber;
1411
+ for ( j = i2; j < i3; j ++ ) {
1412
+ /* check alt path at[iat1]=-=-...-at[iat2]; at[iat1] is charged, at[iat2] is neutral */
1413
+ num_tested ++;
1414
+ iat2 = c_candidate[j].atnumber;
1415
+ if ( at[iat1].c_point && at[iat1].c_point == at[iat2].c_point )
1416
+ continue;
1417
+ ret = bExistsAltPath( pBNS, pBD, NULL, at, num_atoms, iat1, iat2, ALT_PATH_MODE_CHARGE );
1418
+ if ( IS_BNS_ERROR( ret ) ) {
1419
+ return ret;
1420
+ }
1421
+ if ( ret & 1 ) {
1422
+ nDelta = (ret & ~3) >> 2;
1423
+ nNumChanges += (ret & 2);
1424
+ ret = RegisterCPoints( c_group_info->c_group, &c_group_info->num_c_groups,
1425
+ c_group_info->max_num_c_groups, t_group_info,
1426
+ iat1, iat2, c_candidate[i1].type, at, num_atoms );
1427
+ if ( IS_BNS_ERROR( ret ) ) {
1428
+ return ret;
1429
+ }
1430
+ if ( nDelta ) {
1431
+ goto quick_exit;
1432
+ }
1433
+
1434
+ }
1435
+ }
1436
+ }
1437
+ i1 = i3;
1438
+ }
1439
+ quick_exit:
1440
+ if ( c_group_info->num_candidates == 0 ) {
1441
+ /* first time: initialize */
1442
+ c_group_info->num_candidates = num_tested? nNumCandidates : -1; /* no candidate exists */
1443
+ }
1444
+
1445
+ }
1446
+ return nNumChanges;
1447
+ }
1448
+
1449
+ /********************************************************************************************************/
1450
+ int GetSaltChargeType( inp_ATOM *at, int at_no, T_GROUP_INFO *t_group_info, int *s_subtype )
1451
+ {
1452
+ static int el_number_C = 0;
1453
+ static int el_number_O = 0;
1454
+ static int el_number_S = 0;
1455
+ static int el_number_Se = 0;
1456
+ static int el_number_Te = 0;
1457
+
1458
+ /*
1459
+ type (returned value):
1460
+ -1 => ignore
1461
+ 0 => oxygen
1462
+ subtype:
1463
+ 1 = SALT_DONOR_H => has H
1464
+ 2 = SALT_DONOR_Neg => has (-) charge
1465
+ 4 = SALT_ACCEPTOR => may be an acceptor of H or (-), but not necessarily
1466
+
1467
+ O-atom should be:
1468
+ - a terminal atom
1469
+ - connected to unsaturated, uncharged, non-radical atom C that has chemical valence 4:
1470
+ H-donors: =CH-OH, =C(-X)-OH
1471
+ possible H-acceptors: -CH=O, >C=O
1472
+ H-acceptors are true if O is tautomeric
1473
+ */
1474
+ int iC, tg, i, type;
1475
+ /* one-time initialization */
1476
+ if ( !el_number_O ) {
1477
+ el_number_C = get_periodic_table_number( "C" );
1478
+ el_number_O = get_periodic_table_number( "O" );
1479
+ el_number_S = get_periodic_table_number( "S" );
1480
+ el_number_Se = get_periodic_table_number( "Se" );
1481
+ el_number_Te = get_periodic_table_number( "Te" );
1482
+ }
1483
+ *s_subtype = 0; /* initialize the output */
1484
+ /* check whether it is a candidate */
1485
+ if ( at[at_no].valence != 1 ||
1486
+ at[at_no].radical && at[at_no].radical != RADICAL_SINGLET ||
1487
+ at[at_no].charge < -1 ||
1488
+ at[at_no].charge > 0 && !at[at_no].c_point ) {
1489
+ return -1;
1490
+ }
1491
+
1492
+ if ( at[at_no].el_number == el_number_O ||
1493
+ at[at_no].el_number == el_number_S ||
1494
+ at[at_no].el_number == el_number_Se ||
1495
+ at[at_no].el_number == el_number_Te ) {
1496
+ type = 0; /* terminal oxygen atom, needs more to be checked... */
1497
+ } else {
1498
+ type = -1; /* ignore this atom */
1499
+ }
1500
+
1501
+ if ( type < 0 ||
1502
+ at[at_no].chem_bonds_valence + at[at_no].num_H !=
1503
+ get_el_valence(at[at_no].el_number, at[at_no].charge, 0) ) {
1504
+ return -1; /* non-standard valence or not an oxygen */
1505
+ }
1506
+
1507
+ iC = at[at_no].neighbor[0];
1508
+
1509
+ #if ( SALT_WITH_PROTONS == 1 )
1510
+ if ( at[iC].el_number != el_number_C ||
1511
+ at[iC].chem_bonds_valence + at[iC].num_H != 4 || /* allow =C(H)-OH or -C(H)=O */
1512
+ at[iC].charge ||
1513
+ at[iC].radical && at[iC].radical != RADICAL_SINGLET ||
1514
+ at[iC].valence == at[iC].chem_bonds_valence ) {
1515
+ return -1; /* oxigen is connected to a wrong atom */
1516
+ }
1517
+ #else
1518
+ if ( at[iC].el_number != el_number_C ||
1519
+ at[iC].num_H ||
1520
+ at[iC].chem_bonds_valence != 4 || /* allow only no H on C */
1521
+ at[iC].charge ||
1522
+ at[iC].radical && at[iC].radical != RADICAL_SINGLET ||
1523
+ at[iC].valence == at[iC].chem_bonds_valence ) {
1524
+ return -1; /* oxigen is connected to a wrong atom */
1525
+ }
1526
+ #endif
1527
+ if ( (tg = at[at_no].endpoint) && t_group_info && t_group_info->t_group ) {
1528
+ /* O-atom is in a tautomeric group */
1529
+ for ( i = 0; i < t_group_info->num_t_groups; i ++ ) {
1530
+ if ( tg == t_group_info->t_group[i].nGroupNumber ) {
1531
+ /*
1532
+ t_group_info->t_group[i].num[0] = number of attached H-atoms and negative charges
1533
+ t_group_info->t_group[i].num[1] = number of attached negative charges
1534
+ */
1535
+ if ( t_group_info->t_group[i].num[0] > t_group_info->t_group[i].num[1] ) {
1536
+ *s_subtype |= SALT_DONOR_H; /* has H */
1537
+ }
1538
+ if ( t_group_info->t_group[i].num[1] ) {
1539
+ *s_subtype |= SALT_DONOR_Neg; /* has (-) */
1540
+ }
1541
+ *s_subtype |= SALT_ACCEPTOR; /* there is always an acceptor in a t-group */
1542
+ return type;
1543
+ }
1544
+ }
1545
+ return -1; /* error: t-group not found */
1546
+ }
1547
+ /* O is not not in a tautomeric group */
1548
+ /* assume valence(O-) < valence(O) < valence(O+) */
1549
+ if ( at[at_no].charge == -1 ) {
1550
+ *s_subtype |= SALT_DONOR_Neg; /* has (-) */
1551
+ }
1552
+ if ( at[at_no].charge <= 0 && at[at_no].num_H ) {
1553
+ *s_subtype |= SALT_DONOR_H; /* has H */
1554
+ }
1555
+ if ( at[at_no].charge == 0 && at[at_no].chem_bonds_valence == 2 ) {
1556
+ *s_subtype |= SALT_ACCEPTOR;
1557
+ }
1558
+ /* since O cannot be a charge point, the following cannot happen: */
1559
+ if ( at[at_no].charge == 1 && at[at_no].c_point && at[at_no].chem_bonds_valence == 2 && at[at_no].num_H ) {
1560
+ *s_subtype |= SALT_DONOR_H; /* has H */
1561
+ }
1562
+ return type;
1563
+ }
1564
+ /********************************************************************************************************/
1565
+ int bDoNotMergeNonTautAtom(inp_ATOM *at, int at_no)
1566
+ {
1567
+ static int el_number_N = 0;
1568
+
1569
+ if ( !el_number_N ) {
1570
+ el_number_N = get_periodic_table_number( "N" );
1571
+ }
1572
+ if ( at[at_no].el_number == el_number_N )
1573
+ {
1574
+ return 1;
1575
+ }
1576
+ return 0;
1577
+ }
1578
+ /********************************************************************************************************/
1579
+ int GetOtherSaltChargeType( inp_ATOM *at, int at_no, T_GROUP_INFO *t_group_info, int *s_subtype, int bAccept_O )
1580
+ {
1581
+ /* static int el_number_C = 0; */
1582
+ /* static int el_number_N = 0; */
1583
+ static int el_number_O = 0;
1584
+ static int el_number_S = 0;
1585
+ static int el_number_Se = 0;
1586
+ static int el_number_Te = 0;
1587
+
1588
+ /*
1589
+ type (returned value):
1590
+ -1 => ignore
1591
+ 1 => not an oxygen
1592
+ subtype:
1593
+ 1 = SALT_DONOR_H => has H
1594
+ 2 = SALT_DONOR_Neg => has (-) charge
1595
+ 4 = SALT_ACCEPTOR => may be an acceptor of H or (-), but not necessarily
1596
+
1597
+ the atom should be:
1598
+ - a tautomeric endpoint atom
1599
+ - connected to possible centerpoint atom
1600
+
1601
+ another description of the atom searched here:
1602
+
1603
+ any possibly tautomeric atom adjacent to a possibly centerpoint
1604
+ that has at least one double bond (possibly if positively charged);
1605
+ if eif.cAcceptor then the bond between the atom and the centerpoint must be possibly double
1606
+ if eif.cAcceptor then the bond must be possibly single
1607
+ Donors that belong to a t-group are also acceptors
1608
+
1609
+
1610
+ */
1611
+ int tg, i, j, type, endpoint_valence, num_centerpoints, bond_type, centerpoint;
1612
+ ENDPOINT_INFO eif;
1613
+ /* one-time initialization */
1614
+ if ( !el_number_O && !bAccept_O ) {
1615
+ /* el_number_C = get_periodic_table_number( "C" ); */
1616
+ /* el_number_N = get_periodic_table_number( "N" ); */
1617
+ el_number_O = get_periodic_table_number( "O" );
1618
+ el_number_S = get_periodic_table_number( "S" );
1619
+ el_number_Se = get_periodic_table_number( "Se" );
1620
+ el_number_Te = get_periodic_table_number( "Te" );
1621
+ }
1622
+ *s_subtype = 0; /* initialize the output */
1623
+ if ( !bAccept_O /* only N */ &&
1624
+ (at[at_no].el_number == el_number_O ||
1625
+ at[at_no].el_number == el_number_S ||
1626
+ at[at_no].el_number == el_number_Se ||
1627
+ at[at_no].el_number == el_number_Te ) ) {
1628
+ return -1; /* we are not looking for oxygen here */
1629
+ }
1630
+
1631
+ type = 1;
1632
+ if ( !(endpoint_valence = nGetEndpointInfo( at, at_no, &eif )) ) {
1633
+ return -1; /* not a possible endpoint */
1634
+ } else {
1635
+ /* at[at_no] is not not in a tautomeric group; use eif previously filled out by nGetEndpointInfo */
1636
+ /* check whether there is adjacent atom-candidate for a centerpoint */
1637
+ num_centerpoints = 0;
1638
+ for ( j = 0; j < at[at_no].valence; j ++ ) {
1639
+ bond_type = (int)at[at_no].bond_type[j] & BOND_TYPE_MASK;
1640
+ centerpoint = (int)at[at_no].neighbor[j]; /* a centerpoint candidate */
1641
+ if ( ( eif.cAcceptor && (bond_type == BOND_DOUBLE ||
1642
+ bond_type == BOND_ALTERN || /* possibly double */
1643
+ bond_type == BOND_ALT12NS ||
1644
+ bond_type == BOND_TAUTOM ) ||
1645
+ eif.cDonor && (bond_type == BOND_SINGLE ||
1646
+ bond_type == BOND_ALTERN || /* possibly single */
1647
+ bond_type == BOND_ALT12NS ||
1648
+ bond_type == BOND_TAUTOM ) ) &&
1649
+ (at[centerpoint].chem_bonds_valence > at[centerpoint].valence ||
1650
+ /* check for possible endpoint added 2004-02-24 */
1651
+ at[centerpoint].chem_bonds_valence == at[centerpoint].valence &&
1652
+ (at[centerpoint].endpoint || at[centerpoint].c_point) /* tautomerism or charge may increment at[centerpoint].chem_bonds_valence*/ ) &&
1653
+ is_centerpoint_elem( at[centerpoint].el_number ) ) {
1654
+ num_centerpoints ++;
1655
+ break; /* at least one possibly centerpoint neighbor has been found */
1656
+ }
1657
+ }
1658
+ if ( !num_centerpoints ) {
1659
+ return -1;
1660
+ }
1661
+ /* moved here from just after "type = 1;" line 2004-02-26 */
1662
+ if ( (tg = at[at_no].endpoint) && t_group_info && t_group_info->t_group ) {
1663
+ /* atom is in a tautomeric group */
1664
+ for ( i = 0; i < t_group_info->num_t_groups; i ++ ) {
1665
+ if ( tg == t_group_info->t_group[i].nGroupNumber ) {
1666
+ /*
1667
+ t_group_info->t_group[i].num[0] = number of attached H-atoms and negative charges
1668
+ t_group_info->t_group[i].num[1] = number of attached negative charges
1669
+ */
1670
+ if ( t_group_info->t_group[i].num[0] > t_group_info->t_group[i].num[1] ) {
1671
+ *s_subtype |= SALT_DONOR_H; /* has H */
1672
+ }
1673
+ if ( t_group_info->t_group[i].num[1] ) {
1674
+ *s_subtype |= SALT_DONOR_Neg; /* has (-) */
1675
+ }
1676
+ *s_subtype |= SALT_ACCEPTOR; /* there is always an acceptor in a t-group */
1677
+ return type;
1678
+ }
1679
+ }
1680
+ return -1; /* error: t-group not found */
1681
+ }
1682
+
1683
+ if ( eif.cAcceptor ) {
1684
+ *s_subtype |= SALT_ACCEPTOR;
1685
+ }
1686
+ if ( eif.cDonor ) {
1687
+ if ( at[at_no].charge == -1 ) {
1688
+ *s_subtype |= SALT_DONOR_Neg; /* has (-) */
1689
+ }
1690
+ if ( at[at_no].num_H ) {
1691
+ *s_subtype |= SALT_DONOR_H; /* has H */
1692
+ }
1693
+ }
1694
+ }
1695
+ return type;
1696
+ }
1697
+ /********************************************************************************************************/
1698
+ int GetOtherSaltType( inp_ATOM *at, int at_no, int *s_subtype )
1699
+ {
1700
+ static int el_number_C = 0;
1701
+ /* static int el_number_N = 0; */
1702
+ /* static int el_number_O = 0; */
1703
+ static int el_number_S = 0;
1704
+ static int el_number_Se = 0;
1705
+ static int el_number_Te = 0;
1706
+
1707
+ /*
1708
+ type (returned value):
1709
+ -1 => ignore
1710
+ 2 => found: SH
1711
+ proton donor -CH2-SH, >CH-SH, >C< S(-)
1712
+ proton acceptor -CH2-S(-), >CH-S(-), >C<
1713
+ subtype:
1714
+ 1 = SALT_DONOR_H => has H
1715
+ 2 = SALT_DONOR_Neg => has (-) charge
1716
+ 4 = SALT_ACCEPTOR => may be an acceptor of H or (-), but not necessarily
1717
+
1718
+ non-O-atom should be:
1719
+ - a tautomeric endpoint atom
1720
+ - connected to possible middle point atom
1721
+ */
1722
+ int type, endpoint_valence, bond_type, centerpoint;
1723
+ ENDPOINT_INFO eif;
1724
+
1725
+ if ( at[at_no].valence != 1 || at[at_no].chem_bonds_valence != 1 ||
1726
+ 1 != (at[at_no].num_H==1) + (at[at_no].charge==-1) ) {
1727
+ return -1;
1728
+ }
1729
+ /* one-time initialization */
1730
+ if ( !el_number_S ) {
1731
+ el_number_C = get_periodic_table_number( "C" );
1732
+ /* el_number_N = get_periodic_table_number( "N" ); */
1733
+ /* el_number_O = get_periodic_table_number( "O" ); */
1734
+ el_number_S = get_periodic_table_number( "S" );
1735
+ el_number_Se = get_periodic_table_number( "Se" );
1736
+ el_number_Te = get_periodic_table_number( "Te" );
1737
+ }
1738
+ *s_subtype = 0; /* initialize the output */
1739
+ if ( !(at[at_no].el_number == el_number_S ||
1740
+ at[at_no].el_number == el_number_Se ||
1741
+ at[at_no].el_number == el_number_Te ) ) {
1742
+ return -1; /* we are not looking for oxygen here */
1743
+ }
1744
+
1745
+ type = 2; /* non-tautomeric p-donor or acceptor: C-SH, C-S(-) */
1746
+
1747
+ if ( !(endpoint_valence = nGetEndpointInfo( at, at_no, &eif )) ||
1748
+ eif.cMoveableCharge && !at[at_no].c_point || !eif.cDonor || eif.cAcceptor ) {
1749
+ return -1; /* not a possible -SH or -S(-) */
1750
+ } else {
1751
+ /* at[at_no] is not not in a tautomeric group; use eif previously filled out by nGetEndpointInfo */
1752
+ /* check whether there is adjacent atom-candidate for a centerpoint */
1753
+ centerpoint = (int)at[at_no].neighbor[0];
1754
+ bond_type = (int)at[at_no].bond_type[0] & BOND_TYPE_MASK;
1755
+ if ( at[centerpoint].el_number != el_number_C ||
1756
+ at[centerpoint].charge ||
1757
+ at[centerpoint].radical && at[centerpoint].radical != RADICAL_SINGLET ||
1758
+ at[centerpoint].valence != at[centerpoint].chem_bonds_valence ) {
1759
+ return -1; /* not a carbon with all single bonds */
1760
+ }
1761
+ if ( at[at_no].num_H == 1 ) {
1762
+ *s_subtype |= SALT_p_DONOR;
1763
+ } else
1764
+ if ( at[at_no].charge == -1 ) {
1765
+ *s_subtype |= SALT_p_ACCEPTOR;
1766
+ } else {
1767
+ return -1;
1768
+ }
1769
+ }
1770
+ return type;
1771
+ }
1772
+
1773
+ /********************************************************************************************************/
1774
+ /* new version: merge all, check alt paths, then unmerge unreachable O-atoms if any */
1775
+ /* Check for oxygen negative charge-H tautomerism (Salts)
1776
+ allowed long-range tautomerism; more than one H or (-) can be moved, for example:
1777
+ HO-C=C-O(-) O=C-C=O
1778
+ / \ / \
1779
+ R R R R
1780
+ | | => | |
1781
+ R' R' R' R'
1782
+ \ / \ /
1783
+ O=C-C=O HO-C=C-O(-)
1784
+
1785
+ To check:
1786
+
1787
+ | |
1788
+ -add all possible HO-C=, O=C, (-)O-C= (including all containing O t-groups) into one t-group;
1789
+ -temporarily disconnect one of previously not belonging to any t-group O-atoms from the one t-group;
1790
+ -find whether there is an alt path allowing H or (-) to migrate
1791
+ from the temp. disconnected O to any one left in the group.
1792
+ If the alt path does not exist then the temp. disconnected atom does not
1793
+ participate in the H/(-) migration and it will be unmarked/unmerged.
1794
+
1795
+ */
1796
+ /********************************************************************************************************/
1797
+ int comp_candidates( const void *a1, const void *a2 )
1798
+ {
1799
+ const S_CANDIDATE *s1 = (const S_CANDIDATE *)a1;
1800
+ const S_CANDIDATE *s2 = (const S_CANDIDATE *)a2;
1801
+ int ret;
1802
+ if ( s1->type >= 0 /* enabled < */ && s2->type < 0 /* disabled */ )
1803
+ return -1; /* enabled goes first */
1804
+ if ( s1->type < 0 /* disabled > */ && s2->type >= 0 /* enabled */ )
1805
+ return 1;
1806
+ if ( s1->endpoint && !s2->endpoint )
1807
+ return -1; /* tautomeric goes first; only tautomeric may be disabled */
1808
+ if ( !s1->endpoint && s2->endpoint )
1809
+ return 1; /* tautomeric goes first; only tautomeric may be disabled */
1810
+ if ( s1->endpoint && s2->endpoint && (ret = (int)s1->endpoint - (int)s2->endpoint) ) {
1811
+ return ret;
1812
+ }
1813
+ return (int)s1->atnumber - (int)s2->atnumber;
1814
+ }
1815
+ /********************************************************************************************************/
1816
+ int MarkSaltChargeGroups2 ( inp_ATOM *at, int num_atoms, S_GROUP_INFO *s_group_info,
1817
+ T_GROUP_INFO *t_group_info, C_GROUP_INFO *c_group_info,
1818
+ struct BalancedNetworkStructure *pBNS, struct BalancedNetworkData *pBD )
1819
+ {
1820
+ /* BNS_EDGE_FORBIDDEN_TEMP */
1821
+ #define ALT_PATH_FOUND (MAX_ATOMS+1)
1822
+ #define NO_ENDPOINT (MAX_ATOMS+2) /* the two defines must be different */
1823
+ #define DISABLE_CANDIDATE 10
1824
+ #define cPAIR(a,b) cPair[a+b*nNumLeftCandidates]
1825
+ #define ACCEPTOR_PAIR 1
1826
+ #define DONOR_PAIR 2
1827
+
1828
+ int nNumChanges = 0, nNumOtherChanges = 0, nNumAcidicChanges = 0, nTotNumChanges = 0;
1829
+ S_CHAR *cPair = NULL;
1830
+ T_ENDPOINT *EndPoint = NULL;
1831
+ if ( s_group_info && s_group_info->s_candidate && s_group_info->max_num_candidates > 0 ) {
1832
+ int i, j, i1, j1;
1833
+ S_CANDIDATE *s_candidate = s_group_info->s_candidate;
1834
+ int nMaxNumCandidates = s_group_info->max_num_candidates;
1835
+ int nNumCandidates = s_group_info->num_candidates;
1836
+ int nNumOtherCandidates = s_group_info->num_other_candidates;
1837
+ int nNumPOnlyCandidates = s_group_info->num_p_only_candidates;
1838
+ int nNumLeftCandidates = 0;
1839
+ int nNumMarkedCandidates = 0;
1840
+ int s_type, s_subtype;
1841
+ int ret, nDelta;
1842
+ int bHardAddedRemovedProtons = t_group_info && (t_group_info->tni.bNormalizationFlags & FLAG_FORCE_SALT_TAUT);
1843
+
1844
+ int s_subtype_all = 0;
1845
+ int nDonorPairs, nAcceptorPairs, nCurDonorPairs, nCurAcceptorPairs, bAlreadyTested;
1846
+ /*
1847
+ ENDPOINT_INFO eif;
1848
+ */
1849
+
1850
+ #if( IGNORE_TGROUP_WITHOUT_H == 1 )
1851
+ int bTGroupHasNegativeChargesOnly = 1;
1852
+ #endif
1853
+ /*return 0;*/ /* debug only */
1854
+
1855
+ i1 = -1;
1856
+
1857
+ if ( nNumCandidates <= -2 || !t_group_info || !t_group_info->t_group ) {
1858
+ return 0;
1859
+ }
1860
+ /*************************************************************************/
1861
+ /* find all candidates including those with differen s_type (other type) */
1862
+ /*************************************************************************/
1863
+ for ( i = 0, nNumCandidates = nNumOtherCandidates = nNumPOnlyCandidates = 0; i < num_atoms; i ++ ) {
1864
+ if ( 0 == (s_type = GetSaltChargeType( at, i, t_group_info, &s_subtype )) ||
1865
+ /* -C=O or =C-OH, O = S, Se, Te */
1866
+ 1 == (s_type = GetOtherSaltChargeType( at, i, t_group_info, &s_subtype, 1/* bAccept_O*/ )) ||
1867
+ /* =Z-MH or -Z=M, Z = centerpoint, M = endpoint, other than above */
1868
+ 2 == (s_type = GetOtherSaltType( at, i, &s_subtype ) ) ||
1869
+ ( bHardAddedRemovedProtons && 4 == (s_type = bIsHardRemHCandidate( at, i, &s_subtype ) ) )
1870
+ /* >C-SH, >C-S(-); S=S,Se,Te */
1871
+ ) {
1872
+
1873
+ if ( nNumCandidates >= nMaxNumCandidates ) {
1874
+ return BNS_VERT_EDGE_OVFL;
1875
+ }
1876
+ s_candidate[nNumCandidates].atnumber = i;
1877
+ s_candidate[nNumCandidates].type = s_type;
1878
+ s_candidate[nNumCandidates].subtype = s_subtype;
1879
+ s_candidate[nNumCandidates].endpoint = at[i].endpoint;
1880
+ nNumCandidates ++;
1881
+ nNumOtherCandidates += (1 == s_type);
1882
+ s_subtype_all |= s_subtype;
1883
+ i1 = i; /* save a representative of a tautomeric group */
1884
+ }
1885
+ }
1886
+
1887
+ if ( nNumCandidates <= 1 || /* TG_FLAG_ALLOW_NO_NEGTV_O <=> CHARGED_SALTS_ONLY=0 */
1888
+ !(s_subtype_all & SALT_ACCEPTOR) ||
1889
+ (((t_group_info->bTautFlags & TG_FLAG_ALLOW_NO_NEGTV_O) ||
1890
+ (t_group_info->bTautFlagsDone & TG_FLAG_FOUND_SALT_CHARGES_DONE) ||
1891
+ (t_group_info->tni.bNormalizationFlags & FLAG_FORCE_SALT_TAUT)) ?
1892
+ !(s_subtype_all & (SALT_DONOR)):
1893
+ (!(s_subtype_all & SALT_DONOR_Neg) || nNumOtherCandidates == nNumCandidates ))
1894
+ ) {
1895
+ s_group_info->num_candidates = 0; /* no candidate exists */
1896
+ return 0;
1897
+ }
1898
+ if ( !(s_subtype_all & (SALT_DONOR_Neg) ) ) {
1899
+ t_group_info->bTautFlagsDone |= TG_FLAG_ALLOW_NO_NEGTV_O_DONE;
1900
+ }
1901
+
1902
+ /************************************************************************************/
1903
+ /* Mark redundant candidates so that only one candidate from one t-group is left in */
1904
+ /************************************************************************************/
1905
+ for ( i = 0; i < nNumCandidates; i ++ ) {
1906
+ if ( 2 == s_candidate[nNumCandidates].type ) {
1907
+ s_candidate[i].type -= DISABLE_CANDIDATE; /* disable >C-SH candidates */
1908
+ nNumLeftCandidates ++; /* count rejected */
1909
+ continue;
1910
+ }
1911
+ if ( s_candidate[i].endpoint ) {
1912
+ for ( j = i-1; 0 <= j; j -- ) {
1913
+ if ( s_candidate[i].endpoint == s_candidate[j].endpoint ) {
1914
+ s_candidate[i].type -= DISABLE_CANDIDATE; /* disable subsequent redundant */
1915
+ nNumLeftCandidates ++; /* count rejected */
1916
+ break;
1917
+ }
1918
+ }
1919
+ }
1920
+ }
1921
+ nNumLeftCandidates = nNumCandidates - nNumLeftCandidates; /* subtract num. rejected from the total */
1922
+ s_group_info->num_candidates = 0; /* reinit next time */
1923
+ /*********************************************************************/
1924
+ /* reorder so that all disabled are at the end, tautomeric are first */
1925
+ /*********************************************************************/
1926
+ qsort ( s_candidate, nNumCandidates, sizeof(s_candidate[0]), comp_candidates );
1927
+ cPair = (S_CHAR *)inchi_calloc( nNumLeftCandidates*nNumLeftCandidates, sizeof(cPair[0]) );
1928
+ if ( !cPair ) {
1929
+ /*printf("BNS_OUT_OF_RAM-6\n");*/
1930
+ nTotNumChanges = BNS_OUT_OF_RAM;
1931
+ goto quick_exit;
1932
+ }
1933
+ nDonorPairs = nAcceptorPairs = 0;
1934
+ /**********************************************************************/
1935
+ /* Find whether we have at least one donor pair and one acceptor pair */
1936
+ /**********************************************************************/
1937
+ for ( i = 0; i < nNumLeftCandidates; i ++ ) {
1938
+ nCurDonorPairs = nCurAcceptorPairs = 0;
1939
+ for ( j = 0; j <= i; j ++ ) {
1940
+ if ( i == j && !s_candidate[i].endpoint ) {
1941
+ continue; /* same non-taut atom. However, success for i==j means *
1942
+ * that the whole tautomeric group may donate or accept 2H */
1943
+ }
1944
+ /* check for acceptor pair */
1945
+ if ( (s_candidate[i].subtype & SALT_ACCEPTOR) && (s_candidate[j].subtype & SALT_ACCEPTOR) &&
1946
+ (ret = bExistsAltPath( pBNS, pBD, NULL, at, num_atoms, s_candidate[i].atnumber,
1947
+ s_candidate[j].atnumber, ALT_PATH_MODE_ADD2H_TST ))) {
1948
+ if ( IS_BNS_ERROR( ret ) ) {
1949
+ nTotNumChanges = ret;
1950
+ goto quick_exit;
1951
+ }
1952
+ if ( ret & 1 ) {
1953
+ nDelta = (ret & ~3) >> 2;
1954
+ /*nNumChanges += (ret & 2);*/
1955
+ if ( nDelta ) {
1956
+ /* alt path unleashed previously localized radicals and they annihilated */
1957
+ nNumChanges = 0;
1958
+ nTotNumChanges = BNS_RADICAL_ERR;
1959
+ goto quick_exit;
1960
+ }
1961
+ cPAIR(i,j) |= ACCEPTOR_PAIR; /* the result: mark the pair */
1962
+ /*cPAIR(j,i) |= ACCEPTOR_PAIR;*/
1963
+ }
1964
+ }
1965
+ /* check for donor pair */
1966
+ if ( (s_candidate[i].subtype & SALT_DONOR) && (s_candidate[j].subtype & SALT_DONOR) &&
1967
+ (ret = bExistsAltPath( pBNS, pBD, NULL, at, num_atoms, s_candidate[i].atnumber,
1968
+ s_candidate[j].atnumber, ALT_PATH_MODE_REM2H_TST ))) {
1969
+ if ( IS_BNS_ERROR( ret ) ) {
1970
+ nTotNumChanges = ret;
1971
+ goto quick_exit;
1972
+ }
1973
+ if ( ret & 1 ) {
1974
+ nDelta = (ret & ~3) >> 2;
1975
+ /*nNumChanges += (ret & 2);*/
1976
+ if ( nDelta ) {
1977
+ /* alt path unleashed previously localized radicals and they annihilated */
1978
+ nNumChanges = 0;
1979
+ nTotNumChanges = BNS_RADICAL_ERR;
1980
+ goto quick_exit;
1981
+ }
1982
+ cPAIR(i,j) |= DONOR_PAIR; /* the result: mark the pair */
1983
+ /*cPAIR(j,i) |= ACCEPTOR_PAIR;*/
1984
+ }
1985
+ }
1986
+ /* since the results will be used later to change bonds, check only now */
1987
+ /* when both results for (i,j) have been obtained. */
1988
+ if ( cPAIR(i,j) & ACCEPTOR_PAIR ) {
1989
+ nCurAcceptorPairs ++;
1990
+ if ( nDonorPairs ) {
1991
+ /* find donor pair (i1,j1) such that i!=i1, i!=j1, j!=i1, j!=j1 */
1992
+ for ( i1 = 0; i1 < i; i1 ++ ) {
1993
+ for ( j1 = 0; j1 <= i1; j1 ++ ) {
1994
+ /* here always j1 < i && i1 < i therefore we do not compare i to i1 or j1 */
1995
+ if ( j1 != j && i1 != j && (cPAIR(i1,j1) & DONOR_PAIR) ) {
1996
+ /* both the donor and the acceptor pairs have been found */
1997
+ goto bFound2Pairs;
1998
+ }
1999
+ }
2000
+ }
2001
+ }
2002
+ }
2003
+ if ( cPAIR(i,j) & DONOR_PAIR ) {
2004
+ nCurDonorPairs ++;
2005
+ if ( nAcceptorPairs ) {
2006
+ /* find acceptor pair (i1,j1) such that i!=i1, i!=j1, j!=i1, j!=j1 */
2007
+ for ( i1 = 0; i1 < i; i1 ++ ) {
2008
+ for ( j1 = 0; j1 <= i1; j1 ++ ) {
2009
+ /* here always j1 < i && i1 < i therefore we do not compare i to i1 or j1 */
2010
+ if ( j1 != j && i1 != j && (cPAIR(i1,j1) & ACCEPTOR_PAIR) ) {
2011
+ /* both the donor and the acceptor pairs have been found */
2012
+ goto bFound2Pairs;
2013
+ }
2014
+ }
2015
+ }
2016
+ }
2017
+ }
2018
+ }
2019
+ nDonorPairs += nCurDonorPairs;
2020
+ nAcceptorPairs += nCurAcceptorPairs;
2021
+ }
2022
+ /* nothing has been found */
2023
+ nNumChanges = 0;
2024
+ inchi_free( cPair );
2025
+ cPair = NULL;
2026
+ goto quick_exit;
2027
+
2028
+
2029
+ /* both the donor and the acceptor pairs have been found */
2030
+ bFound2Pairs:
2031
+ /* first, try already found pairs */
2032
+ i1 = i;
2033
+ j1 = j;
2034
+
2035
+ /* Find all possible donor and acceptor pairs */
2036
+ nNumMarkedCandidates = 0;
2037
+ for ( i = 0; i < nNumLeftCandidates; i ++ ) {
2038
+ nCurDonorPairs = nCurAcceptorPairs = 0;
2039
+ for ( j = 0; j <= i; j ++ ) {
2040
+ bAlreadyTested = (i < i1 || i == i1 && j <= j1);
2041
+ if ( bAlreadyTested && (cPAIR(i,j) & ACCEPTOR_PAIR) || !bAlreadyTested ) {
2042
+ /* checking for acceptor pair */
2043
+ if ( (s_candidate[i].subtype & SALT_ACCEPTOR) && (s_candidate[j].subtype & SALT_ACCEPTOR) &&
2044
+ (ret = bExistsAltPath( pBNS, pBD, NULL, at, num_atoms, s_candidate[i].atnumber,
2045
+ s_candidate[j].atnumber, ALT_PATH_MODE_ADD2H_CHG ))) {
2046
+ if ( IS_BNS_ERROR( ret ) ) {
2047
+ nTotNumChanges = ret;
2048
+ goto quick_exit;
2049
+ }
2050
+ if ( ret & 1 ) {
2051
+ nDelta = (ret & ~3) >> 2;
2052
+ nNumChanges += (ret & 2);
2053
+ if ( nDelta ) {
2054
+ /* alt path unleashed previously localized radicals and they annihilated */
2055
+ nNumChanges = 0;
2056
+ nTotNumChanges = BNS_RADICAL_ERR;
2057
+ goto quick_exit;
2058
+ }
2059
+ cPAIR(i,j) |= ACCEPTOR_PAIR;
2060
+ /*cPAIR(j,i) |= ACCEPTOR_PAIR;*/
2061
+ nCurAcceptorPairs += !bAlreadyTested;
2062
+ if ( !(s_candidate[i].subtype & SALT_SELECTED) ) {
2063
+ s_candidate[i].subtype |= SALT_SELECTED;
2064
+ nNumMarkedCandidates ++;
2065
+ if ( !s_candidate[i].endpoint && s_candidate[i].type ) {
2066
+ nNumOtherChanges ++;
2067
+ } else {
2068
+ nNumAcidicChanges ++;
2069
+ }
2070
+ }
2071
+ if ( !(s_candidate[j].subtype & SALT_SELECTED) ) {
2072
+ s_candidate[j].subtype |= SALT_SELECTED;
2073
+ nNumMarkedCandidates ++;
2074
+ if ( !s_candidate[j].endpoint && s_candidate[j].type ) {
2075
+ nNumOtherChanges ++;
2076
+ } else {
2077
+ nNumAcidicChanges ++;
2078
+ }
2079
+ }
2080
+ }
2081
+ }
2082
+ }
2083
+ if ( bAlreadyTested && (cPAIR(i,j) & DONOR_PAIR) || !bAlreadyTested ) {
2084
+ /* checking for donor pair */
2085
+ if ( (s_candidate[i].subtype & SALT_DONOR) && (s_candidate[j].subtype & SALT_DONOR) &&
2086
+ (ret = bExistsAltPath( pBNS, pBD, NULL, at, num_atoms, s_candidate[i].atnumber,
2087
+ s_candidate[j].atnumber, ALT_PATH_MODE_REM2H_CHG ))) {
2088
+ if ( IS_BNS_ERROR( ret ) ) {
2089
+ nTotNumChanges = ret;
2090
+ goto quick_exit;
2091
+ }
2092
+ if ( ret & 1 ) {
2093
+ nDelta = (ret & ~3) >> 2;
2094
+ nNumChanges += (ret & 2);
2095
+ if ( nDelta ) {
2096
+ /* alt path unleashed previously localized radicals and they annihilated */
2097
+ nNumChanges = 0;
2098
+ nTotNumChanges = BNS_RADICAL_ERR;
2099
+ goto quick_exit;
2100
+ }
2101
+ cPAIR(i,j) |= DONOR_PAIR;
2102
+ /*cPAIR(j,i) |= ACCEPTOR_PAIR;*/
2103
+ nCurDonorPairs += !bAlreadyTested;
2104
+ if ( !(s_candidate[i].subtype & SALT_SELECTED) ) {
2105
+ s_candidate[i].subtype |= SALT_SELECTED;
2106
+ nNumMarkedCandidates ++;
2107
+ if ( !s_candidate[i].endpoint && s_candidate[i].type ) {
2108
+ nNumOtherChanges ++;
2109
+ } else {
2110
+ nNumAcidicChanges ++;
2111
+ }
2112
+ }
2113
+ if ( !(s_candidate[j].subtype & SALT_SELECTED) ) {
2114
+ s_candidate[j].subtype |= SALT_SELECTED;
2115
+ nNumMarkedCandidates ++;
2116
+ if ( !s_candidate[j].endpoint && s_candidate[j].type ) {
2117
+ nNumOtherChanges ++;
2118
+ } else {
2119
+ nNumAcidicChanges ++;
2120
+ }
2121
+ }
2122
+ }
2123
+ }
2124
+ }
2125
+ }
2126
+ nDonorPairs += nCurDonorPairs;
2127
+ nAcceptorPairs += nCurAcceptorPairs;
2128
+ }
2129
+ inchi_free( cPair );
2130
+ cPair = NULL;
2131
+
2132
+ if ( nNumMarkedCandidates ) {
2133
+ EndPoint = (T_ENDPOINT *)inchi_calloc( nNumMarkedCandidates, sizeof(EndPoint[0]));
2134
+ if ( !EndPoint ) {
2135
+ /*printf("BNS_OUT_OF_RAM-7\n");*/
2136
+ nTotNumChanges = BNS_OUT_OF_RAM;
2137
+ goto quick_exit;
2138
+ }
2139
+ for ( i = 0, j = 0; i < nNumLeftCandidates; i ++ ) {
2140
+ if ( s_candidate[i].subtype & SALT_SELECTED ) {
2141
+ s_candidate[i].subtype ^= SALT_SELECTED; /* remove the flag */
2142
+ if ( j < nNumMarkedCandidates ) {
2143
+ i1 = s_candidate[i].atnumber; /* save a representative of the t-group to be created */
2144
+ AddEndPoint( EndPoint+j, at, i1 );
2145
+ }
2146
+ j ++;
2147
+ }
2148
+ }
2149
+ if ( j != nNumMarkedCandidates ) {
2150
+ nTotNumChanges = BNS_PROGRAM_ERR;
2151
+ goto quick_exit;
2152
+ }
2153
+ /* merge all marked atoms and their t-groups into one t-group */
2154
+ ret = RegisterEndPoints( t_group_info, EndPoint, nNumMarkedCandidates, at, num_atoms, c_group_info, pBNS );
2155
+ if ( ret == -1 ) {
2156
+ ret = BNS_PROGRAM_ERR;
2157
+ }
2158
+ if ( ret < 0 ) {
2159
+ nTotNumChanges = ret;
2160
+ goto quick_exit;
2161
+ }
2162
+ nTotNumChanges += (ret > 0);
2163
+ inchi_free( EndPoint );
2164
+ EndPoint = NULL;
2165
+
2166
+ if ( nNumMarkedCandidates ) {
2167
+ for ( i = nNumLeftCandidates; i < nNumCandidates; i ++ ) {
2168
+ s_candidate[i].type += DISABLE_CANDIDATE;
2169
+ j1 = s_candidate[i].atnumber;
2170
+ if ( at[j1].endpoint == at[i1].endpoint ) {
2171
+ if ( !s_candidate[i].endpoint && s_candidate[i].type ) {
2172
+ nNumOtherChanges ++;
2173
+ } else {
2174
+ nNumAcidicChanges ++;
2175
+ }
2176
+ }
2177
+ }
2178
+ } else {
2179
+ for ( i = nNumLeftCandidates; i < nNumCandidates; i ++ ) {
2180
+ s_candidate[i].type += DISABLE_CANDIDATE;
2181
+ }
2182
+ }
2183
+
2184
+ /* find whether the new t-group have any movable H */
2185
+ for ( i = 0, bTGroupHasNegativeChargesOnly = 0; i < t_group_info->num_t_groups; i ++ ) {
2186
+ if ( t_group_info->t_group[i].nGroupNumber == at[i1].endpoint &&
2187
+ t_group_info->t_group[i].num[0] == t_group_info->t_group[i].num[1] ) {
2188
+ bTGroupHasNegativeChargesOnly = 1;
2189
+ break;
2190
+ }
2191
+ }
2192
+ }
2193
+ nTotNumChanges = ( nTotNumChanges > 0);
2194
+
2195
+ #if( IGNORE_TGROUP_WITHOUT_H == 1 )
2196
+ if ( nTotNumChanges && bTGroupHasNegativeChargesOnly ) {
2197
+ nTotNumChanges = 2; /* means no moveable H has been affected */
2198
+ }
2199
+ #endif
2200
+ }
2201
+
2202
+ quick_exit:
2203
+ if ( nNumOtherChanges && nTotNumChanges == 1 ) {
2204
+ nTotNumChanges = 5; /* not only acidic atoms merged */
2205
+ }
2206
+ if ( cPair ) {
2207
+ inchi_free( cPair );
2208
+ /*cPair = NULL;*/
2209
+ }
2210
+ if ( EndPoint ) {
2211
+ inchi_free ( EndPoint );
2212
+ /*EndPoint = NULL;*/
2213
+ }
2214
+ return nTotNumChanges; /* 0=>no changes, 1=>new salt tautomerism found, 2=>only new charge tautomerism found */
2215
+ #undef ALT_PATH_FOUND
2216
+ #undef NO_ENDPOINT
2217
+ }
2218
+ /********************************************************************************************************/
2219
+ /* regular one-path version: find alt paths then merge */
2220
+ /* Check for oxygen negative charge-H tautomerism (Salts)
2221
+ allowed long-range tautomerism; only one H or (-) can be moved, for example:
2222
+ HO-C=X-Y=Z-...-C=O => O=C-X=Y-Z=...=C-OH
2223
+ */
2224
+
2225
+ #if ( SALT_WITH_PROTONS == 1 )
2226
+
2227
#define MAX_LOCAL_TGNUM 0 /* was 32; disable since it has not been used */

#if ( MAX_LOCAL_TGNUM > 0 )
/* Per-t-group counters; compiled only when MAX_LOCAL_TGNUM is positive */
struct tagTGroupData {
    S_SHORT nGroupNumber; /* t-group number from t_group_info->t_group->nGroupNumber */
    S_SHORT nGroupIndex;  /* TGroupData[nGroupNumber].nGroupIndex = index of t_group in t_group_info */
    S_SHORT nDonorM;      /* number of endpoint-donors that have negative charge (Minus) */
    S_SHORT nDonorH;      /* number of endpoint-donors that have only H */
    S_SHORT nAccepM;      /* number of endpoint-acceptors that have negative charge (Minus) */
    S_SHORT nAccepH;      /* number of endpoint-acceptors that have H and no negative charge */
    S_SHORT nAccep0;      /* number of endpoint-acceptors that have no H and no negative charge */
    S_SHORT nDonorA;      /* number of acidic endpoint-donors */
    S_SHORT nAccepS;      /* number of acidic endpoint-acceptors */
};
typedef struct tagTGroupData TGroupData;
#endif
2242
+ /********************************************************************************************************/
2243
+ int MarkSaltChargeGroups ( inp_ATOM *at, int num_atoms, S_GROUP_INFO *s_group_info,
2244
+ T_GROUP_INFO *t_group_info, C_GROUP_INFO *c_group_info,
2245
+ struct BalancedNetworkStructure *pBNS, struct BalancedNetworkData *pBD )
2246
+ {
2247
+
2248
+ int nNumChanges = 0, nTotNumChanges = 0;
2249
+ if ( s_group_info && s_group_info->s_candidate && s_group_info->max_num_candidates > 0 ) {
2250
+ int i, i1, i2, j, j1, j2, jj, ii1, ii2, jj1, jj2, /*k,*/ num_tested;
2251
+ S_CANDIDATE *s_candidate = s_group_info->s_candidate;
2252
+ int nMaxNumCandidates = s_group_info->max_num_candidates;
2253
+ int nNumCandidates = s_group_info->num_candidates;
2254
+ int nNumOtherCandidates = s_group_info->num_other_candidates;
2255
+ int nNumPOnlyCandidates = s_group_info->num_p_only_candidates;
2256
+ int s_type, s_subtype;
2257
+ int ret, nDelta, /*nMobile,*/ err = 0;
2258
+ int s_subtype_all = 0;
2259
+ int nGroupNumber;
2260
+ T_ENDPOINT EndPoint[2];
2261
+ #if ( MAX_LOCAL_TGNUM > 0 )
2262
+ TGroupData tgData[MAX_LOCAL_TGNUM];
2263
+ TGroupData *ptgData = tgData;
2264
+ #endif
2265
+ if ( nNumCandidates <= -1 || !t_group_info || !t_group_info->t_group ) {
2266
+ return 0;
2267
+ }
2268
+
2269
+ /* count t-groups */
2270
+ for ( i = 0, nGroupNumber = 0; i < t_group_info->num_t_groups; i ++ ) {
2271
+ if ( nGroupNumber < t_group_info->t_group[i].nGroupNumber ) {
2272
+ nGroupNumber = t_group_info->t_group[i].nGroupNumber; /* max. t-group number */
2273
+ }
2274
+ }
2275
+ #if ( MAX_LOCAL_TGNUM > 0 )
2276
+ /* prepare memory */
2277
+ if ( nGroupNumber >= MAX_LOCAL_TGNUM ) {
2278
+ if ( !( ptgData = (TGroupData*)inchi_calloc( nGroupNumber+1, sizeof(TGroupData) ) ) ) {
2279
+ err = BNS_OUT_OF_RAM;
2280
+ goto quick_exit;
2281
+ }
2282
+ } else {
2283
+ memset( ptgData, 0, sizeof(tgData) );
2284
+ }
2285
+ ptgData[0].nGroupIndex = -1; /* data for non-tautomeric atoms */
2286
+ for ( i = 0, nGroupNumber = 0; i < t_group_info->num_t_groups; i ++ ) {
2287
+ if ( nGroupNumber = t_group_info->t_group[i].nGroupNumber ) {
2288
+ ptgData[nGroupNumber].nGroupIndex = i;
2289
+ ptgData[i].nGroupNumber = nGroupNumber;
2290
+ }
2291
+ }
2292
+ #endif
2293
+ nNumCandidates = 0; /* always recalculate 2004-03-22 */
2294
+
2295
+ if ( nNumCandidates == 0 ) {
2296
+ for ( i = 0, nNumCandidates = nNumOtherCandidates = nNumPOnlyCandidates = 0; i < num_atoms; i ++ ) {
2297
+ if ( 0 == (s_type = GetSaltChargeType( at, i, t_group_info, &s_subtype )) ||
2298
+ /* -C=O or =C-OH, O = S, Se, Te */
2299
+ #if( INCL_NON_SALT_CANDIDATATES == 1 )
2300
+ 1 == (s_type = GetOtherSaltChargeType( at, i, t_group_info, &s_subtype, 1 )) ||
2301
+ /* =Z-MH or -Z=M, Z = centerpoint, M = endpoint, other than above */
2302
+ #endif
2303
+ 2 == (s_type = GetOtherSaltType( at, i, &s_subtype ) )
2304
+ /* >C-SH, >C-S(-); S=S,Se,Te */
2305
+ ) {
2306
+
2307
+ if ( nNumCandidates >= nMaxNumCandidates ) {
2308
+ err = BNS_VERT_EDGE_OVFL;
2309
+ goto quick_exit;
2310
+ }
2311
+ s_candidate[nNumCandidates].atnumber = i;
2312
+ s_candidate[nNumCandidates].type = s_type;
2313
+ s_candidate[nNumCandidates].subtype = s_subtype;
2314
+ s_candidate[nNumCandidates].endpoint = at[i].endpoint;
2315
+ nNumCandidates ++;
2316
+ nNumOtherCandidates += (1 == s_type);
2317
+ nNumPOnlyCandidates += (2 == s_type);
2318
+ s_subtype_all |= s_subtype;
2319
+ /*i1 = i;*/ /* save a representative of a tautomeric group */
2320
+ }
2321
+ }
2322
+
2323
+ /* changes: TG_FLAG_ALLOW_NO_NEGTV_O replaced CHARGED_SALTS_ONLY==0 */
2324
+ if ( nNumCandidates <= 1 ||
2325
+ !(s_subtype_all & SALT_ACCEPTOR) ||
2326
+ (((t_group_info->bTautFlags & TG_FLAG_ALLOW_NO_NEGTV_O)||
2327
+ (t_group_info->bTautFlagsDone & TG_FLAG_FOUND_SALT_CHARGES_DONE) ||
2328
+ (t_group_info->tni.bNormalizationFlags & FLAG_FORCE_SALT_TAUT)) ?
2329
+ !(s_subtype_all & (SALT_DONOR_Neg | SALT_DONOR_H)):
2330
+ (!(s_subtype_all & SALT_DONOR_Neg) || nNumOtherCandidates==nNumCandidates))
2331
+ ) {
2332
+ s_group_info->num_candidates = -1; /* no candidate exists */
2333
+ goto quick_exit;
2334
+ }
2335
+ if ( !(s_subtype_all & (SALT_DONOR_Neg) ) ) {
2336
+ t_group_info->bTautFlagsDone |= TG_FLAG_ALLOW_NO_NEGTV_O_DONE;
2337
+ }
2338
+ } else {
2339
+ for ( i = 0; i < nNumCandidates; i ++ ) {
2340
+ i1 = s_candidate[i].atnumber;
2341
+ if ( 0 <= (s_type = GetSaltChargeType( at, i1, t_group_info, &s_subtype ))
2342
+ #if( INCL_NON_SALT_CANDIDATATES == 1 )
2343
+ || 0 < (s_type = GetOtherSaltChargeType( at, i1, t_group_info, &s_subtype, 1 /* bAccept_O*/ ))
2344
+ #endif
2345
+ ) {
2346
+ s_candidate[nNumCandidates].type = s_type;
2347
+ s_candidate[nNumCandidates].subtype = s_subtype;
2348
+ s_candidate[nNumCandidates].endpoint = at[i1].endpoint;
2349
+ }
2350
+ }
2351
+ }
2352
+ /* Look for alt paths connecting:
2353
+ SALT_DONOR_Neg to SALT_ACCEPTOR : long distance migration of negative charges
2354
+ SALT_DONOR_H to SALT_ACCEPTOR : long distance migration of H-atoms
2355
+ */
2356
+ num_tested = 0;
2357
+ do {
2358
+ nNumChanges = 0;
2359
+ for ( i1 = 0; i1 < nNumCandidates; i1 ++ ) {
2360
+ j1 = s_candidate[i1].atnumber;
2361
+ for ( i2 = i1+1; i2 < nNumCandidates; i2 ++ ) {
2362
+ /* prev. approach: do not test if both candidates are not "salt-type". Disabled 2004-03-18
2363
+ if ( s_candidate[i1].type && s_candidate[i2].type )
2364
+ continue;
2365
+ */
2366
+ j2 = s_candidate[i2].atnumber;
2367
+ if ( at[j1].endpoint && at[j1].endpoint == at[j2].endpoint ) {
2368
+ continue;
2369
+ }
2370
+ for ( j = 0; j < 2; j ++ ) {
2371
+ if ( j ) {
2372
+ ii1 = i2; /* candidate 1 (donor) ordering number */
2373
+ ii2 = i1; /* candidate 2 (acceptor) ordering number */
2374
+ jj1 = j2; /* candidate 1 (donor) atom number */
2375
+ jj2 = j1; /* candidate 2 (acceptor) atom number */
2376
+ } else { /* transposition */
2377
+ ii1 = i1; /* candidate 1 (donor) ordering number */
2378
+ ii2 = i2; /* candidate 2 (acceptor) ordering number */
2379
+ jj1 = j1; /* candidate 1 (donor) atom number */
2380
+ jj2 = j2; /* candidate 2 (acceptor) atom number */
2381
+ }
2382
+
2383
+ if ( ( s_candidate[ii1].subtype & (SALT_DONOR_Neg | SALT_DONOR_H) ) &&
2384
+ ( s_candidate[ii2].subtype & SALT_ACCEPTOR ) ) {
2385
+ ret = bExistsAltPath( pBNS, pBD, NULL, at, num_atoms, jj2, jj1, ALT_PATH_MODE_4_SALT );
2386
+ num_tested ++;
2387
+ if ( IS_BNS_ERROR( ret ) ) {
2388
+ err = ret;
2389
+ goto quick_exit;
2390
+ }
2391
+ if ( ret & 1 ) {
2392
+ nDelta = (ret & ~3) >> 2;
2393
+ nNumChanges += (ret & 2);
2394
+ for ( i = 0; i < 2; i ++ ) {
2395
+ jj = i? jj2 : jj1;
2396
+ AddEndPoint( EndPoint+i, at, jj );
2397
+ }
2398
+ /* add/merge taut groups and reinit pBNS in the fly */
2399
+ ret = RegisterEndPoints( t_group_info,
2400
+ EndPoint, 2, at, num_atoms, c_group_info, pBNS );
2401
+ if ( ret == -1 ) {
2402
+ ret = BNS_PROGRAM_ERR;
2403
+ }
2404
+ if ( ret < 0 ) {
2405
+ err = ret;
2406
+ goto quick_exit;
2407
+ }
2408
+ if ( nDelta ) {
2409
+ err = BNS_RADICAL_ERR;
2410
+ goto quick_exit;
2411
+ }
2412
+ nNumChanges += (ret > 0);
2413
+ break; /* avoid redundant repetition */
2414
+ }
2415
+ }
2416
+ }
2417
+ }
2418
+ }
2419
+ nTotNumChanges += nNumChanges;
2420
+ } while ( num_tested && nNumChanges );
2421
+
2422
+ quick_exit:
2423
+ if ( !err ) {
2424
+ nTotNumChanges += nNumChanges; /* nNumChanges != 0 only in case of 'goto quick_exit' */
2425
+ if ( s_group_info->num_candidates == 0 ) {
2426
+ /* first time: initialize */
2427
+ s_group_info->num_candidates = num_tested? nNumCandidates : -1; /* no candidate exists */
2428
+ }
2429
+ } else {
2430
+ nTotNumChanges = err;
2431
+ }
2432
+ #if ( MAX_LOCAL_TGNUM > 0 )
2433
+ if ( ptgData != tgData ) {
2434
+ inchi_free( ptgData );
2435
+ }
2436
+ #endif
2437
+ }
2438
+ return nTotNumChanges;
2439
+ }
2440
+ #else
2441
+ /********************************************************************************************************/
2442
+ int MarkSaltChargeGroups ( inp_ATOM *at, int num_atoms, S_GROUP_INFO *s_group_info,
2443
+ T_GROUP_INFO *t_group_info, C_GROUP_INFO *c_group_info,
2444
+ struct BalancedNetworkStructure *pBNS, struct BalancedNetworkData *pBD )
2445
+ {
2446
+
2447
+ int nNumChanges = 0, nTotNumChanges = 0;
2448
+ if ( s_group_info && s_group_info->s_candidate && s_group_info->max_num_candidates > 0 ) {
2449
+ int i, i1, i2, j, j1, j2, jj, ii1, ii2, jj1, jj2, k, num_tested;
2450
+ S_CANDIDATE *s_candidate = s_group_info->s_candidate;
2451
+ int nMaxNumCandidates = s_group_info->max_num_candidates;
2452
+ int nNumCandidates = s_group_info->num_candidates;
2453
+ int nNumOtherCandidates = s_group_info->num_other_candidates;
2454
+ int s_type, s_subtype;
2455
+ int ret, nDelta, nMobile;
2456
+ int s_subtype_all = 0;
2457
+ T_ENDPOINT EndPoint[2];
2458
+
2459
+ if ( nNumCandidates <= -1 || !t_group_info || !t_group_info->t_group ) {
2460
+ return 0;
2461
+ } else
2462
+ if ( nNumCandidates == 0 ) {
2463
+ for ( i = 0, nNumCandidates = nNumOtherCandidates = 0; i < num_atoms; i ++ ) {
2464
+ if ( 0 <= (s_type = GetSaltChargeType( at, i, t_group_info, &s_subtype )) ) {
2465
+ if ( nNumCandidates >= nMaxNumCandidates ) {
2466
+ return BNS_VERT_EDGE_OVFL;
2467
+ }
2468
+ s_candidate[nNumCandidates].atnumber = i;
2469
+ s_candidate[nNumCandidates].type = s_type;
2470
+ s_candidate[nNumCandidates].subtype = s_subtype;
2471
+ s_candidate[nNumCandidates].endpoint = at[i].endpoint;
2472
+ nNumCandidates ++;
2473
+ s_subtype_all |= s_subtype;
2474
+ /*i1 = i;*/ /* save a representative of a tautomeric group */
2475
+ }
2476
+ #if( INCL_NON_SALT_CANDIDATATES == 1 )
2477
+ else /* new */
2478
+ if ( 0 < (s_type = GetOtherSaltChargeType( at, i, t_group_info, &s_subtype, 1 /* bAccept_O*/ )) ) {
2479
+ if ( nNumCandidates >= nMaxNumCandidates ) {
2480
+ return BNS_VERT_EDGE_OVFL;
2481
+ }
2482
+ s_candidate[nNumCandidates].atnumber = i;
2483
+ s_candidate[nNumCandidates].type = s_type;
2484
+ s_candidate[nNumCandidates].subtype = s_subtype;
2485
+ s_candidate[nNumCandidates].endpoint = at[i].endpoint;
2486
+ nNumCandidates ++;
2487
+ nNumOtherCandidates ++;
2488
+ s_subtype_all |= s_subtype;
2489
+ }
2490
+ #endif
2491
+ }
2492
+
2493
+ /* changes: TG_FLAG_ALLOW_NO_NEGTV_O replaced CHARGED_SALTS_ONLY==0 */
2494
+ if ( nNumCandidates <= 1 || nNumOtherCandidates == nNumCandidates ||
2495
+ ((t_group_info->bTautFlags & TG_FLAG_ALLOW_NO_NEGTV_O) ?
2496
+ !(s_subtype_all & (SALT_DONOR_Neg | SALT_DONOR_H)):
2497
+ !(s_subtype_all & SALT_DONOR_Neg)) ||
2498
+ !(s_subtype_all & SALT_ACCEPTOR)) {
2499
+ s_group_info->num_candidates = -1; /* no candidate exists */
2500
+ return 0;
2501
+ }
2502
+ if ( !(s_subtype_all & (SALT_DONOR_Neg) ) ) {
2503
+ t_group_info->bTautFlagsDone |= TG_FLAG_ALLOW_NO_NEGTV_O_DONE;
2504
+ }
2505
+ } else {
2506
+ for ( i = 0; i < nNumCandidates; i ++ ) {
2507
+ i1 = s_candidate[i].atnumber;
2508
+ if ( 0 <= (s_type = GetSaltChargeType( at, i1, t_group_info, &s_subtype ))
2509
+ #if( INCL_NON_SALT_CANDIDATATES == 1 )
2510
+ || 0 < (s_type = GetOtherSaltChargeType( at, i1, t_group_info, &s_subtype, 1 /* bAccept_O*/ ))
2511
+ #endif
2512
+ ) {
2513
+ s_candidate[nNumCandidates].type = s_type;
2514
+ s_candidate[nNumCandidates].subtype = s_subtype;
2515
+ s_candidate[nNumCandidates].endpoint = at[i1].endpoint;
2516
+ }
2517
+ }
2518
+ }
2519
+ /* Look for alt paths connecting:
2520
+ SALT_DONOR_Neg to SALT_ACCEPTOR : long distance migration of negative charges
2521
+ SALT_DONOR_H to SALT_ACCEPTOR : long distance migration of H-atoms
2522
+ */
2523
+ num_tested = 0;
2524
+ do {
2525
+ nNumChanges = 0;
2526
+ for ( i1 = 0; i1 < nNumCandidates; i1 ++ ) {
2527
+ j1 = s_candidate[i1].atnumber;
2528
+ for ( i2 = i1+1; i2 < nNumCandidates; i2 ++ ) {
2529
+ if ( s_candidate[i1].type && s_candidate[i2].type )
2530
+ continue; /* both candidates are not "salt-type" */
2531
+ j2 = s_candidate[i2].atnumber;
2532
+ if ( at[j1].endpoint && at[j1].endpoint == at[j2].endpoint ) {
2533
+ continue;
2534
+ }
2535
+ for ( j = 0; j < 2; j ++ ) {
2536
+ if ( j ) {
2537
+ ii1 = i2; /* candidate 1 (donor) ordering number */
2538
+ ii2 = i1; /* candidate 2 (acceptor) ordering number */
2539
+ jj1 = j2; /* candidate 1 (donor) atom number */
2540
+ jj2 = j1; /* candidate 2 (acceptor) atom number */
2541
+ } else { /* transposition */
2542
+ ii1 = i1; /* candidate 1 (donor) ordering number */
2543
+ ii2 = i2; /* candidate 2 (acceptor) ordering number */
2544
+ jj1 = j1; /* candidate 1 (donor) atom number */
2545
+ jj2 = j2; /* candidate 2 (acceptor) atom number */
2546
+ }
2547
+
2548
+ if ( ( s_candidate[ii1].subtype & (SALT_DONOR_Neg | SALT_DONOR_H) ) &&
2549
+ ( s_candidate[ii2].subtype & SALT_ACCEPTOR ) ) {
2550
+ ret = bExistsAltPath( pBNS, pBD, NULL, at, num_atoms, jj2, jj1, ALT_PATH_MODE_4_SALT );
2551
+ num_tested ++;
2552
+ if ( IS_BNS_ERROR( ret ) ) {
2553
+ return ret;
2554
+ }
2555
+ if ( ret & 1 ) {
2556
+ nDelta = (ret & ~3) >> 2;
2557
+ nNumChanges += (ret & 2);
2558
+ for ( i = 0; i < 2; i ++ ) {
2559
+ jj = i? jj2 : jj1;
2560
+ EndPoint[i].nAtomNumber = jj;
2561
+ EndPoint[i].nEquNumber = 0;
2562
+ EndPoint[i].nGroupNumber = at[jj].endpoint;
2563
+ if ( at[jj].endpoint ) {
2564
+ memset( EndPoint[i].num, 0, sizeof(EndPoint[i].num) );
2565
+ } else {
2566
+ AddAtom2num( EndPoint[i].num, at, jj, 2 ); /* fill out */
2567
+ AddAtom2DA( EndPoint[i].num_DA, at, jj, 2 );
2568
+ /*
2569
+ nMobile = EndPoint[i].num[1] = (at[jj].charge == -1);
2570
+ nMobile = EndPoint[i].num[0] = at[jj].num_H + nMobile;
2571
+ for ( k = 0; k < T_NUM_ISOTOPIC; k ++ ) {
2572
+ EndPoint[i].num[T_NUM_NO_ISOTOPIC+k] = at[jj].num_iso_H[NUM_H_ISOTOPES-k-1];
2573
+ }
2574
+ */
2575
+ }
2576
+ }
2577
+ /* add/merge taut groups and reinit pBNS */
2578
+ ret = RegisterEndPoints( t_group_info,
2579
+ EndPoint, 2, at, num_atoms, c_group_info, pBNS );
2580
+ if ( ret < 0 ) {
2581
+ return ret;
2582
+ }
2583
+ nNumChanges += (ret > 0);
2584
+ if ( nDelta ) {
2585
+ goto quick_exit;
2586
+ }
2587
+ break; /* avoid redundant repetition */
2588
+ }
2589
+ }
2590
+ }
2591
+ }
2592
+ }
2593
+ nTotNumChanges += nNumChanges;
2594
+ } while ( num_tested && nNumChanges );
2595
+
2596
+ quick_exit:
2597
+ nTotNumChanges += nNumChanges; /* nNumChanges != 0 only in case of 'goto quick_exit' */
2598
+ if ( s_group_info->num_candidates == 0 ) {
2599
+ /* first time: initialize */
2600
+ s_group_info->num_candidates = num_tested? nNumCandidates : -1; /* no candidate exists */
2601
+ }
2602
+
2603
+ }
2604
+ return nTotNumChanges;
2605
+ }
2606
+ #endif
2607
+
2608
+ /*****************************************************************************/
2609
+ int MergeSaltTautGroups( inp_ATOM *at, int num_atoms, S_GROUP_INFO *s_group_info,
2610
+ T_GROUP_INFO *t_group_info, C_GROUP_INFO *c_group_info,
2611
+ struct BalancedNetworkStructure *pBNS )
2612
+ {
2613
+ /* count candidates to be connected: exclude pure donors that do not belong to any t-group */
2614
+ AT_NUMB nCurTGroupNumber;
2615
+ int i, j, /*k,*/ ret, iat, /*nMobile,*/ nMinNumEndpoints;
2616
+ int s_subtype_all, s_subtype_taut;
2617
+ int nMaxNumCandidates, nNumCandidates, nNumCandidates2;
2618
+ T_ENDPOINT EndPointStackArray[MAX_STACK_ARRAY_LEN]; /* will be reallocated if too short */
2619
+ T_ENDPOINT *EndPoint = EndPointStackArray;
2620
+
2621
+
2622
+ if ( !s_group_info || !s_group_info->s_candidate || /*s_group_info->num_candidates <= 0 ||*/
2623
+ !t_group_info || !t_group_info->t_group || !c_group_info ) {
2624
+ return 0;
2625
+ }
2626
+ nMinNumEndpoints = 0;
2627
+ nMaxNumCandidates = s_group_info->max_num_candidates;
2628
+ nCurTGroupNumber = MAX_ATOMS; /* impossible t-group number */
2629
+ s_subtype_all = s_subtype_taut = 0;
2630
+ /* collect tautomeric acidic O and previously non-tautomeric C-OH, C-SH, C-O(-), C-S(-) */
2631
+ /* find whether previously found tautomeric atoms have both mobile H and (-) */
2632
+ if ( 1 || (s_group_info->num_candidates < 0) ) {
2633
+ /* can be only -O(-) and -OH */
2634
+ int s_type, s_subtype;
2635
+ S_CANDIDATE *s_candidate = s_group_info->s_candidate;
2636
+ for ( i = 0, nNumCandidates = nNumCandidates2 = 0; i < num_atoms; i ++ ) {
2637
+ s_subtype = 0;
2638
+ if ( 0 == (s_type = GetSaltChargeType( at, i, t_group_info, &s_subtype )) ||
2639
+ /* -C=O or =C-OH, O = S, Se, Te */
2640
+
2641
+ /*(t_group_info->tni.bNormalizationFlags & FLAG_FORCE_SALT_TAUT) &&*/
2642
+ 1 == (s_type = GetOtherSaltChargeType( at, i, t_group_info, &s_subtype, 1/* bAccept_O*/ )) ||
2643
+ /* =Z-MH or -Z=M, Z = centerpoint, M = endpoint, other than above. M may be N */
2644
+
2645
+ 2 == (s_type = GetOtherSaltType( at, i, &s_subtype )) ||
2646
+ /* >C-SH, >C-S(-); S=S,Se,Te */
2647
+
2648
+ /* other proton donor or acceptor */
2649
+ bHasAcidicHydrogen( at, i) && ((s_type=3), (s_subtype = SALT_p_DONOR)) ||
2650
+ bHasAcidicMinus( at, i) && ((s_type=3), (s_subtype = SALT_p_ACCEPTOR))
2651
+ ) {
2652
+
2653
+ if ( nNumCandidates >= nMaxNumCandidates ) {
2654
+ return BNS_VERT_EDGE_OVFL;
2655
+ }
2656
+ if ( at[i].endpoint ) {
2657
+ s_subtype_taut |= s_subtype;
2658
+ } else
2659
+ if ( bDoNotMergeNonTautAtom(at, i) ) {
2660
+ continue; /* ignore non-tautomeric N */
2661
+ }
2662
+ if ( !( s_subtype & SALT_DONOR_ALL ) ||
2663
+ (s_subtype & SALT_ACCEPTOR) && !at[i].endpoint ) {
2664
+ continue; /* do not include non-taut acceptors like -C=O */
2665
+ }
2666
+ s_candidate[nNumCandidates].atnumber = i;
2667
+ s_candidate[nNumCandidates].type = s_type;
2668
+ s_candidate[nNumCandidates].subtype = s_subtype;
2669
+ s_candidate[nNumCandidates].endpoint = at[i].endpoint;
2670
+ nNumCandidates ++;
2671
+ s_subtype_all |= s_subtype;
2672
+ }
2673
+ }
2674
+ /*
2675
+ Forced merging occurs upon:
2676
+ ===========================
2677
+ (t_group_info->bTautFlags & TG_FLAG_ALLOW_NO_NEGTV_O) or
2678
+ (t_group_info->tni.bNormalizationFlags & FLAG_FORCE_SALT_TAUT)
2679
+
2680
+
2681
+ Allow forced merging in cases:
2682
+ {t-groups} (H, (-)} {H, (-), t-groups}
2683
+
2684
+
2685
+ Normal salt merging in cases:
2686
+ (H, (-)} {H, (-), t-groups},
2687
+
2688
+ Cannot merge H into t-groups if no (-) is present
2689
+ */
2690
+
2691
+
2692
+ if ( (t_group_info->bTautFlags & TG_FLAG_ALLOW_NO_NEGTV_O) ||
2693
+ (t_group_info->bTautFlagsDone & TG_FLAG_FOUND_SALT_CHARGES_DONE) ||
2694
+ (t_group_info->tni.bNormalizationFlags & FLAG_FORCE_SALT_TAUT) ) {
2695
+ /* force merge even though no negative charges are present */
2696
+ if ( nNumCandidates <= 1 ||
2697
+ (!(s_subtype_all & SALT_DONOR_Neg2) || !(s_subtype_all & SALT_DONOR_H2)) &&
2698
+ !t_group_info->num_t_groups ) {
2699
+ s_group_info->num_candidates = -1; /* no candidate exists */
2700
+ return 0;
2701
+ }
2702
+ } else {
2703
+ /* normal salt mode: merge if both -XH and -X(-) are present */
2704
+ if ( nNumCandidates <= 1 ||
2705
+ (!(s_subtype_all & SALT_DONOR_Neg2) || !(s_subtype_all & SALT_DONOR_H2)) ) {
2706
+ s_group_info->num_candidates = -1; /* no candidate exists */
2707
+ return 0;
2708
+ }
2709
+ }
2710
+ /* -- old code --
2711
+ if ( nNumCandidates <= 1 ||
2712
+ (((t_group_info->bTautFlags & TG_FLAG_ALLOW_NO_NEGTV_O) ||
2713
+ (t_group_info->tni.bNormalizationFlags & FLAG_FORCE_SALT_TAUT)) ?
2714
+ !(s_subtype_all & SALT_DONOR_ALL):
2715
+ !(s_subtype_all & SALT_DONOR_Neg2)
2716
+ )
2717
+ ) {
2718
+ s_group_info->num_candidates = -1;
2719
+ return 0;
2720
+ }
2721
+ */
2722
+ if ( !(s_subtype_all & (SALT_DONOR_Neg2) ) ) {
2723
+ t_group_info->bTautFlagsDone |= TG_FLAG_ALLOW_NO_NEGTV_O_DONE;
2724
+ }
2725
+ s_group_info->num_candidates = nNumCandidates;
2726
+ }
2727
+
2728
+ for ( i = 0; i < s_group_info->num_candidates; i ++ ) {
2729
+ iat = s_group_info->s_candidate[i].atnumber;
2730
+ if ( (s_group_info->s_candidate[i].subtype & SALT_ACCEPTOR) && !at[iat].endpoint ) {
2731
+ continue; /* should not happen */
2732
+ }
2733
+ s_subtype_all |= s_group_info->s_candidate[i].subtype;
2734
+ if ( at[iat].endpoint != nCurTGroupNumber || !at[iat].endpoint ) {
2735
+ nMinNumEndpoints ++;
2736
+ }
2737
+ nCurTGroupNumber = (int)at[iat].endpoint;
2738
+ }
2739
+ if ( nMinNumEndpoints <= 1 ) {
2740
+ return 0; /* too few endpoints */
2741
+ }
2742
+
2743
+ /* make sure we have enough memory */
2744
+ if ( nMinNumEndpoints > MAX_STACK_ARRAY_LEN ) {
2745
+ if ( !(EndPoint = (T_ENDPOINT *)inchi_calloc( nMinNumEndpoints, sizeof(EndPoint[0]) ) ) ) {
2746
+ /*printf("BNS_OUT_OF_RAM-8\n");*/
2747
+ return BNS_OUT_OF_RAM;
2748
+ }
2749
+ }
2750
+
2751
+ nCurTGroupNumber = MAX_ATOMS; /* impossible t-group number */
2752
+ for ( i = j = 0; i < s_group_info->num_candidates; i ++ ) {
2753
+ iat = s_group_info->s_candidate[i].atnumber;
2754
+ if ( s_group_info->s_candidate[i].subtype == SALT_ACCEPTOR && !at[iat].endpoint ) {
2755
+ continue;
2756
+ }
2757
+ if ( at[iat].endpoint != nCurTGroupNumber || !at[iat].endpoint ) {
2758
+ AddEndPoint( EndPoint+j, at, iat );
2759
+ j ++;
2760
+ }
2761
+ nCurTGroupNumber = (int)at[iat].endpoint;
2762
+ }
2763
+
2764
+ ret = RegisterEndPoints( t_group_info,
2765
+ EndPoint, j, at, num_atoms, c_group_info, pBNS );
2766
+ if ( ret == -1 ) {
2767
+ ret = BNS_PROGRAM_ERR;
2768
+ }
2769
+
2770
+ if ( EndPoint != EndPointStackArray ) {
2771
+ inchi_free( EndPoint );
2772
+ }
2773
+
2774
+ return ret;
2775
+ }
2776
+
2777
+ /*****************************************************************************/
2778
+ int MakeIsotopicHGroup( inp_ATOM *at, int num_atoms, S_GROUP_INFO *s_group_info,
2779
+ T_GROUP_INFO *t_group_info )
2780
+ {
2781
+ /* all tautomeric atoms and all possible H+ donors and acceptors that have H */
2782
+ int i, j, k, n, bHasH, tg, nError=0;
2783
+ int s_subtype_all, s_subtype_taut;
2784
+ int nMaxNumCandidates, nNumCandidates, nNumNonTautCandidates;
2785
+
2786
+
2787
+ if ( !s_group_info || !s_group_info->s_candidate || /*s_group_info->num_candidates <= 0 ||*/
2788
+ !t_group_info || !t_group_info->t_group ) {
2789
+ return 0;
2790
+ }
2791
+ nMaxNumCandidates = s_group_info->max_num_candidates;
2792
+ s_subtype_all = s_subtype_taut = 0;
2793
+ memset( t_group_info->num_iso_H, 0, sizeof(t_group_info->num_iso_H) );
2794
+ if ( 1 || (s_group_info->num_candidates < 0) ) {
2795
+ int s_type, s_subtype;
2796
+ S_CANDIDATE *s_candidate = s_group_info->s_candidate;
2797
+ for ( i = 0, nNumCandidates = nNumNonTautCandidates = 0; i < num_atoms; i ++ ) {
2798
+ s_subtype = 0;
2799
+ s_type = 0;
2800
+ if ( at[i].endpoint ) {
2801
+ if ( (tg = t_group_info->tGroupNumber[at[i].endpoint]) &&
2802
+ at[i].endpoint == t_group_info->t_group[tg-=1].nGroupNumber ) {
2803
+ bHasH = (int)t_group_info->t_group[tg].num[0] - (int)t_group_info->t_group[tg].num[1];
2804
+ } else {
2805
+ nError = BNS_PROGRAM_ERR;
2806
+ break;
2807
+ }
2808
+ } else {
2809
+ bHasH = (int)at[i].num_H;
2810
+ }
2811
+ if ( bHasH && at[i].endpoint || /* tautomeric atoms */
2812
+
2813
+ /* non-tautomeric heteroatoms that
2814
+ (a) have H and
2815
+ (b) may be donors of H
2816
+ therefore may exchange isotopic-non-isotopic H */
2817
+ bHasH &&
2818
+ (0 == (s_type = GetSaltChargeType( at, i, t_group_info, &s_subtype )) ||
2819
+ /* -C=O or =C-OH, O = S, Se, Te */
2820
+
2821
+ /*(t_group_info->tni.bNormalizationFlags & FLAG_FORCE_SALT_TAUT) &&*/
2822
+ 1 == (s_type = GetOtherSaltChargeType( at, i, t_group_info, &s_subtype, 1/* bAccept_O*/ )) ||
2823
+ /* =Z-MH or -Z=M, Z = centerpoint, M = endpoint, other than above. M may be N */
2824
+
2825
+ 2 == (s_type = GetOtherSaltType( at, i, &s_subtype )) ||
2826
+ /* >C-SH, >C-S(-); S=S,Se,Te */
2827
+
2828
+ /* other proton donor or acceptor */
2829
+ bHasAcidicHydrogen( at, i) && ((s_type=3), (s_subtype = SALT_p_DONOR)) ||
2830
+ bHasAcidicMinus( at, i) && ((s_type=3), (s_subtype = SALT_p_ACCEPTOR)) ||
2831
+ bHasOtherExchangableH (at, i) && ((s_type=3), (s_subtype = SALT_DONOR_H)) )
2832
+
2833
+ ) {
2834
+
2835
+ if ( nNumCandidates >= nMaxNumCandidates ) {
2836
+ return BNS_VERT_EDGE_OVFL;
2837
+ }
2838
+ s_candidate[nNumCandidates].atnumber = i;
2839
+ s_candidate[nNumCandidates].type = s_type;
2840
+ s_candidate[nNumCandidates].subtype = s_subtype;
2841
+ s_candidate[nNumCandidates].endpoint = at[i].endpoint;
2842
+ nNumCandidates ++;
2843
+ nNumNonTautCandidates += !at[i].endpoint;
2844
+ s_subtype_all |= s_subtype;
2845
+ }
2846
+ }
2847
+ if ( nError ) {
2848
+ return nError;
2849
+ }
2850
+ if ( nNumCandidates > 0 ) {
2851
+ t_group_info->nIsotopicEndpointAtomNumber = (AT_NUMB *)inchi_calloc( nNumNonTautCandidates+1, sizeof(t_group_info->nIsotopicEndpointAtomNumber[0]));
2852
+ t_group_info->nIsotopicEndpointAtomNumber[0] = nNumNonTautCandidates;
2853
+ for ( i = 0, n = 1; i < nNumCandidates; i ++ ) {
2854
+ k = s_candidate[i].atnumber;
2855
+ if ( !at[k].endpoint ) {
2856
+ t_group_info->nIsotopicEndpointAtomNumber[n++] = k;
2857
+ }
2858
+ for ( j = 0; j < NUM_H_ISOTOPES; j ++ ) {
2859
+ t_group_info->num_iso_H[j] += at[k].num_iso_H[j];
2860
+ }
2861
+ at[k].cFlags |= AT_FLAG_ISO_H_POINT;
2862
+ }
2863
+ t_group_info->nNumIsotopicEndpoints = nNumNonTautCandidates+1;
2864
+ }
2865
+ }
2866
+ return nNumCandidates;
2867
+ }
2868
+
2869
+ /*#else*/ /* } DISCONNECT_SALTS == 0 */
2870
+
2871
+ /**********************************************************************************
2872
+ Charges and tautomeric endpoints (N only)
2873
+ **********************************************************************************
2874
+
2875
+ H = number of possibly moveable hydrogen atoms
2876
+ C = possibly moveable positive charge
2877
+
2878
+ - = single bond
2879
+ = = double bond
2880
+ # = triple bond
2881
+
2882
+ +-----------------------------------------------------------------------------+
2883
+ |ca-| H | edges to t- | 1 bond | 2 bonds | 3 bonds *) |
2884
+ |se | C | and c-groups | (valence) | (valence) | (valence) |
2885
+ | # | | (edges flow) | | | |
2886
+ +---|------+---------------+----------------+----------------+----------------|
2887
+ | 1 | H=0 | -- (1) | =NH (3) | =N- (3) | >N- (3) |
2888
+ | | C=0 | == | | | |
2889
+ +---|------+---------------+----------------+----------------+----------------|
2890
+ | 2 | H=1 | == (2) | -NH2 (3) | -NH- (3) | none |
2891
+ | | C=0 | == | | | |
2892
+ +---|------+---------------+----------------+----------------+----------------|
2893
+ | 3 | H=0 | -- (0) | #NH(+) (4) | =N(+)= (4) +)| >N(+)= (4) |
2894
+ | | C=1 | -- | (prohibited | | |
2895
+ | | | | by edge cap) | | |
2896
+ +---|------+---------------+----------------+----------------+----------------|
2897
+ | 4 | H=1 | == (1) | =NH2(+) (4) +)| =NH(+)- (4) +)| >NH(+)- (4) |
2898
+ | | C=1 | -- | | | |
2899
+ +---+-------------------------------------------------------------------------+
2900
+
2901
+ *) Cannot be a tautomeric endpoint
2902
+
2903
+ +) The three charged types of atoms [=N(+)=, =NH(+)-, =NH2(+)] should be
2904
+ checked for possible H-tautomerism. Other types in the column marked by *)
2905
+ should not be checked as long as H(+) exchange is not considered
2906
+ tautomeric.
2907
+
2908
+ Other possibilities: -NH3(+) >NH2(+) >N(+)< cannot be H-tautomeric endpoints.
2909
+
2910
+ Case #1 (H=0, C=0) and #4 (H=1, C=1) are indistinguishable from the
2911
+ viewpoint of edges flow and capacities except for flow from N to (+) vertex.
2912
+
2913
+ Without taking precautions H(+) can be transferred
2914
+
2915
+ from =NH2(+) to =NH,
2916
+ from =NH(+)- to =N-,
2917
+ from >NH(+)- to >N-
2918
+
2919
+ or to any other appropriate atom that has a lone electron pair and bonds
2920
+ will not change. In this case no bond must be marked as tautomeric.
2921
+
2922
+ For this reason before attempting to transfer H from one endpoint to
2923
+ another the charges on the two atoms should be set to zero by
2924
+ forcing zero flow from each of atoms to the (+)-vertices if the
2925
+ atoms belong to a c-group.
2926
+
2927
+ **********************************************************************************/
2928
+
2929
+
2930
+ /********************************************************************************************************/
2931
+ /* MarkTautomerGroups: do not identify positively charged N as endpoints for now */
2932
+ int MarkTautomerGroups( inp_ATOM *at, int num_atoms, T_GROUP_INFO *t_group_info, C_GROUP_INFO *c_group_info
2933
+ , struct BalancedNetworkStructure *pBNS, struct BalancedNetworkData *pBD )
2934
+ {
2935
+ int i, j, k, m, endpoint_valence, centerpoint, endpoint, bond_type, nMobile, num_changes=0, tot_changes=0;
2936
+ T_ENDPOINT EndPoint[MAXVAL];
2937
+ T_BONDPOS BondPos[MAXVAL];
2938
+ AT_NUMB nGroupNumber;
2939
+ int bDiffGroups;
2940
+ int nNumEndPoints, nNumBondPos, nNumPossibleMobile;
2941
+ int bTautBond, bNonTautBond, bAltBond;
2942
+ int nNumDonor, nNumAcceptor, bPossiblyEndpoint;
2943
+ T_GROUP *t_group;
2944
+ int *pnum_t, max_num_t, bIgnoreIsotopic;
2945
+ ENDPOINT_INFO eif1, eif2;
2946
+ int nErr = 0;
2947
+ #define ALLOWED_EDGE(PBNS, IAT,IBOND) ( !PBNS || !PBNS->edge || !PBNS->vert || !PBNS->edge[PBNS->vert[IAT].iedge[IBOND]].forbidden)
2948
+
2949
+ if ( !t_group_info || !(t_group_info->bTautFlags & TG_FLAG_TEST_TAUT__ATOMS) )
2950
+ return 0;
2951
+ /* initial t_group allocation */
2952
+ if ( !t_group_info->t_group && !t_group_info->max_num_t_groups ) {
2953
+ INCHI_MODE bTautFlags = t_group_info->bTautFlags; /* save initial setting */
2954
+ INCHI_MODE bTautFlagsDone = t_group_info->bTautFlagsDone; /* save previous findings, if any */
2955
+ TNI tni = t_group_info->tni;
2956
+ AT_NUMB *tGroupNumber = t_group_info->tGroupNumber;
2957
+ bIgnoreIsotopic = t_group_info->bIgnoreIsotopic;
2958
+ memset( t_group_info, 0, sizeof(*t_group_info) );
2959
+ t_group_info->bIgnoreIsotopic = bIgnoreIsotopic; /* restore initial setting */
2960
+ t_group_info->bTautFlags = bTautFlags;
2961
+ t_group_info->bTautFlagsDone = bTautFlagsDone;
2962
+ t_group_info->tni = tni;
2963
+ t_group_info->tGroupNumber = tGroupNumber;
2964
+ t_group_info->max_num_t_groups = num_atoms/2+1; /* upper limit */
2965
+ if (!(t_group_info->t_group = (T_GROUP*)inchi_calloc(t_group_info->max_num_t_groups, sizeof(t_group[0])))) {
2966
+ return (t_group_info->max_num_t_groups = -1); /* failed, out of RAM */
2967
+ }
2968
+ }
2969
+ /* check if t_group_info exists */
2970
+ if ( !t_group_info->t_group || !t_group_info->max_num_t_groups )
2971
+ return 0;
2972
+
2973
+ if ( 0 > t_group_info->max_num_t_groups )
2974
+ return t_group_info->max_num_t_groups;
2975
+
2976
+ pnum_t = &t_group_info->num_t_groups; /* number of found tautomer endpoint groups */
2977
+ t_group = t_group_info->t_group;
2978
+ max_num_t = t_group_info->max_num_t_groups;
2979
+ bIgnoreIsotopic = t_group_info->bIgnoreIsotopic;
2980
+ /* 1-3 tautomers */
2981
+ for ( i = 0; i < num_atoms; i ++ ) {
2982
+ /* find possible endpoint Z = at[i] */
2983
+ if ( endpoint_valence = nGetEndpointInfo( at, i, &eif1 ) ) {
2984
+ /* 1st endpoint candidate found. Find centerpoint candidate */
2985
+ for ( j = 0; j < at[i].valence; j ++ ) {
2986
+ bond_type = (int)at[i].bond_type[j] & ~BOND_MARK_ALL;
2987
+ centerpoint = (int)at[i].neighbor[j]; /* a centerpoint candidate */
2988
+ if ( (bond_type == BOND_DOUBLE ||
2989
+ bond_type == BOND_ALTERN ||
2990
+ bond_type == BOND_ALT12NS ||
2991
+ bond_type == BOND_TAUTOM) && is_centerpoint_elem( at[centerpoint].el_number )
2992
+ && ALLOWED_EDGE(pBNS, i, j)
2993
+ ) {
2994
+ /* test a centerpoint candidate. */
2995
+ /* find all endpoints including at[i] and store them into EndPoint[] */
2996
+ nNumPossibleMobile = 0;
2997
+ nGroupNumber = (AT_NUMB)num_atoms; /* greater than any tautomeric group number */
2998
+ bDiffGroups = -1; /* ignore the first difference */
2999
+ nNumDonor = nNumAcceptor = 0;
3000
+ for ( k = 0, nNumEndPoints = 0, nNumBondPos = 0; k < at[centerpoint].valence; k ++ ) {
3001
+ endpoint = at[centerpoint].neighbor[k]; /* endpoint candidate */
3002
+ bond_type = (int)at[centerpoint].bond_type[k] & ~BOND_MARK_ALL;
3003
+ bTautBond =
3004
+ bNonTautBond =
3005
+ bAltBond =
3006
+ bPossiblyEndpoint = 0;
3007
+ if ( !( !pBNS || !pBNS->edge || !pBNS->vert || !pBNS->edge[pBNS->vert[centerpoint].iedge[k]].forbidden) ) {
3008
+ continue;
3009
+ }
3010
+ if ( !ALLOWED_EDGE(pBNS, centerpoint, k) ) {
3011
+ continue;
3012
+ } else
3013
+ if ( bond_type == BOND_ALTERN || bond_type == BOND_ALT12NS || bond_type == BOND_TAUTOM ) {
3014
+ bTautBond = 1;
3015
+ #if( REPLACE_ALT_WITH_TAUT == 1 )
3016
+ bAltBond = (bond_type == BOND_ALTERN || bond_type == BOND_ALT12NS);
3017
+ #endif
3018
+ } else
3019
+ if ( bond_type == BOND_SINGLE || bond_type == BOND_DOUBLE )
3020
+ bNonTautBond = 1;
3021
+ else
3022
+ continue;
3023
+
3024
+ if ( !(endpoint_valence = nGetEndpointInfo( at, endpoint, &eif1 )) )
3025
+ continue; /* not an endpoint element or can't have mobile groups */
3026
+ /* save information about the found possible tautomeric endpoint */
3027
+ /* 2 = T_NUM_NO_ISOTOPIC non-isotopic values */
3028
+ nMobile =
3029
+ AddAtom2num( EndPoint[nNumEndPoints].num, at, endpoint, 2 ); /* fill out */
3030
+ AddAtom2DA( EndPoint[nNumEndPoints].num_DA, at, endpoint, 2 );
3031
+ /* --- why is isitopic info missing ? -- see below
3032
+ nMobile = EndPoint[nNumEndPoints].num[1] = (at[endpoint].charge == -1);
3033
+ nMobile = EndPoint[nNumEndPoints].num[0] = at[endpoint].num_H + nMobile;
3034
+ */
3035
+ if ( bNonTautBond ) {
3036
+ m = (bond_type == BOND_SINGLE && (nMobile || at[endpoint].endpoint));
3037
+ nNumDonor += m;
3038
+ bPossiblyEndpoint += m;
3039
+ m = (bond_type == BOND_DOUBLE );
3040
+ nNumAcceptor += m;
3041
+ bPossiblyEndpoint += m;
3042
+ } else {
3043
+ /* tautomeric or alternating bond */
3044
+ m = (0 != at[endpoint].endpoint || eif1.cDonor );
3045
+ nNumDonor += m;
3046
+ bPossiblyEndpoint += m;
3047
+ m = ( at[endpoint].endpoint ||
3048
+ eif1.cNeutralBondsValence > at[endpoint].valence );
3049
+ nNumAcceptor += m;
3050
+ bPossiblyEndpoint += m;
3051
+ }
3052
+ if ( !bPossiblyEndpoint )
3053
+ continue;
3054
+ EndPoint[nNumEndPoints].nGroupNumber = at[endpoint].endpoint; /* =0 if it is an endpoint for the 1st time */
3055
+ EndPoint[nNumEndPoints].nEquNumber = 0;
3056
+ EndPoint[nNumEndPoints].nAtomNumber = (AT_NUMB)endpoint;
3057
+ if ( nGroupNumber != at[endpoint].endpoint ) {
3058
+ bDiffGroups ++;
3059
+ nGroupNumber = at[endpoint].endpoint;
3060
+ }
3061
+
3062
+ /* save positions of all, not only possibly tautomeric bonds */
3063
+ #if( REPLACE_ALT_WITH_TAUT != 1 )
3064
+ if ( bNonTautBond || bAltBond ) {
3065
+ #endif
3066
+ BondPos[nNumBondPos].nAtomNumber = (AT_NUMB)centerpoint;
3067
+ BondPos[nNumBondPos].neighbor_index = (AT_NUMB)k; /* bond ordering number; used to change bonds to tautomeric only */
3068
+ nNumBondPos ++;
3069
+ #if( REPLACE_ALT_WITH_TAUT != 1 )
3070
+ }
3071
+ #endif
3072
+ /* mobile group is possible if (a) the endpoint has a mobile group or */
3073
+ /* (b) the centerpoint is adjacent to another endpoint */
3074
+ nNumPossibleMobile += (nMobile>0 || at[endpoint].endpoint);
3075
+ nNumEndPoints ++;
3076
+ }
3077
+ if ( nNumEndPoints > 1 && nNumPossibleMobile && nNumDonor && nNumAcceptor ) {
3078
+ /*
3079
+ * a tautomeric group has been found
3080
+ *
3081
+ * at this point:
3082
+ * nGroupNumber = 0 if all endpoints belong to a newly discovered tautomeric group
3083
+ * bDiffGroups > 0 if at least 2 tautomeric groups are to be merged (one of them can be new)
3084
+ * case (nGroupNumber != 0 && bDiffGroups = 0 ) ignored because all endpoints belong to the same known t-group
3085
+ * case (nGroupNumber != 0 && bDiffGroups < 0 ) cannot happen
3086
+ */
3087
+
3088
+ nErr=FindAccessibleEndPoints( EndPoint, &nNumEndPoints, BondPos, &nNumBondPos,
3089
+ pBNS, pBD, at, num_atoms, c_group_info );
3090
+ if ( IS_BNS_ERROR(nErr) ) {
3091
+ return nErr;
3092
+ }
3093
+ nErr = 0;
3094
+
3095
+ if ( nNumEndPoints > 0 ) {
3096
+ if ( !nGroupNumber || bDiffGroups > 0 ) {
3097
+ num_changes = RegisterEndPoints( t_group_info, EndPoint, nNumEndPoints, at, num_atoms, c_group_info, pBNS );
3098
+ if ( num_changes == -1 ) {
3099
+ nErr = CT_TAUCOUNT_ERR;
3100
+ }
3101
+ if ( num_changes < 0 ) {
3102
+ nErr = num_changes;
3103
+ }
3104
+ if ( nErr )
3105
+ goto exit_function;
3106
+ tot_changes += (num_changes>0);
3107
+ }
3108
+ if ( nNumBondPos > 0 ) {
3109
+ /* some of the bonds have not been marked as tautomeric yet */
3110
+ num_changes = SetTautomericBonds( at, nNumBondPos, BondPos );
3111
+ tot_changes += (num_changes>0);
3112
+ }
3113
+ }
3114
+ }
3115
+ }
3116
+ }
3117
+ }
3118
+ }
3119
+
3120
+ #if( TAUT_OTHER == 1 ) /* { */
3121
+ if ( !tot_changes ) {
3122
+ #define MAX_ALT_PATH_LEN 8
3123
+ int nMaxLenDfsPath = MAX_ALT_PATH_LEN;
3124
+ int i1, i2;
3125
+ AT_RANK *nDfsPathPos = (AT_RANK *)inchi_calloc( num_atoms, sizeof(nDfsPathPos[0]) );
3126
+ DFS_PATH DfsPath[MAX_ALT_PATH_LEN];
3127
+ int ret;
3128
+ if ( !nDfsPathPos || !DfsPath ) {
3129
+ tot_changes = CT_OUT_OF_RAM; /* <BRKPT> */
3130
+ goto free_memory;
3131
+ }
3132
+ #if( TAUT_4PYRIDINOL_RINGS == 1 )
3133
+ /* 6-member rings */
3134
+ /*
3135
+ O OH OH
3136
+ || | |
3137
+ / \ // \ / \\
3138
+ || || <--> | || <--> || |
3139
+ \ / \\ / \ //
3140
+ NH N N
3141
+ */
3142
+ for ( i1 = 0; i1 < num_atoms; i1 ++ ) {
3143
+ /* find possible endpoint Z = at[i1] */
3144
+ if ( 3 != (endpoint_valence = nGetEndpointInfo( at, i1, &eif1 ) ) ||
3145
+ 2 != at[i1].valence ) {
3146
+ continue; /* not a nitrogen atom or a wrong valence */
3147
+ }
3148
+
3149
+ if ( at[i1].nNumAtInRingSystem >= 6 ) {
3150
+ nNumEndPoints = 0;
3151
+ nNumBondPos = 0;
3152
+
3153
+ ret = nGet15TautIn6MembAltRing( at, i1, nDfsPathPos,
3154
+ DfsPath, nMaxLenDfsPath,
3155
+ EndPoint, sizeof(EndPoint)/sizeof(EndPoint[0]),
3156
+ BondPos, sizeof(BondPos)/sizeof(BondPos[0]),
3157
+ &nNumEndPoints, &nNumBondPos,
3158
+ pBNS, pBD, num_atoms);
3159
+ if ( ret > 0 ) {
3160
+ if ( nNumEndPoints ) {
3161
+ num_changes = RegisterEndPoints( t_group_info, EndPoint, nNumEndPoints, at, num_atoms, c_group_info, pBNS);
3162
+ if ( num_changes == -1 ) {
3163
+ nErr = CT_TAUCOUNT_ERR;
3164
+ }
3165
+ if ( num_changes < 0 ) {
3166
+ nErr = num_changes;
3167
+ }
3168
+ if ( nErr )
3169
+ goto free_memory;
3170
+ tot_changes += (num_changes > 0);
3171
+ }
3172
+ if ( nNumBondPos ) {
3173
+ tot_changes += ( 0 < SetTautomericBonds( at, nNumBondPos, BondPos ) );
3174
+ }
3175
+ } else
3176
+ if ( IS_BNS_ERROR( ret ) ) {
3177
+ nErr = ret;
3178
+ goto free_memory;
3179
+ }
3180
+ }
3181
+ }
3182
+ #endif /* TAUT_4PYRIDINOL_RINGS */
3183
+ #if( TAUT_PYRAZOLE_RINGS == 1 )
3184
+ /* 5-member rings:
3185
+
3186
+ Z Z
3187
+ / \\ // \
3188
+ X Y <--> X Y
3189
+ \\ / \ //
3190
+ N--NH HN--N
3191
+
3192
+ ^ ^
3193
+ search for these NH
3194
+ */
3195
+ /* 5-member rings (pyrazole derivatives): look for the neighboring N */
3196
+ for ( i1 = 0; i1 < num_atoms; i1 ++ ) {
3197
+ if ( 2 == at[i1].valence &&
3198
+ at[i1].nNumAtInRingSystem >= 5 &&
3199
+ 3 == (endpoint_valence = nGetEndpointInfo( at, i1, &eif1 ))
3200
+ ) {
3201
+ nMobile = at[i1].num_H + (at[i1].charge == -1);
3202
+ for ( j = 0; j < at[i1].valence; j ++ ) {
3203
+ int nMobile2, endpoint_valence2;
3204
+ i2 = at[i1].neighbor[j];
3205
+
3206
+ /* may be important */
3207
+ if ( i2 >= i1 )
3208
+ continue; /* do not try same pair 2 times */
3209
+
3210
+ if ( at[i2].nRingSystem != at[i1].nRingSystem )
3211
+ continue;
3212
+
3213
+ bond_type = (at[i1].bond_type[j] & ~BOND_MARK_ALL);
3214
+ if ( bond_type != BOND_SINGLE &&
3215
+ bond_type != BOND_TAUTOM &&
3216
+ bond_type != BOND_ALT12NS &&
3217
+ bond_type != BOND_ALTERN || /* added 1-15-2002 */
3218
+ 2 != at[i2].valence ||
3219
+ 3 != (endpoint_valence2 = nGetEndpointInfo( at, i2, &eif2 ) ) ) {
3220
+ continue; /* not a nitrogen atom or a wrong valence or not a single bond */
3221
+ }
3222
+ nMobile2 = at[i2].num_H + (at[i2].charge == -1); /* number of mobile groups */
3223
+ #if( TAUT_IGNORE_EQL_ENDPOINTS == 1 )
3224
+ if ( at[i1].endpoint && at[i1].endpoint == at[i2].endpoint )
3225
+ continue; /* atoms already belong to the same t-group */
3226
+ #endif
3227
+ if ( !at[i1].endpoint && !at[i2].endpoint && 1!=nMobile + nMobile2 )
3228
+ continue;
3229
+
3230
+ ret = nGet12TautIn5MembAltRing( at, i1, j, nDfsPathPos,
3231
+ DfsPath, nMaxLenDfsPath,
3232
+ EndPoint, sizeof(EndPoint)/sizeof(EndPoint[0]),
3233
+ BondPos, sizeof(BondPos)/sizeof(BondPos[0]),
3234
+ &nNumEndPoints, &nNumBondPos
3235
+ , pBNS, pBD, num_atoms);
3236
+ if ( ret > 0 ) {
3237
+ if ( nNumEndPoints ) {
3238
+ num_changes = RegisterEndPoints( t_group_info, EndPoint, nNumEndPoints, at, num_atoms, c_group_info, pBNS);
3239
+ if ( num_changes == -1 ) {
3240
+ nErr = CT_TAUCOUNT_ERR;
3241
+ }
3242
+ if ( num_changes < 0 ) {
3243
+ nErr = num_changes;
3244
+ }
3245
+ if ( nErr )
3246
+ goto free_memory;
3247
+ tot_changes += (num_changes > 0);
3248
+ }
3249
+ if ( nNumBondPos ) {
3250
+ tot_changes += ( 0 < SetTautomericBonds( at, nNumBondPos, BondPos ) );
3251
+ }
3252
+ } else
3253
+ if ( IS_BNS_ERROR( ret ) ) {
3254
+ nErr = ret;
3255
+ goto free_memory;
3256
+ }
3257
+ }
3258
+ }
3259
+ }
3260
+ #endif /* TAUT_PYRAZOLE_RINGS */
3261
+ #if ( TAUT_TROPOLONE_7 == 1 || TAUT_TROPOLONE_5 == 1 ) /* { */
3262
+ /********************************************************
3263
+ * A B
3264
+ * | ||
3265
+ * 7-member rings (tropolones): look for M=Q--R--ZH,
3266
+ * ^ ^ ^ ^
3267
+ * endpoint1 i1 i2 endpoint2
3268
+ * where A-Q-R=B belong to a 7-member alt. (except Q-R bond) ring: ..=A-(Q-R)=B-..
3269
+ * Bond Q-R should be single or tautomeric or alternating
3270
+ * M=Q and R-ZH should be chain (non-ring) bonds
3271
+ * Same for 5-member rings
3272
+ */
3273
+ for ( i1 = 0; i1 < num_atoms; i1 ++ ) {
3274
+ if ( at[i1].nNumAtInRingSystem >=
3275
+ #if( TAUT_TROPOLONE_5 == 1 )
3276
+ 5
3277
+ #else
3278
+ 7
3279
+ #endif
3280
+ &&
3281
+ bIsCenterPointStrict( at, i1 ) &&
3282
+ #if( TAUT_RINGS_ATTACH_CHAIN == 1 )
3283
+ at[i1].bCutVertex &&
3284
+ #endif
3285
+ at[i1].valence == 3 && !at[i1].endpoint ) {
3286
+ int nMobile1, endpoint1, endpoint1_valence, bond_type1;
3287
+ int nMobile2, endpoint2, endpoint2_valence, bond_type2;
3288
+ for ( j = 0; j < at[i1].valence; j ++ ) {
3289
+ i2 = at[i1].neighbor[j];
3290
+ /*
3291
+ // may be important
3292
+ if ( i2 > i1 )
3293
+ continue; // do not try same pair 2 times
3294
+ */
3295
+ if ( at[i2].nRingSystem != at[i1].nRingSystem ||
3296
+ !bIsCenterPointStrict( at, i2 ) ||
3297
+ #if( TAUT_RINGS_ATTACH_CHAIN == 1 )
3298
+ !at[i2].bCutVertex ||
3299
+ #endif
3300
+ at[i2].valence != 3 || at[i2].endpoint )
3301
+ continue;
3302
+ bond_type = (at[i1].bond_type[j] & ~BOND_MARK_ALL);
3303
+ if ( bond_type != BOND_SINGLE &&
3304
+ bond_type != BOND_TAUTOM &&
3305
+ bond_type != BOND_ALT12NS &&
3306
+ bond_type != BOND_ALTERN ) {
3307
+ continue; /* not a single bond between Q-R */
3308
+ }
3309
+ /* find endpoints */
3310
+ for ( k = 0; k < at[i1].valence; k ++ ) {
3311
+ endpoint1 = at[i1].neighbor[k];
3312
+ if ( endpoint1 == i2 )
3313
+ continue; /* j == k */
3314
+ if ( !(endpoint1_valence = nGetEndpointInfo( at, endpoint1, &eif1 ) ) )
3315
+ continue; /* not an endpoint1 element or can't have mobile groups */
3316
+ #if( TAUT_RINGS_ATTACH_CHAIN == 1 )
3317
+ if ( at[endpoint1].nRingSystem == at[i1].nRingSystem )
3318
+ continue;
3319
+ #endif
3320
+ nMobile1 = at[endpoint1].num_H + (at[endpoint1].charge == -1); /* number of mobile groups */
3321
+ if ( nMobile1 + at[endpoint1].chem_bonds_valence != endpoint1_valence )
3322
+ continue; /* abnormal endpoint1 valence; ignore. */
3323
+ bond_type1 = (at[i1].bond_type[k] & ~BOND_MARK_ALL);
3324
+
3325
+ if ( bond_type1 != BOND_SINGLE &&
3326
+ bond_type1 != BOND_DOUBLE &&
3327
+ bond_type1 != BOND_TAUTOM &&
3328
+ bond_type1 != BOND_ALT12NS &&
3329
+ bond_type1 != BOND_ALTERN )
3330
+ continue;
3331
+
3332
+ for ( m = 0; m < at[i2].valence; m ++ ) {
3333
+ endpoint2 = at[i2].neighbor[m];
3334
+ if ( endpoint2 == i1 )
3335
+ continue;
3336
+ if ( !(endpoint2_valence = nGetEndpointInfo( at, endpoint2, &eif2 )) )
3337
+ continue; /* not an endpoint2 element or can't have mobile groups */
3338
+ #if( TAUT_RINGS_ATTACH_CHAIN == 1 )
3339
+ if ( at[endpoint2].nRingSystem == at[i2].nRingSystem )
3340
+ continue;
3341
+ #endif
3342
+ nMobile2 = at[endpoint2].num_H + (at[endpoint2].charge == -1); /* number of mobile groups */
3343
+ bond_type2 = (at[i2].bond_type[m] & ~BOND_MARK_ALL);
3344
+
3345
+ if ( bond_type2 != BOND_SINGLE &&
3346
+ bond_type2 != BOND_DOUBLE &&
3347
+ bond_type2 != BOND_TAUTOM &&
3348
+ bond_type2 != BOND_ALT12NS &&
3349
+ bond_type2 != BOND_ALTERN )
3350
+ continue;
3351
+
3352
+ /* final test for possible tautomerism */
3353
+ nMobile = 0;
3354
+
3355
+ if ( ALLOWED_EDGE(pBNS, i1, k) && ALLOWED_EDGE(pBNS, i2, m) ) {
3356
+
3357
+ /* can mobile group move from 1 to 2? */
3358
+ nMobile += (at[endpoint1].endpoint || nMobile1) && /* from endpoint1 */
3359
+ (bond_type1 != BOND_DOUBLE) &&
3360
+
3361
+ (at[endpoint2].endpoint || /* to endpoint2 */
3362
+ eif2.cNeutralBondsValence > at[endpoint2].valence ) &&
3363
+ (bond_type2 != BOND_SINGLE);
3364
+
3365
+
3366
+ /* can mobile group move from 2 to 1? */
3367
+ nMobile += (at[endpoint2].endpoint || nMobile2) && /* from endpoint2 */
3368
+ (bond_type2 != BOND_DOUBLE) && /*changed from BOND_SINGLE 2004-02-26 */
3369
+
3370
+ (at[endpoint1].endpoint || /* to endpoint1 */
3371
+ eif1.cNeutralBondsValence > at[endpoint1].valence ) &&
3372
+ (bond_type1 != BOND_SINGLE);
3373
+ }
3374
+ if ( !nMobile )
3375
+ continue;
3376
+
3377
+ if ( bond_type1 == bond_type2 &&
3378
+ (bond_type1 == BOND_SINGLE || bond_type1 == BOND_DOUBLE) )
3379
+ continue;
3380
+ /* -- old --
3381
+ if ( !at[endpoint1].endpoint && !at[endpoint2].endpoint && 1 != nMobile1 + nMobile2 )
3382
+ continue;
3383
+ */
3384
+ /* -- new --
3385
+
3386
+ if ( !at[endpoint1].endpoint && !at[endpoint2].endpoint ) {
3387
+ if ( !(bond_type1 == BOND_SINGLE || bond_type1 == BOND_DOUBLE) ||
3388
+ !(bond_type2 == BOND_SINGLE || bond_type2 == BOND_DOUBLE) ) {
3389
+ // at this point bond_type1 != bond_type2
3390
+ continue;
3391
+ }
3392
+ if ( bond_type1 == BOND_SINGLE && !nMobile1 ||
3393
+ bond_type2 == BOND_SINGLE && !nMobile2 ||
3394
+ 0 == nMobile1 + nMobile2 ) {
3395
+ continue;
3396
+ }
3397
+ }
3398
+ */
3399
+ #if ( TAUT_TROPOLONE_7 == 1 )
3400
+ if ( at[i1].nNumAtInRingSystem >= 7 ) {
3401
+ ret = nGet14TautIn7MembAltRing( at, i1, j, k, m, nDfsPathPos,
3402
+ DfsPath, nMaxLenDfsPath,
3403
+ EndPoint, sizeof(EndPoint)/sizeof(EndPoint[0]),
3404
+ BondPos, sizeof(BondPos)/sizeof(BondPos[0]),
3405
+ &nNumEndPoints, &nNumBondPos,
3406
+ pBNS, pBD, num_atoms);
3407
+ if ( ret > 0 ) {
3408
+ if ( nNumEndPoints ) {
3409
+ num_changes = RegisterEndPoints( t_group_info, EndPoint, nNumEndPoints, at, num_atoms, c_group_info, pBNS);
3410
+ if ( num_changes == -1 ) {
3411
+ nErr = CT_TAUCOUNT_ERR;
3412
+ }
3413
+ if ( num_changes < 0 ) {
3414
+ nErr = num_changes;
3415
+ }
3416
+ if ( nErr )
3417
+ goto free_memory;
3418
+ tot_changes += (num_changes > 0);
3419
+ }
3420
+ if ( nNumBondPos ) {
3421
+ tot_changes += ( 0 < SetTautomericBonds( at, nNumBondPos, BondPos ) );
3422
+ }
3423
+ } else
3424
+ if ( IS_BNS_ERROR( ret ) ) {
3425
+ nErr = ret;
3426
+ goto free_memory;
3427
+ }
3428
+ }
3429
+ #endif
3430
+
3431
+ #if ( TAUT_TROPOLONE_5 == 1 )
3432
+ if ( at[i1].nNumAtInRingSystem >= 5 ) {
3433
+ ret = nGet14TautIn5MembAltRing( at, i1, j, k, m, nDfsPathPos,
3434
+ DfsPath, nMaxLenDfsPath,
3435
+ EndPoint, sizeof(EndPoint)/sizeof(EndPoint[0]),
3436
+ BondPos, sizeof(BondPos)/sizeof(BondPos[0]),
3437
+ &nNumEndPoints, &nNumBondPos,
3438
+ pBNS, pBD, num_atoms);
3439
+ if ( ret > 0 ) {
3440
+ if ( nNumEndPoints ) {
3441
+ num_changes = RegisterEndPoints( t_group_info, EndPoint, nNumEndPoints, at, num_atoms, c_group_info, pBNS);
3442
+ if ( num_changes == -1 ) {
3443
+ nErr = CT_TAUCOUNT_ERR;
3444
+ }
3445
+ if ( num_changes < 0 ) {
3446
+ nErr = num_changes;
3447
+ }
3448
+ if ( nErr )
3449
+ goto free_memory;
3450
+ tot_changes += (num_changes > 0);
3451
+ }
3452
+ if ( nNumBondPos ) {
3453
+ tot_changes += ( 0 < SetTautomericBonds( at, nNumBondPos, BondPos ) );
3454
+ }
3455
+ } else
3456
+ if ( IS_BNS_ERROR( ret ) ) {
3457
+ nErr = ret;
3458
+ goto free_memory;
3459
+ }
3460
+ }
3461
+ #endif
3462
+ }
3463
+ }
3464
+ }
3465
+ }
3466
+ }
3467
+ #endif /* } TAUT_TROPOLONE */
3468
+ free_memory:
3469
+ if ( nDfsPathPos ) {
3470
+ inchi_free( nDfsPathPos );
3471
+ }
3472
+ #undef MAX_ALT_PATH_LEN
3473
+ }
3474
+ #endif /* } FIND_RING_SYSTEMS */
3475
+ exit_function:
3476
+ return nErr < 0? nErr : tot_changes;
3477
+ }
3478
+
3479
+ /******************************************************************************/
3480
+ int free_t_group_info( T_GROUP_INFO *t_group_info )
3481
+ {
3482
+ if ( t_group_info ) {
3483
+ if ( t_group_info->t_group ) {
3484
+ inchi_free( t_group_info->t_group );
3485
+ }
3486
+ if ( t_group_info->nEndpointAtomNumber ) {
3487
+ inchi_free( t_group_info->nEndpointAtomNumber );
3488
+ }
3489
+ if ( t_group_info->tGroupNumber ) {
3490
+ inchi_free( t_group_info->tGroupNumber );
3491
+ }
3492
+ if ( t_group_info->nIsotopicEndpointAtomNumber ) {
3493
+ inchi_free( t_group_info->nIsotopicEndpointAtomNumber );
3494
+ }
3495
+ memset( t_group_info, 0, sizeof(*t_group_info));
3496
+ }
3497
+ return 0;
3498
+ }
3499
+
3500
+ /*******************************************************************************/
3501
+ /**/
3502
+ int make_a_copy_of_t_group_info( T_GROUP_INFO *t_group_info, T_GROUP_INFO *t_group_info_orig )
3503
+ {
3504
+ int err = 0, len;
3505
+ free_t_group_info( t_group_info );
3506
+ if ( t_group_info_orig && t_group_info ) {
3507
+ if ( (len=t_group_info_orig->max_num_t_groups) > 0 ) {
3508
+ if (t_group_info->t_group =
3509
+ (T_GROUP*)inchi_malloc( len * sizeof(t_group_info->t_group[0]))) {
3510
+ memcpy(t_group_info->t_group,
3511
+ t_group_info_orig->t_group,
3512
+ len * sizeof(t_group_info->t_group[0]));
3513
+ } else {
3514
+ err ++;
3515
+ }
3516
+ }
3517
+ if ( (len = t_group_info_orig->nNumEndpoints) > 0 ) {
3518
+ if (t_group_info->nEndpointAtomNumber =
3519
+ (AT_NUMB*)inchi_malloc( len * sizeof(t_group_info->nEndpointAtomNumber[0]))) {
3520
+ memcpy(t_group_info->nEndpointAtomNumber,
3521
+ t_group_info_orig->nEndpointAtomNumber,
3522
+ len * sizeof(t_group_info->nEndpointAtomNumber[0]));
3523
+ } else {
3524
+ err ++;
3525
+ }
3526
+ }
3527
+ if ( (len = t_group_info_orig->num_t_groups) > 0 ) {
3528
+ if (t_group_info->tGroupNumber =
3529
+ (AT_NUMB*)inchi_malloc( len * TGSO_TOTAL_LEN * sizeof(t_group_info->tGroupNumber[0]))) {
3530
+ memcpy(t_group_info->tGroupNumber,
3531
+ t_group_info_orig->tGroupNumber,
3532
+ len * TGSO_TOTAL_LEN * sizeof(t_group_info->tGroupNumber[0]));
3533
+ } else {
3534
+ err ++;
3535
+ }
3536
+ }
3537
+ if ( (len = t_group_info_orig->nNumIsotopicEndpoints) > 0 ) {
3538
+ if (t_group_info->nIsotopicEndpointAtomNumber =
3539
+ (AT_NUMB*)inchi_malloc( len * sizeof(t_group_info->nIsotopicEndpointAtomNumber[0]))) {
3540
+ memcpy(t_group_info->nIsotopicEndpointAtomNumber,
3541
+ t_group_info_orig->nIsotopicEndpointAtomNumber,
3542
+ len * sizeof(t_group_info->nIsotopicEndpointAtomNumber[0]));
3543
+ } else {
3544
+ err ++;
3545
+ }
3546
+ }
3547
+ if ( !err ) {
3548
+ t_group_info->nNumEndpoints = t_group_info_orig->nNumEndpoints;
3549
+ t_group_info->num_t_groups = t_group_info_orig->num_t_groups;
3550
+ t_group_info->max_num_t_groups = t_group_info_orig->max_num_t_groups;
3551
+ t_group_info->bIgnoreIsotopic = t_group_info_orig->bIgnoreIsotopic;
3552
+ t_group_info->nNumIsotopicEndpoints = t_group_info_orig->nNumIsotopicEndpoints;
3553
+ t_group_info->tni = t_group_info_orig->tni;
3554
+ /*
3555
+ t_group_info->nNumRemovedExplicitH = t_group_info_orig->nNumRemovedExplicitH;
3556
+ t_group_info->nNumRemovedProtons = t_group_info_orig->nNumRemovedProtons;
3557
+ t_group_info->bNormalizationFlags = t_group_info_orig->bNormalizationFlags;
3558
+ */
3559
+ /*
3560
+ t_group_info->bHardAddedRemovedProtons = t_group_info_orig->bHardAddedRemovedProtons;
3561
+ t_group_info->bSimpleAddedRemovedProtons = t_group_info_orig->bSimpleAddedRemovedProtons;
3562
+ t_group_info->nNumCanceledCharges = t_group_info_orig->nNumCanceledCharges;
3563
+ */
3564
+ }
3565
+ t_group_info->bTautFlags = t_group_info_orig->bTautFlags;
3566
+ t_group_info->bTautFlagsDone = t_group_info_orig->bTautFlagsDone;
3567
+ }
3568
+ return err;
3569
+ }
3570
+ /*******************************************************************************/
3571
+ /* set tautomer group isotopic sort keys */
3572
+ int set_tautomer_iso_sort_keys( T_GROUP_INFO *t_group_info )
3573
+ {
3574
+ T_GROUP *t_group;
3575
+ T_GROUP_ISOWT Mult = 1;
3576
+ int i, j, num_t_groups, num_iso_t_groups = 0;
3577
+ if ( !t_group_info || !(t_group = t_group_info->t_group) ||
3578
+ 0 >= (num_t_groups = t_group_info->num_t_groups) || t_group_info->nNumIsotopicEndpoints )
3579
+ return 0;
3580
+ for ( i = 0; i < num_t_groups; i ++ ) {
3581
+ t_group[i].iWeight = 0;
3582
+ j = T_NUM_ISOTOPIC - 1;
3583
+ Mult = 1;
3584
+ do {
3585
+ t_group[i].iWeight += Mult * (T_GROUP_ISOWT)t_group[i].num[T_NUM_NO_ISOTOPIC+j];
3586
+ } while ( --j >= 0 && (Mult *= T_GROUP_ISOWT_MULT) );
3587
+ num_iso_t_groups += (t_group[i].iWeight != 0);
3588
+ }
3589
+ return num_iso_t_groups;
3590
+ }
3591
+
3592
+ /******************************************************************************
3593
+ *
3594
+ * Fill t_group_info with information necessary to fill out tautomer part
3595
+ * of the linear connection table record.
3596
+ * Note: on input, t_group_info should contain information created by MarkTautomerGroups()
3597
+ * No previous t_group_info adjustment due to throwing out disconnected parts of
3598
+ * the chemical structure is needed.
3599
+ *
3600
+ * Note2: throw out t_groups containing negative charges only (IGNORE_TGROUP_WITHOUT_H==1)
3601
+ * (leave their tautomeric bonds unchanged)
3602
+ * Note3: remove negative charges from other tautomeric groups
3603
+ * and adjust counts of mobile atoms if permitted (REMOVE_TGROUP_CHARGE==1)
3604
+ */
3605
+ int CountTautomerGroups( sp_ATOM *at, int num_atoms, T_GROUP_INFO *t_group_info )
3606
+ {
3607
+ int i, j, ret = 0, nNumEndpoints, max_t_group, num_groups_noH;
3608
+
3609
+ AT_NUMB nGroupNumber, nNewGroupNumber, *nCurrEndpointAtNoPos = NULL;
3610
+
3611
+ T_GROUP *t_group;
3612
+ int num_t;
3613
+ /* int bIgnoreIsotopic, max_num_t; */
3614
+ AT_NUMB *nTautomerGroupNumber = NULL;
3615
+ AT_NUMB *nEndpointAtomNumber = NULL;
3616
+ AT_NUMB *tGroupNumber = NULL;
3617
+
3618
+ if ( !t_group_info || !t_group_info->t_group || 0 >= t_group_info->max_num_t_groups ) {
3619
+ return 0; /* empty t-groups */
3620
+ }
3621
+ num_t = t_group_info->num_t_groups;
3622
+ t_group = t_group_info->t_group;
3623
+ /*
3624
+ max_num_t = t_group_info->max_num_t_groups;
3625
+ bIgnoreIsotopic = t_group_info->bIgnoreIsotopic;
3626
+ */
3627
+ num_groups_noH = 0;
3628
+
3629
+ /* the following 2 arrays are to be rebuilt here */
3630
+ if ( t_group_info->nEndpointAtomNumber ) {
3631
+ inchi_free ( t_group_info->nEndpointAtomNumber );
3632
+ t_group_info->nEndpointAtomNumber = NULL;
3633
+ }
3634
+ if ( t_group_info->tGroupNumber ) {
3635
+ inchi_free ( t_group_info->tGroupNumber );
3636
+ t_group_info->tGroupNumber = NULL;
3637
+ }
3638
+ /* find max_t_group */
3639
+ for ( i = 0, max_t_group = 0; i < t_group_info->num_t_groups; i ++ ) {
3640
+ if ( max_t_group < t_group[i].nGroupNumber )
3641
+ max_t_group = t_group[i].nGroupNumber;
3642
+ }
3643
+ /* allocate memory for temp storage of numbers of endpoints */
3644
+ if ( max_t_group &&
3645
+ !(nTautomerGroupNumber = (AT_NUMB*) inchi_calloc( max_t_group+1, sizeof(nTautomerGroupNumber[0]) ) /*temp*/ ) ) {
3646
+ goto err_exit_function; /* program error: out of RAM */ /* <BRKPT> */
3647
+ }
3648
+
3649
+ /* count endpoints for each tautomer group */
3650
+ for ( i = 0, nNumEndpoints = 0; i < num_atoms; i ++ ) {
3651
+ if ( (j = at[i].endpoint) == 0 )
3652
+ continue;
3653
+ if ( j > max_t_group ) /* debug only */
3654
+ goto err_exit_function; /* program error */ /* <BRKPT> */
3655
+ nTautomerGroupNumber[j] ++;
3656
+ nNumEndpoints ++;
3657
+ }
3658
+
3659
+ if ( !nNumEndpoints ) {
3660
+ goto exit_function; /* not a tautomer */
3661
+ }
3662
+
3663
+ /* allocate temporary array */
3664
+ if ( !(nEndpointAtomNumber = (AT_NUMB*) inchi_calloc( nNumEndpoints, sizeof(nEndpointAtomNumber[0]) ) ) ||
3665
+ !(nCurrEndpointAtNoPos = (AT_NUMB*) inchi_calloc( num_t, sizeof(nCurrEndpointAtNoPos[0]) ) /*temp*/ ) ) {
3666
+ goto err_exit_function; /* program error: out of RAM */ /* <BRKPT> */
3667
+ }
3668
+ /*
3669
+ * Remove missing endpoints from t_group. Since only one
3670
+ * disconnected part is processed, some endpoints groups may have disappeared.
3671
+ * Mark t_groups containing charges only for subsequent removal
3672
+ */
3673
+ for ( i = 0, nNewGroupNumber = 0; i < num_t; /*i ++*/ ) {
3674
+ int bNoH = 0, nNumH;
3675
+ nGroupNumber = t_group[i].nGroupNumber;
3676
+ for ( j = 1, nNumH = t_group[i].num[0]; j < T_NUM_NO_ISOTOPIC; j ++ ) {
3677
+ nNumH -= (int)t_group[i].num[j];
3678
+ }
3679
+ if ( t_group[i].nNumEndpoints != nTautomerGroupNumber[(int)nGroupNumber]
3680
+ #if( IGNORE_TGROUP_WITHOUT_H == 1 )
3681
+ || (bNoH = (t_group[i].num[0]==t_group[i].num[1])) /* only for (H,-) t-groups; (+) t-groups are not removed */
3682
+ #endif
3683
+ ) {
3684
+ if ( !nTautomerGroupNumber[(int)nGroupNumber] || bNoH ) {
3685
+ /* the group belongs to another disconnected part of the structure or has only charges */
3686
+ /* Remove the group */
3687
+ num_t --;
3688
+ if ( i < num_t )
3689
+ memmove( t_group+i, t_group+i+1, (num_t-i)*sizeof(t_group[0]) );
3690
+ if ( bNoH ) {
3691
+ /* group contains no mobile hydrogen atoms, only charges. Prepare to remove it. */
3692
+ nTautomerGroupNumber[(int)nGroupNumber] = 0;
3693
+ num_groups_noH ++;
3694
+ }
3695
+ /*i --;*/
3696
+ } else {
3697
+ /* different number of endpoints */
3698
+ goto err_exit_function; /* program error */ /* <BRKPT> */
3699
+ }
3700
+ } else {
3701
+ /* renumber t_group and prepare to renumber at[i].endpoint */
3702
+ nTautomerGroupNumber[(int)nGroupNumber] =
3703
+ t_group[i].nGroupNumber = ++nNewGroupNumber; /* = i+1 */
3704
+ /* get first group atom orig. number position in the nEndpointAtomNumber[] */
3705
+ /* and in the tautomer endpoint canon numbers part of the connection table */
3706
+ t_group[i].nFirstEndpointAtNoPos = nCurrEndpointAtNoPos[i] =
3707
+ i? (t_group[i-1].nFirstEndpointAtNoPos+t_group[i-1].nNumEndpoints) : 0;
3708
+ t_group[i].num[0] = nNumH;
3709
+ #if( REMOVE_TGROUP_CHARGE == 1 )
3710
+ t_group[i].num[1] = 0; /* remove only (-) charges */
3711
+ #endif
3712
+ /* -- wrong condition. Disabled.
3713
+ if ( t_group[i].nGroupNumber != i + 1 ) { // for debug only
3714
+ goto err_exit_function; // program error
3715
+ }
3716
+ */
3717
+ i ++;
3718
+ }
3719
+ }
3720
+ if ( num_t != nNewGroupNumber ) { /* for debug only */
3721
+ goto err_exit_function; /* program error */ /* <BRKPT> */
3722
+ }
3723
+
3724
+ /* check if any tautomer group was left */
3725
+ if ( !nNewGroupNumber ) {
3726
+ if ( !num_groups_noH )
3727
+ goto err_exit_function; /* program error: not a tautomer */ /* <BRKPT> */
3728
+ else
3729
+ goto exit_function;
3730
+ }
3731
+ /*
3732
+ * an array for tautomer group sorting later, at the time of storing Connection Table
3733
+ * Later the sorting consists out of 2 steps:
3734
+ * 1) Sort t_group[i].nNumEndpoints endpoint atom ranks within each endpoint group
3735
+ * starting from t_group[i].nFirstEndpointAtNoPos; i = 0..t_group_info->num_t_groups-1
3736
+ * 2) Sort the groups indexes t_group_info->tGroupNumber[]
3737
+ */
3738
+ if ( !(tGroupNumber=
3739
+ (AT_NUMB*)inchi_calloc(nNewGroupNumber*TGSO_TOTAL_LEN, sizeof(tGroupNumber[0])))) {
3740
+ goto err_exit_function; /* out of RAM */
3741
+ }
3742
+ for ( i = 0; i < nNewGroupNumber; i ++ ) {
3743
+ tGroupNumber[i] = (AT_NUMB)i; /* initialization: original t_group number = (at[i]->endpoint-1) */
3744
+ }
3745
+ /*
3746
+ * renumber endpoint atoms and save their orig. atom
3747
+ * numbers for filling out the tautomer part of the LinearCT.
3748
+ * nCurrEndpointAtNoPos[j] is an index of the atom number in the nEndpointAtomNumber[]
3749
+ */
3750
+ for ( i = 0; i < num_atoms; i ++ ) {
3751
+ if ( j = (int)at[i].endpoint ) {
3752
+ j = (int)(at[i].endpoint = nTautomerGroupNumber[j])-1; /* new t_group number */
3753
+ if ( j >= 0 ) { /* j=-1 in case of no mobile hydrogen atoms (charges only), group being removed */
3754
+ if ( nCurrEndpointAtNoPos[j] >= /* debug only */
3755
+ t_group[j].nFirstEndpointAtNoPos+t_group[j].nNumEndpoints ) {
3756
+ goto err_exit_function; /* program error */ /* <BRKPT> */
3757
+ }
3758
+ nEndpointAtomNumber[(int)nCurrEndpointAtNoPos[j] ++] = (AT_NUMB)i;
3759
+ } else {
3760
+ nNumEndpoints --; /* endpoint has been removed */
3761
+ }
3762
+ }
3763
+ }
3764
+ t_group_info->num_t_groups = nNewGroupNumber;
3765
+ t_group_info->nNumEndpoints = nNumEndpoints;
3766
+ t_group_info->nEndpointAtomNumber = nEndpointAtomNumber;
3767
+ t_group_info->tGroupNumber = tGroupNumber; /* only the 1st segment filled */
3768
+ inchi_free ( nTautomerGroupNumber );
3769
+ inchi_free ( nCurrEndpointAtNoPos );
3770
+ return nNumEndpoints + T_GROUP_HDR_LEN * nNewGroupNumber + 1; /* nLenLinearCTTautomer */
3771
+
3772
+ err_exit_function:
3773
+ ret = CT_TAUCOUNT_ERR;
3774
+ exit_function:
3775
+ /* release allocated memory; set "no tautomeric group" */
3776
+ if ( nEndpointAtomNumber )
3777
+ inchi_free ( nEndpointAtomNumber );
3778
+ if ( nTautomerGroupNumber )
3779
+ inchi_free ( nTautomerGroupNumber );
3780
+ if ( tGroupNumber )
3781
+ inchi_free ( tGroupNumber );
3782
+ if ( nCurrEndpointAtNoPos )
3783
+ inchi_free ( nCurrEndpointAtNoPos );
3784
+ t_group_info->nNumEndpoints = 0;
3785
+ t_group_info->num_t_groups = 0;
3786
+ if ( !ret && ((t_group_info->tni.bNormalizationFlags & FLAG_NORM_CONSIDER_TAUT) ||
3787
+ t_group_info->nNumIsotopicEndpoints>1 && (t_group_info->bTautFlagsDone & (TG_FLAG_FOUND_ISOTOPIC_H_DONE | TG_FLAG_FOUND_ISOTOPIC_ATOM_DONE))) ) {
3788
+ ret = 1; /* only protons have been (re)moved or neitralization happened */
3789
+ }
3790
+ return ret;
3791
+ }
3792
+ /**************************************************************
3793
+ * tautomers: Compare for sorting
3794
+ ******************************************************************/
3795
+ /* Compare for sorting Ranks only */
3796
+ /* Globals: pn_tRankForSort */
3797
+ int CompRankTautomer(const void* a1, const void* a2 )
3798
+ {
3799
+ int ret = (int)pn_tRankForSort[(int)(*(const AT_RANK*)a1)] -
3800
+ (int)pn_tRankForSort[(int)(*(const AT_RANK*)a2)];
3801
+ return ret;
3802
+ }
3803
+ /*********************************************************************/
3804
+ int SortTautomerGroupsAndEndpoints( T_GROUP_INFO *t_group_info, int num_atoms, int num_at_tg, AT_RANK *nRank )
3805
+ {
3806
+ int i, nFirstEndpointAtNoPos, nNumEndpoints;
3807
+ AT_NUMB *nEndpointAtomNumber;
3808
+ int num_t_groups = num_at_tg - num_atoms;
3809
+ T_GROUP *t_group = NULL;
3810
+ /* check if sorting is required */
3811
+
3812
+ if ( num_t_groups <= 0 || t_group_info->nNumEndpoints < 2 ) {
3813
+ return 0; /* no tautomer data */
3814
+ }
3815
+ t_group = t_group_info->t_group;
3816
+ /* sort endpoints within the groups */
3817
+ for ( i = 0; i < num_t_groups; i ++ ) {
3818
+ if ( t_group[i].nNumEndpoints < 2 )
3819
+ continue; /* program error; should not happen */ /* <BRKPT> */
3820
+ /* set globals for sorting */
3821
+ nFirstEndpointAtNoPos = t_group[i].nFirstEndpointAtNoPos;
3822
+ nNumEndpoints = t_group[i].nNumEndpoints;
3823
+ if ( nNumEndpoints + nFirstEndpointAtNoPos > t_group_info->nNumEndpoints ) { /* for debug only */
3824
+ return CT_TAUCOUNT_ERR; /* program error */ /* <BRKPT> */
3825
+ }
3826
+ nEndpointAtomNumber = t_group_info->nEndpointAtomNumber+(int)nFirstEndpointAtNoPos;
3827
+ pn_tRankForSort = nRank;
3828
+ insertions_sort( nEndpointAtomNumber, nNumEndpoints, sizeof(nEndpointAtomNumber[0]), CompRankTautomer);
3829
+ }
3830
+ /* sort the tautomeric groups according to their ranks only
3831
+ (that is, ignoring the isotopic composition of the mobile groups and ranks of the endpoints) */
3832
+ if ( t_group_info->num_t_groups > 1 ) {
3833
+ /* set globals for sorting */
3834
+ /* a hack: the ranks of all tautomeric groups are */
3835
+ /* located at nRank[num_atoms..num_at_tg-1] */
3836
+ pn_tRankForSort = nRank+num_atoms;
3837
+ /* sort */
3838
+ /* ordering numbers to sort : t_group_info->tGroupNumber; */
3839
+ insertions_sort( t_group_info->tGroupNumber, num_t_groups,
3840
+ sizeof(t_group_info->tGroupNumber[0]), CompRankTautomer);
3841
+ }
3842
+ return t_group_info->num_t_groups;
3843
+ }