rino 0.1.0 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (104) hide show
  1. data/Rakefile +1 -1
  2. data/ext/extconf.rb +1 -24
  3. data/ext/libinchi.so +0 -0
  4. data/ext/src/aux2atom.h +120 -39
  5. data/ext/src/comdef.h +3 -3
  6. data/ext/src/dispstru.c +2547 -0
  7. data/ext/src/dispstru.h +73 -0
  8. data/ext/src/extr_ct.h +5 -2
  9. data/ext/src/ichi.h +27 -11
  10. data/ext/src/ichi_bns.c +1800 -254
  11. data/ext/src/ichi_bns.h +205 -4
  12. data/ext/src/ichican2.c +197 -86
  13. data/ext/src/ichicano.c +8 -13
  14. data/ext/src/ichicano.h +2 -2
  15. data/ext/src/ichicans.c +11 -6
  16. data/ext/src/ichicant.h +2 -2
  17. data/ext/src/ichicomn.h +2 -2
  18. data/ext/src/ichicomp.h +19 -4
  19. data/ext/src/ichidrp.h +9 -5
  20. data/ext/src/ichierr.h +5 -3
  21. data/ext/src/ichiisot.c +2 -2
  22. data/ext/src/ichimain.c +461 -0
  23. data/ext/src/ichimain.h +23 -15
  24. data/ext/src/ichimak2.c +6 -6
  25. data/ext/src/ichimake.c +843 -42
  26. data/ext/src/ichimake.h +4 -2
  27. data/ext/src/ichimap1.c +5 -5
  28. data/ext/src/ichimap2.c +2 -2
  29. data/ext/src/ichimap4.c +34 -21
  30. data/ext/src/ichinorm.c +11 -5
  31. data/ext/src/ichinorm.h +3 -2
  32. data/ext/src/ichiparm.c +2 -2
  33. data/ext/src/ichiparm.h +232 -30
  34. data/ext/src/ichiprt1.c +35 -11
  35. data/ext/src/ichiprt2.c +78 -7
  36. data/ext/src/ichiprt3.c +300 -120
  37. data/ext/src/ichiqueu.c +17 -2
  38. data/ext/src/ichiread.c +6932 -0
  39. data/ext/src/ichiring.c +3 -2
  40. data/ext/src/ichiring.h +2 -2
  41. data/ext/src/ichirvr1.c +4891 -0
  42. data/ext/src/ichirvr2.c +6344 -0
  43. data/ext/src/ichirvr3.c +5499 -0
  44. data/ext/src/ichirvr4.c +3177 -0
  45. data/ext/src/ichirvr5.c +1166 -0
  46. data/ext/src/ichirvr6.c +1287 -0
  47. data/ext/src/ichirvr7.c +2319 -0
  48. data/ext/src/ichirvrs.h +882 -0
  49. data/ext/src/ichisize.h +2 -2
  50. data/ext/src/ichisort.c +5 -5
  51. data/ext/src/ichister.c +281 -86
  52. data/ext/src/ichister.h +9 -3
  53. data/ext/src/ichitaut.c +208 -9
  54. data/ext/src/ichitaut.h +13 -11
  55. data/ext/src/ichitime.h +16 -2
  56. data/ext/src/inchicmp.h +107 -0
  57. data/ext/src/inpdef.h +6 -3
  58. data/ext/src/libinchi_wrap.c +912 -0
  59. data/ext/src/lreadmol.h +34 -31
  60. data/ext/src/mode.h +244 -7
  61. data/ext/src/mol2atom.c +1060 -0
  62. data/ext/src/mol2atom.h +31 -0
  63. data/ext/src/readinch.c +239 -0
  64. data/ext/src/readmol.c +28 -0
  65. data/ext/src/{e_readmol.h → readmol.h} +7 -9
  66. data/ext/src/runichi.c +251 -177
  67. data/ext/src/strutil.c +444 -238
  68. data/ext/src/strutil.h +150 -11
  69. data/ext/src/util.c +176 -118
  70. data/ext/src/util.h +15 -3
  71. data/lib/rino.rb +71 -3
  72. data/test/test.rb +33 -4
  73. metadata +22 -34
  74. data/ext/ruby_inchi_main.so +0 -0
  75. data/ext/src/e_0dstereo.c +0 -3014
  76. data/ext/src/e_0dstereo.h +0 -31
  77. data/ext/src/e_comdef.h +0 -57
  78. data/ext/src/e_ctl_data.h +0 -147
  79. data/ext/src/e_ichi_io.c +0 -498
  80. data/ext/src/e_ichi_io.h +0 -40
  81. data/ext/src/e_ichi_parms.c +0 -37
  82. data/ext/src/e_ichi_parms.h +0 -41
  83. data/ext/src/e_ichicomp.h +0 -50
  84. data/ext/src/e_ichierr.h +0 -40
  85. data/ext/src/e_ichimain.c +0 -593
  86. data/ext/src/e_ichisize.h +0 -43
  87. data/ext/src/e_inchi_atom.c +0 -75
  88. data/ext/src/e_inchi_atom.h +0 -33
  89. data/ext/src/e_inpdef.h +0 -41
  90. data/ext/src/e_mode.h +0 -706
  91. data/ext/src/e_mol2atom.c +0 -649
  92. data/ext/src/e_readinch.c +0 -58
  93. data/ext/src/e_readmol.c +0 -54
  94. data/ext/src/e_readstru.c +0 -251
  95. data/ext/src/e_readstru.h +0 -33
  96. data/ext/src/e_util.c +0 -284
  97. data/ext/src/e_util.h +0 -61
  98. data/ext/src/ichilnct.c +0 -286
  99. data/ext/src/inchi_api.h +0 -670
  100. data/ext/src/inchi_dll.c +0 -1480
  101. data/ext/src/inchi_dll.h +0 -34
  102. data/ext/src/inchi_dll_main.c +0 -23
  103. data/ext/src/inchi_dll_main.h +0 -31
  104. data/ext/src/ruby_inchi_main.c +0 -558
@@ -2,8 +2,8 @@
2
2
  * International Union of Pure and Applied Chemistry (IUPAC)
3
3
  * International Chemical Identifier (InChI)
4
4
  * Version 1
5
- * Software version 1.00
6
- * April 13, 2005
5
+ * Software version 1.01
6
+ * July 21, 2006
7
7
  * Developed at NIST
8
8
  */
9
9
 
@@ -27,8 +27,7 @@
27
27
 
28
28
  /* local prototypes */
29
29
  int cmp_components( const void *a1, const void *a2 );
30
- int mark_one_struct_component( inp_ATOM* at, int j, AT_NUMB *mark, AT_NUMB num_disconnected_components );
31
- int Free_INChI_Stereo( INChI_Stereo *pINChI_Stereo );
30
+ /*int mark_one_struct_component( inp_ATOM* at, int j, AT_NUMB *mark, AT_NUMB num_disconnected_components );*/
32
31
  INChI_Stereo *Alloc_INChI_Stereo(int num_at, int num_bonds);
33
32
  int RemoveInpAtBond( inp_ATOM *at, int iat, int k );
34
33
  int DisconnectInpAtBond( inp_ATOM *at, AT_NUMB *nOldCompNumber, int iat, int neigh_ord );
@@ -37,7 +36,7 @@ int DisconnectOneLigand( inp_ATOM *at, AT_NUMB *nOldCompNumber, S_CHAR *bMetal,
37
36
  int num_halogens, int num_atoms, int iMetal, int jLigand, INCHI_MODE *bTautFlagsDone );
38
37
  int bIsAmmoniumSalt( inp_ATOM *at, int i, int *piO, int *pk, S_CHAR *num_explicit_H );
39
38
  int DisconnectAmmoniumSalt ( inp_ATOM *at, int i, int iO, int k, S_CHAR *num_explicit_H );
40
- int bIsMetalSalt( inp_ATOM *at, int i );
39
+ /*int bIsMetalSalt( inp_ATOM *at, int i ); - moved to strutil.h */
41
40
  int DisconnectMetalSalt( inp_ATOM *at, int i );
42
41
  int bIsMetalToDisconnect(inp_ATOM *at, int i, int bCheckMetalValence);
43
42
 
@@ -93,7 +92,7 @@ int the_only_doublet_neigh(inp_ATOM *at, int i1, int *ineigh1, int *ineigh2)
93
92
  }
94
93
 
95
94
  /************************************************************************/
96
- int fix_odd_things( int num_atoms, inp_ATOM *at )
95
+ int fix_odd_things( int num_atoms, inp_ATOM *at, int bFixBug )
97
96
  { /* 0 1 2 3 4 5 6 7 8 9 */
98
97
  static const char el[] = "N;P;As;Sb;O;S;Se;Te;"; /* 8 elements + C, Si */
99
98
  static U_CHAR en[10]; /* same number: 8 elements */
@@ -196,7 +195,7 @@ int fix_odd_things( int num_atoms, inp_ATOM *at )
196
195
  }
197
196
  } else
198
197
  if ( at[i1].bond_type[ineigh] == BOND_TYPE_DOUBLE ) {
199
- /* found a candidate for Y; bond must bedouble */
198
+ /* found a candidate for Y; bond must be double */
200
199
  i1_c = ineigh;
201
200
  c = neigh;
202
201
  }
@@ -322,6 +321,13 @@ int fix_odd_things( int num_atoms, inp_ATOM *at )
322
321
  break;
323
322
  }
324
323
  }
324
+ #if( FIX_ODD_THINGS_REM_Plus_BUG == 1 )
325
+ at[c].charge -= charge;
326
+ #else
327
+ if ( bFixBug ) {
328
+ at[c].charge -= charge;
329
+ }
330
+ #endif
325
331
  break;
326
332
  }
327
333
  }
@@ -454,7 +460,11 @@ int remove_ion_pairs( int num_atoms, inp_ATOM *at )
454
460
  int num_changes = 0;
455
461
 
456
462
  /* 0 1 2 3 4 5 6 7 8 9 8 9 */
463
+ #if( FIX_REM_ION_PAIRS_Si_BUG == 1 )
464
+ static const char el[] = "N;P;As;Sb;O;S;Se;Te;C;Si;"; /* 8 elements + C, Si */
465
+ #else
457
466
  static const char el[] = "N;P;As;Sb;O;S;Se;Te;C;Si"; /* 8 elements + C, Si */
467
+ #endif
458
468
  static char en[12]; /* same number: 8 elements */
459
469
  static int ne=0; /* will be 8 and 10 */
460
470
 
@@ -1267,7 +1277,7 @@ int RemoveInpAtBond( inp_ATOM *atom, int iat, int k )
1267
1277
  {
1268
1278
  int i, j, m, m2, k2;
1269
1279
  inp_ATOM *at = atom + iat;
1270
- inp_ATOM *at2;
1280
+ inp_ATOM *at2 = NULL;
1271
1281
  int val = at->valence - 1;
1272
1282
  if ( val >= 0 ) {
1273
1283
  int bond = at->bond_type[k];
@@ -2550,6 +2560,85 @@ int get_iat_number( int el_number, const int el_num[], int el_num_len )
2550
2560
  IAT_MAX
2551
2561
  } ION_ATOM_TYPE;
2552
2562
 
2563
+ #if( READ_INCHI_STRING == 1 )
2564
+ /****************************************************************************************/
2565
+ int bHeteroAtomMayHaveXchgIsoH( inp_ATOM *atom, int iat )
2566
+ {
2567
+ inp_ATOM *at = atom + iat, *at2;
2568
+ static int el_num[IAT_MAX];
2569
+ int j, val, is_O=0, is_Cl=0, is_N=0, is_H=0, num_H, iat_numb, bAccept, cur_num_iso_H;
2570
+
2571
+ if ( !el_num[IAT_H]) {
2572
+ el_num[IAT_H ] = get_periodic_table_number( "H" );
2573
+ el_num[IAT_C ] = get_periodic_table_number( "C" );
2574
+ el_num[IAT_N ] = get_periodic_table_number( "N" );
2575
+ el_num[IAT_P ] = get_periodic_table_number( "P" );
2576
+ el_num[IAT_O ] = get_periodic_table_number( "O" );
2577
+ el_num[IAT_S ] = get_periodic_table_number( "S" );
2578
+ el_num[IAT_Se] = get_periodic_table_number( "Se");
2579
+ el_num[IAT_Te] = get_periodic_table_number( "Te");
2580
+ el_num[IAT_F ] = get_periodic_table_number( "F" );
2581
+ el_num[IAT_Cl] = get_periodic_table_number( "Cl");
2582
+ el_num[IAT_Br] = get_periodic_table_number( "Br");
2583
+ el_num[IAT_I ] = get_periodic_table_number( "I" );
2584
+ }
2585
+ if ( 0 > (iat_numb = get_iat_number( at->el_number, el_num, IAT_MAX )) ) {
2586
+ return 0;
2587
+ }
2588
+ if ( abs(at->charge) > 1 || at->radical && RADICAL_SINGLET != at->radical ) {
2589
+ return 0;
2590
+ }
2591
+ val = -1;
2592
+ switch( iat_numb ) {
2593
+ case IAT_N:
2594
+ case IAT_P:
2595
+ is_N = 1;
2596
+ val = 3+at->charge;
2597
+ break;
2598
+ case IAT_O:
2599
+ case IAT_S:
2600
+ case IAT_Se:
2601
+ case IAT_Te:
2602
+ is_O = 1;
2603
+ val = 2+at->charge;
2604
+ break;
2605
+ case IAT_F:
2606
+ case IAT_Cl:
2607
+ case IAT_Br:
2608
+ case IAT_I:
2609
+ if ( at->charge == 0 ) {
2610
+ is_Cl = 1; /* isolated HCl */
2611
+ val = 1;
2612
+ }
2613
+ break;
2614
+ case IAT_H:
2615
+ if ( at->valence == 0 &&
2616
+ at->charge == 1 ) {
2617
+ is_H = 1; /* isolated proton */
2618
+ val = 0;
2619
+ }
2620
+ }
2621
+ if ( val < 0 ) {
2622
+ return 0;
2623
+ }
2624
+ num_H = NUMH(at,0);
2625
+ if ( val != at->chem_bonds_valence + num_H ) {
2626
+ return 0;
2627
+ }
2628
+ if ( is_H ) {
2629
+ return 2; /* H atom */
2630
+ } else {
2631
+ cur_num_iso_H = 0;
2632
+ for ( j = 0, bAccept = 1; j < at->valence && bAccept; j ++ ) {
2633
+ at2 = atom + (int)at->neighbor[j];
2634
+ if ( at2->charge && at->charge || (at2->radical && RADICAL_SINGLET != at2->radical ) ) {
2635
+ return 0; /* adjacent charged/radical atoms: do not neutralize */
2636
+ }
2637
+ }
2638
+ }
2639
+ return 1;
2640
+ }
2641
+ #endif
2553
2642
  /****************************************************************************************/
2554
2643
  int bNumHeterAtomHasIsotopicH( inp_ATOM *atom, int num_atoms )
2555
2644
  {
@@ -2673,22 +2762,6 @@ int cmp_components( const void *a1, const void *a2 )
2673
2762
 
2674
2763
  }
2675
2764
  /*************************************************************************************************/
2676
- int mark_one_struct_component( inp_ATOM* at, int j, AT_NUMB *mark, AT_NUMB num_disconnected_components )
2677
- {
2678
- if ( mark[j] ) {
2679
- return 0;
2680
- } else {
2681
- int i;
2682
- mark[j] = num_disconnected_components;
2683
- for ( i = 0; i < at[j].valence; i++ ) {
2684
- if ( !mark[(int)at[j].neighbor[i]] ) {
2685
- mark_one_struct_component( at, (int)at[j].neighbor[i], mark, num_disconnected_components );
2686
- }
2687
- }
2688
- }
2689
- return 1;
2690
- }
2691
- /*************************************************************************************************/
2692
2765
  int MarkDisconnectedComponents( ORIG_ATOM_DATA *orig_at_data, int bProcessOldCompNumbers )
2693
2766
  {
2694
2767
  typedef AT_NUMB AT_TRIPLE[3];
@@ -2696,7 +2769,11 @@ int MarkDisconnectedComponents( ORIG_ATOM_DATA *orig_at_data, int bProcessOldCom
2696
2769
  inp_ATOM *at = orig_at_data->at;
2697
2770
  int num_at = orig_at_data->num_inp_atoms;
2698
2771
  AT_NUMB *nCurAtLen = NULL;
2772
+
2699
2773
  AT_NUMB *nNewCompNumber = NULL;
2774
+ AT_NUMB *nPrevAtom = NULL;
2775
+ S_CHAR *iNeigh = NULL;
2776
+
2700
2777
  AT_NUMB *nOldCompNumber = NULL;
2701
2778
  int i, j, num_components, ret;
2702
2779
  int new_comp_no;
@@ -2713,6 +2790,7 @@ int MarkDisconnectedComponents( ORIG_ATOM_DATA *orig_at_data, int bProcessOldCom
2713
2790
  if ( bProcessOldCompNumbers && !orig_at_data->nOldCompNumber ) {
2714
2791
  bProcessOldCompNumbers = 0;
2715
2792
  }
2793
+ num_components = 0;
2716
2794
  /*
2717
2795
  for ( j = 0; j < num_at; j ++ ) {
2718
2796
  at[j].component = 0;
@@ -2722,17 +2800,43 @@ int MarkDisconnectedComponents( ORIG_ATOM_DATA *orig_at_data, int bProcessOldCom
2722
2800
  if ( !num_at ) {
2723
2801
  return 0;
2724
2802
  }
2725
- if ( !( nNewCompNumber = (AT_NUMB *) inchi_calloc( num_at, sizeof(nNewCompNumber[0]) ) ) ) {
2803
+ if ( !( nNewCompNumber = (AT_NUMB *) inchi_calloc( num_at, sizeof(nNewCompNumber[0]) ) ) ||
2804
+ /* for non-recursive DFS only: */
2805
+ !( nPrevAtom = (AT_NUMB *) inchi_calloc( num_at, sizeof(nPrevAtom[0]) ) ) ||
2806
+ !( iNeigh = (S_CHAR *) inchi_calloc( num_at, sizeof(iNeigh[0]) ) )) {
2726
2807
  goto exit_function;
2727
2808
  }
2728
- /* mark and count */
2729
- for ( j = 0, num_components = 0; j < num_at; j++ ) {
2809
+ /* mark and count; avoid deep DFS recursion: it may make verifying software unhappy */
2810
+ /* nNewCompNumber[i] will contain new component number for atoms at[i], i=0..num_at-1 */
2811
+ for ( j = 0; j < num_at; j++ ) {
2730
2812
  if ( !nNewCompNumber[j] ) {
2731
- /* nNewCompNumber[i] will contain new component number for atom at[i], i=0..num_at-1 */
2732
- mark_one_struct_component( at, j, nNewCompNumber, (AT_NUMB)(num_components+1) );
2813
+ /* mark starting with at[j] */
2814
+ int fst_at, nxt_at, cur_at = j;
2733
2815
  num_components ++;
2816
+ /* first time at at[j] */
2817
+ nNewCompNumber[fst_at = cur_at] = (AT_NUMB) num_components;
2818
+ /* find next neighbor */
2819
+ while ( 1 ) {
2820
+ if ( iNeigh[cur_at] < at[cur_at].valence ) {
2821
+ nxt_at = at[cur_at].neighbor[(int)iNeigh[cur_at] ++];
2822
+ if ( !nNewCompNumber[nxt_at] ) {
2823
+ /* forward edge: found new atom */
2824
+ nNewCompNumber[nxt_at] = (AT_NUMB) num_components;
2825
+ nPrevAtom[nxt_at] = (AT_NUMB) cur_at;
2826
+ cur_at = nxt_at;
2827
+ }
2828
+ } else
2829
+ if ( cur_at == fst_at ) {
2830
+ break; /* done */
2831
+ } else {
2832
+ cur_at = nPrevAtom[cur_at]; /* retract */
2833
+ }
2834
+ }
2734
2835
  }
2735
2836
  }
2837
+ inchi_free( nPrevAtom ); nPrevAtom = NULL;
2838
+ inchi_free( iNeigh ); iNeigh = NULL;
2839
+
2736
2840
  /* Allocate more memory */
2737
2841
  i = inchi_max( num_components, orig_at_data->num_components );
2738
2842
  if ( !(nCurAtLen = (AT_NUMB *) inchi_calloc( num_components+1, sizeof(nCurAtLen[0]) ) ) ||
@@ -2814,6 +2918,14 @@ exit_function:
2814
2918
  inchi_free( component_nbr );
2815
2919
 
2816
2920
  if ( ret < 0 ) {
2921
+ if ( nPrevAtom ) {
2922
+ inchi_free( nPrevAtom );
2923
+ nPrevAtom = NULL;
2924
+ }
2925
+ if ( iNeigh ) {
2926
+ inchi_free( iNeigh );
2927
+ iNeigh = NULL;
2928
+ }
2817
2929
  if ( nCurAtLen ) {
2818
2930
  inchi_free( nCurAtLen );
2819
2931
  nCurAtLen = NULL;
@@ -2835,7 +2947,7 @@ exit_function:
2835
2947
 
2836
2948
  orig_at_data->num_components = num_components;
2837
2949
 
2838
- return num_components; /* number of disconnected components; 1=>single connected structure*/
2950
+ return ret; /* number of disconnected components; 1=>single connected structure*/
2839
2951
  }
2840
2952
  /******************************************************************************/
2841
2953
  /* Extract one (connected) component */
@@ -2932,13 +3044,21 @@ int Free_INChI(INChI **ppINChI)
2932
3044
  if ( pINChI->nRefCount -- > 0 )
2933
3045
  return 1;
2934
3046
  #endif
3047
+ Free_INChI_Members(pINChI);
3048
+
3049
+ qzfree( pINChI );
3050
+ *ppINChI = NULL;
3051
+
3052
+ }
3053
+ return 0;
3054
+ }
3055
+ /****************************************************************/
3056
+ int Free_INChI_Members(INChI *pINChI)
3057
+ {
3058
+ if ( pINChI ) {
2935
3059
 
2936
3060
  Free_INChI_Stereo(pINChI->Stereo );
2937
3061
  Free_INChI_Stereo(pINChI->StereoIsotopic );
2938
- /*
2939
- Free_INChI_Stereo(pINChI->StereoInv );
2940
- Free_INChI_Stereo(pINChI->StereoIsotopicInv);
2941
- */
2942
3062
  qzfree(pINChI->nAtom );
2943
3063
  qzfree(pINChI->nConnTable );
2944
3064
  qzfree(pINChI->nTautomer );
@@ -2949,29 +3069,10 @@ int Free_INChI(INChI **ppINChI)
2949
3069
  qzfree(pINChI->nPossibleLocationsOfIsotopicH);
2950
3070
  qzfree(pINChI->Stereo );
2951
3071
  qzfree(pINChI->StereoIsotopic );
2952
- /*
2953
- qzfree(pINChI->StereoInv );
2954
- qzfree(pINChI->StereoIsotopicInv);
2955
- */
2956
3072
  qzfree(pINChI->szHillFormula );
2957
- /*
2958
- pINChI->nAtom = NULL;
2959
- pINChI->nConnTable = NULL;
2960
- pINChI->nTautomer = NULL;
2961
- pINChI->nNum_H = NULL;
2962
- pINChI->IsotopicAtom = NULL;
2963
- pINChI->IsotopicTGroup = NULL;
2964
- pINChI->Stereo = NULL;
2965
- pINChI->StereoIsotopic = NULL;
2966
- pINChI->szHillFormula = NULL;
2967
- */
2968
- qzfree( pINChI );
2969
- *ppINChI = NULL;
2970
-
2971
3073
  }
2972
3074
  return 0;
2973
3075
  }
2974
-
2975
3076
  /****************************************************************/
2976
3077
  INChI *Alloc_INChI( inp_ATOM *at, int num_at, int *found_num_bonds, int *found_num_isotopic, int nAllocMode )
2977
3078
  {
@@ -3126,7 +3227,7 @@ INChI_Aux *Alloc_INChI_Aux( int num_at, int num_isotopic_atoms, int nAllocMode,
3126
3227
  }
3127
3228
 
3128
3229
  if ( num_at > 1 &&
3129
- (pINChI_Aux->nConstitEquTGroupNumbers = (AT_NUMB*)inchi_calloc(sizeof(pINChI_Aux->nConstitEquTGroupNumbers[0]), num_at/2)) ) {
3230
+ (pINChI_Aux->nConstitEquTGroupNumbers = (AT_NUMB*)inchi_calloc(sizeof(pINChI_Aux->nConstitEquTGroupNumbers[0]), num_at/2+1)) ) {
3130
3231
  ;
3131
3232
  } else
3132
3233
  if ( num_at > 1 ) {
@@ -3154,7 +3255,7 @@ INChI_Aux *Alloc_INChI_Aux( int num_at, int num_isotopic_atoms, int nAllocMode,
3154
3255
  goto out_of_RAM;
3155
3256
  }
3156
3257
  if ( /*num_isotopic_atoms && num_at > 1 &&*/
3157
- (pINChI_Aux->nConstitEquIsotopicTGroupNumbers = (AT_NUMB*)inchi_calloc(sizeof(pINChI_Aux->nConstitEquIsotopicTGroupNumbers[0]), num_at/2)) ) {
3258
+ (pINChI_Aux->nConstitEquIsotopicTGroupNumbers = (AT_NUMB*)inchi_calloc(sizeof(pINChI_Aux->nConstitEquIsotopicTGroupNumbers[0]), num_at/2+1)) ) {
3158
3259
  ;
3159
3260
  } else
3160
3261
  if ( num_isotopic_atoms && num_at > 1 ) {
@@ -3174,21 +3275,44 @@ out_of_RAM:
3174
3275
  return NULL;
3175
3276
  }
3176
3277
  /***********************************************************************************/
3177
- #define ABNORMAL_AT(i) ( at[i].charge && at[i].radical || abs(at[i].charge) > 3 || \
3178
- ( at[i].radical && at[i].radical != RADICAL_DOUBLET) || \
3179
- at[i].iso_atw_diff && (at[i].iso_atw_diff == 1 || at[i].iso_atw_diff < -3 || at[i].iso_atw_diff > 6 ))
3278
+
3279
+ #define IS_DEUTERIUM(i) (!strcmp( at[i].elname, "D" ) || at[i].iso_atw_diff == 2 && !strcmp( at[i].elname, "H" ))
3280
+ #define IS_TRITIUM(i) (!strcmp( at[i].elname, "T" ) || at[i].iso_atw_diff == 3 && !strcmp( at[i].elname, "H" ))
3281
+
3282
+ #define ABNORMAL_ISO(i) (at[i].iso_atw_diff == 1 || at[i].iso_atw_diff < -3 || at[i].iso_atw_diff > 5 )
3283
+ #define ABNORMAL_CHG(i) (abs(at[i].charge) > 3)
3284
+ #define ABNORMAL_RAD(i) (RADICAL_SINGLET <= at[i].radical && at[i].radical <= RADICAL_TRIPLET )
3285
+
3286
+ #define ANY_ISO(i, X) ((X)? (at[i].iso_atw_diff && !IS_DEUTERIUM(i) && !IS_TRITIUM(i)) :\
3287
+ (at[i].iso_atw_diff || IS_DEUTERIUM(i) || IS_TRITIUM(i)))
3288
+ #define ANY_CHG(i) (0 != at[i].charge)
3289
+ #define ANY_RAD(i) (RADICAL_SINGLET <= at[i].radical && at[i].radical <= RADICAL_TRIPLET )
3290
+
3291
+ #define NORMAL_ISO(i, X) (ANY_ISO(i, X) && !ABNORMAL_ISO(i))
3292
+
3293
+
3294
+ /* needs additional M CHG, M RAD, M ISO line */
3295
+ /* due to ISIS/Draw feature always include M RAD for any radical */
3296
+ #define ABNORMAL_AT(i) ( at[i].radical || abs(at[i].charge) > 3 || \
3297
+ ABNORMAL_ISO(i) )
3298
+
3299
+ /* always add M ISO, M RAD, M CHG; Except: (bAtomsDT && D or T) */
3300
+ #define ADD_LINE_AT(i) ( at[i].charge || \
3301
+ at[i].radical || \
3302
+ at[i].iso_atw_diff && (bAtomsDT? (at[i].iso_atw_diff != 1 || strcmp(at[i].elname, "H")) : 1) )
3180
3303
  #define ALIASED_AT(i) (0 < NUM_ISO_H(at, i))
3181
3304
  /***********************************************************************************/
3182
- #if( TEST_RENUMB_ATOMS_SAVE_LONGEST == 1 )
3305
+ #if( TEST_RENUMB_ATOMS_SAVE_LONGEST == 1 || TEST_RENUMB_SWITCH == 1 )
3183
3306
  int WriteToSDfile( const INP_ATOM_DATA *inp_at_data, INCHI_FILE* fcb, const char* name, const char* comment,
3184
3307
  const char *szLabel, const char *szValue)
3185
3308
  {
3186
- int i, j, k, num_bonds=0, ret=0;
3309
+ int i, j, k, num_bonds=0, ret=0, bAtomsDT = 1 /* treat D, T as normal atoms */, bV2000 = 0 /*V2000 Molfile */;
3187
3310
  int bAtomNeedsAlias;
3188
- int flag_bad_charge=0, nNumAddLines=0, nNumIso=0, nNumChargeLines=0, nNumRadicalLines=0, nNumAliasLines=0;
3311
+ int flag_bad_charge=0, flag_bad_iso=0, nNumAddLines=0, nNumIsoLines=0, nNumChargeLines=0, nNumRadicalLines=0, nNumAliasLines=0;
3312
+ int nNumNecessaryIsoLines = 0, nNumNecessaryChgLines = 0, nNumNecessaryRadLines = 0;
3189
3313
  /*sp_ATOM *at; */
3190
- float fzero=0.0F;
3191
- double x, y;
3314
+ /*float fzero=0.0F;*/
3315
+ double x, y, z;
3192
3316
  int bNext /*, s*/;
3193
3317
  const inp_ATOM *at = inp_at_data->at_fixed_bonds? inp_at_data->at_fixed_bonds : inp_at_data->at;
3194
3318
  int num_atoms = inp_at_data->num_at;
@@ -3233,105 +3357,95 @@ int WriteToSDfile( const INP_ATOM_DATA *inp_at_data, INCHI_FILE* fcb, const char
3233
3357
  num_bonds += at[i].valence;
3234
3358
  num_bonds /= 2;
3235
3359
 
3236
- /*find if we need "M CHG" and "M RAD"*/
3237
- for (i=0, nNumAddLines = 0, nNumIso=0; i < num_atoms; i++) {
3238
- bAtomNeedsAlias = ALIASED_AT(i); /* 5-3-99 DCh */
3239
- nNumAddLines += !bAtomNeedsAlias && ABNORMAL_AT(i);
3240
- nNumAliasLines += 2 * bAtomNeedsAlias;
3241
- nNumIso += ( 0 == strcmp( at[i].elname, "D" ) || ( 0 == strcmp( at[i].elname, "T" ) ) );
3360
+ /*find if we need "M CHG", "M RAD", "M ISO" */
3361
+ for (i=0, nNumAddLines = 0; i < num_atoms; i++) {
3362
+ if ( bAtomNeedsAlias = ALIASED_AT(i) ) {
3363
+ nNumAliasLines += 2 * bAtomNeedsAlias;
3364
+ } else {
3365
+ nNumNecessaryIsoLines += ABNORMAL_ISO(i);
3366
+ nNumNecessaryChgLines += ABNORMAL_CHG(i);
3367
+ nNumNecessaryRadLines += ABNORMAL_RAD(i);
3368
+ nNumIsoLines += ANY_ISO(i, bAtomsDT);
3369
+ nNumChargeLines += ANY_CHG(i);
3370
+ nNumRadicalLines += ANY_RAD(i);
3371
+ }
3242
3372
  }
3243
-
3244
- /* count additional M lines*/
3245
- if ( nNumAddLines || nNumAliasLines ) {
3246
- for (i=0, nNumChargeLines=0, nNumRadicalLines=0; i < num_atoms; i++) {
3247
- nNumChargeLines += (0 != at[i].charge) && !ALIASED_AT(i);
3248
- nNumRadicalLines += (0 != at[i].radical) && !ALIASED_AT(i);
3373
+ if ( !bV2000 ) {
3374
+ if ( !nNumNecessaryRadLines && !nNumNecessaryChgLines ) {
3375
+ nNumRadicalLines = 0;
3376
+ nNumChargeLines = 0;
3377
+ }
3378
+ if ( !nNumNecessaryIsoLines ) {
3379
+ nNumIsoLines = 0;
3249
3380
  }
3250
3381
  }
3382
+
3383
+
3384
+ /* count additional M lines*/
3251
3385
  nNumChargeLines = ( nNumChargeLines + 7 ) / 8;
3252
3386
  nNumRadicalLines = ( nNumRadicalLines + 7 ) / 8;
3253
- nNumIso = ( nNumIso + 7 ) / 8;
3387
+ nNumIsoLines = ( nNumIsoLines + 7 ) / 8;
3254
3388
 
3255
- nNumAddLines = nNumChargeLines + nNumRadicalLines + nNumAliasLines; /* 1 for M END*/
3389
+ nNumAddLines = nNumChargeLines + nNumRadicalLines + nNumIsoLines + nNumAliasLines; /* 1 for M END*/
3256
3390
 
3257
- if ( nNumAddLines == 0 ) {
3258
- nNumIso = 0; /* keep isotopes description in CTable only*/
3259
- } else {
3260
- nNumAddLines += nNumIso+1; /* add 1 for "M END" line*/
3391
+ if ( nNumAddLines || bV2000 ) {
3392
+ nNumAddLines += 1; /* add 1 for "M END" line*/
3261
3393
  }
3262
3394
 
3263
- /* aaa bbblllfffcccsssxxxrrrpppiiimmmvvvvvv*/
3395
+ /* aaabbblllfffcccsssxxxrrrpppiiimmmvvvvvv*/
3264
3396
  inchi_print_nodisplay(fcb,"%3d%3d 0 0 0 0 0 0 0 0%3d%s\n",num_atoms, num_bonds, nNumAddLines,nNumAddLines?" V2000":"");
3265
3397
  /* atoms block*/
3266
3398
  for (i=0; i < num_atoms; i++) {
3267
- char elname[ATOM_EL_LEN] = "\0\0\0\0\0";
3399
+ char elname[ATOM_EL_LEN];
3268
3400
  int iso = 0;
3269
3401
  int charge = 0;
3270
3402
  int valence = 0;
3271
- bAtomNeedsAlias = ALIASED_AT(i);
3272
- /* isotope*/
3273
- iso = !strcmp( at[i].elname, "D" )? 1:
3274
- !strcmp( at[i].elname, "T" )? 2: 0;
3275
-
3276
- if ( iso ) {
3277
- /* deuterium or tritium*/
3278
- strcpy ( elname, "H" );
3279
- } else
3403
+ int nIsotopeH = IS_DEUTERIUM(i)? 1 : IS_TRITIUM(i)? 2 : 0;
3404
+ bAtomNeedsAlias = ALIASED_AT(i); /* Has implicit D and/or T neighbors */
3405
+ memset( elname, 0, sizeof(elname) );
3406
+
3280
3407
  if ( bAtomNeedsAlias ) {
3408
+ /* alias */
3281
3409
  strcpy ( elname, "C" );
3282
3410
  } else {
3283
- strncpy ( elname, at[i].elname, sizeof(elname)-1 );
3284
- }
3285
- /*
3286
- if ( !iso ) {
3287
- strncpy ( elname, at[i].elname, sizeof(elname)-1 );
3288
- } else
3289
- if ( ALIASED_AT(i) ) {
3290
- strcpy ( elname, "C" );
3291
- } else {
3292
- -- deuterium or tritium --
3293
- strcpy ( elname, "H" );
3294
- }
3295
- */
3296
- if ( !ABNORMAL_AT(i) && !bAtomNeedsAlias ) {
3297
-
3298
- /* Only normal atoms without alias can be here*/
3299
-
3300
- /* charge*/
3301
- switch ( at[i].charge ) {
3302
- case 3: charge = 1; break;
3303
- case 2: charge = 2; break;
3304
- case 1: charge = 3; break;
3305
- case -1: charge = 5; break;
3306
- case -2: charge = 6; break;
3307
- case -3: charge = 7; break;
3308
- case 0: charge = 0; break;
3309
- default: flag_bad_charge = 1; break;
3310
- };
3411
+ /* isotope*/
3412
+ if ( nIsotopeH ) {
3413
+ strcpy( elname, bAtomsDT? ( nIsotopeH==1? "D" : "T" ) : "H" );
3414
+ } else {
3415
+ strncpy ( elname, at[i].elname, sizeof(elname)-1 );
3416
+ }
3417
+ if ( !ABNORMAL_CHG(i) && !ANY_RAD(i) ) {
3418
+ /* charge*/
3419
+ /* Only atoms without alias can be here*/
3420
+ switch ( at[i].charge ) {
3421
+ case 3: charge = 1; break;
3422
+ case 2: charge = 2; break;
3423
+ case 1: charge = 3; break;
3424
+ case -1: charge = 5; break;
3425
+ case -2: charge = 6; break;
3426
+ case -3: charge = 7; break;
3427
+ case 0: charge = 0; break;
3428
+ default: flag_bad_charge = 1; break;
3429
+ };
3430
+ }
3311
3431
  /* radical*/
3312
- if ( at[i].radical ) {
3432
+ if ( ANY_RAD(i) && !ANY_CHG(i) ) {
3313
3433
  if ( at[i].radical == RADICAL_DOUBLET ) {
3314
- flag_bad_charge |= (charge != 0);
3315
3434
  charge = 4;
3316
- } else {
3317
- flag_bad_charge |= 2;
3318
3435
  }
3319
3436
  }
3320
- if ( flag_bad_charge ) {
3321
- charge = 0;
3322
- }
3437
+ }
3438
+ /* allow isotopic shift for aliased atoms */
3439
+ if ( NORMAL_ISO(i, bAtomsDT) ) {
3440
+ iso = at[i].iso_atw_diff > 0? at[i].iso_atw_diff-1:
3441
+ at[i].iso_atw_diff < 0? at[i].iso_atw_diff :
3442
+ nIsotopeH? nIsotopeH : (flag_bad_iso ++, 0);
3323
3443
  }
3324
3444
 
3325
3445
  x = at[i].x;
3326
3446
  y = at[i].y;
3327
- /* --- just removed --
3328
- if ( c && c->xCoeff != 0.0 && c->yCoeff != 0.0 ) {
3329
- x = (x - c->xShift)/c->xCoeff;
3330
- y = (y - c->yShift)/c->yCoeff;
3331
- } else {
3332
- y = -y;
3333
- }
3334
- ----------------------*/
3447
+ z = at[i].z;
3448
+
3335
3449
  if( at[i].num_H > 0 ) {
3336
3450
  for ( j = 0, valence = 0; j < at[i].valence; j++ ) {
3337
3451
  switch( k = at[i].bond_type[j] ) { /* fixed valence calculation 12-23-99 DCh.*/
@@ -3349,7 +3463,7 @@ int WriteToSDfile( const INP_ATOM_DATA *inp_at_data, INCHI_FILE* fcb, const char
3349
3463
  valence = valence/2 + at[i].num_H;
3350
3464
  } else
3351
3465
  /* Added 07-09-2003 DCh*/
3352
- if ( at[i].chem_bonds_valence > 0 && at[i].chem_bonds_valence < 15 ) {
3466
+ if ( at[i].chem_bonds_valence > 0 ) {
3353
3467
  valence = at[i].chem_bonds_valence;
3354
3468
  } else
3355
3469
  /* Added 07-09-2003 DCh*/
@@ -3360,7 +3474,7 @@ int WriteToSDfile( const INP_ATOM_DATA *inp_at_data, INCHI_FILE* fcb, const char
3360
3474
  /* (float)at[i].x, (float)(-at[i].y), fzero, at[i].elname, iso, charge);*/
3361
3475
  /* xxxxxxyyyyyyzzzzzz aaa____ddcccsssnnnbbbvvvrrriiimmmeee */
3362
3476
  inchi_print_nodisplay(fcb,"%10.4f%10.4f%10.4f %-3.3s%2d%3d 0 0%3d 0 0 0 0\n",
3363
- x, y, (double)fzero, elname, (int)iso, (int)charge, valence /* at[i].special*/);
3477
+ x, y, z, elname, (int)iso, (int)charge, valence /* at[i].special*/);
3364
3478
  /* reflect image against x-axis;
3365
3479
  when transforming MOLfile back to STDATA in mol_to_stdata(...),
3366
3480
  make one more reflection to restore original orientation.
@@ -3401,9 +3515,29 @@ int WriteToSDfile( const INP_ATOM_DATA *inp_at_data, INCHI_FILE* fcb, const char
3401
3515
  num_m = 0;
3402
3516
  for (i=0; i < num_atoms; i++) {
3403
3517
  if ( ALIASED_AT(i) ) {
3518
+ int num_H;
3404
3519
  inchi_print_nodisplay( fcb, "A %d\n", i+1 );
3405
3520
  num_m ++;
3406
3521
  strcpy( str_m, at[i].elname );
3522
+ /* Add H, D, T */
3523
+ if ( num_H = at[i].num_H + at[i].num_iso_H[0] ) { /* protium is lost here */
3524
+ strcat( str_m, "H" );
3525
+ if ( num_H > 1 ) {
3526
+ sprintf( str_m + strlen(str_m), "%d", num_H );
3527
+ }
3528
+ }
3529
+ if ( num_H = at[i].num_iso_H[1] ) { /* deuterium */
3530
+ strcat( str_m, "D" );
3531
+ if ( num_H > 1 ) {
3532
+ sprintf( str_m + strlen(str_m), "%d", num_H );
3533
+ }
3534
+ }
3535
+ if ( num_H = at[i].num_iso_H[2] ) { /* Tritium */
3536
+ strcat( str_m, "T" );
3537
+ if ( num_H > 1 ) {
3538
+ sprintf( str_m + strlen(str_m), "%d", num_H );
3539
+ }
3540
+ }
3407
3541
  /* Add charge to the Alias */
3408
3542
  if ( at[i].charge){
3409
3543
  strcat(str_m, at[i].charge>0? "+" : "-");
@@ -3411,8 +3545,16 @@ int WriteToSDfile( const INP_ATOM_DATA *inp_at_data, INCHI_FILE* fcb, const char
3411
3545
  sprintf( str_m+strlen(str_m), "%d", j );
3412
3546
  }
3413
3547
  /* Add radical to the Alias */
3414
- for ( j = inchi_min(2,at[i].radical); 0 < j; j-- ) {
3548
+ switch( at[i].radical ) {
3549
+ case RADICAL_SINGLET:
3550
+ strcat( str_m, ":" );
3551
+ break;
3552
+ case RADICAL_DOUBLET:
3415
3553
  strcat( str_m, "^" );
3554
+ break;
3555
+ case RADICAL_TRIPLET:
3556
+ strcat( str_m, "^^" );
3557
+ break;
3416
3558
  }
3417
3559
  inchi_print_nodisplay( fcb, "%s\n", str_m );
3418
3560
  num_m ++;
@@ -3428,7 +3570,7 @@ int WriteToSDfile( const INP_ATOM_DATA *inp_at_data, INCHI_FILE* fcb, const char
3428
3570
  num_m = 0;
3429
3571
  if ( nNumChargeLines ) {
3430
3572
  for (i=0; i < num_atoms; i++) {
3431
- if ( at[i].charge && !ALIASED_AT(i) ) {
3573
+ if ( ANY_CHG(i) && !ALIASED_AT(i) ) {
3432
3574
  sprintf( entry, " %3d %3d", i+1, (int)at[i].charge );
3433
3575
  strcat( str_m, entry );
3434
3576
  num_m ++;
@@ -3445,7 +3587,7 @@ int WriteToSDfile( const INP_ATOM_DATA *inp_at_data, INCHI_FILE* fcb, const char
3445
3587
  num_m = 0;
3446
3588
  if ( nNumRadicalLines ) {
3447
3589
  for (i=0; i < num_atoms; i++) {
3448
- if ( at[i].radical && !ALIASED_AT(i) ) {
3590
+ if ( ANY_RAD(i) && !ALIASED_AT(i) ) {
3449
3591
  int radical = (at[i].radical==RADICAL_SINGLET ||
3450
3592
  at[i].radical==RADICAL_DOUBLET ||
3451
3593
  at[i].radical==RADICAL_TRIPLET)? at[i].radical : 0;
@@ -3465,18 +3607,28 @@ int WriteToSDfile( const INP_ATOM_DATA *inp_at_data, INCHI_FILE* fcb, const char
3465
3607
  /* isotopes*/
3466
3608
  str_m[0] = 0;
3467
3609
  num_m = 0;
3468
- if ( nNumIso ) {
3610
+ if ( nNumIsoLines ) {
3611
+ int el_num, iso;
3469
3612
  for (i=0; i < num_atoms; i++) {
3470
- if ( 0 == strcmp( at[i].elname, "D" ) ) {
3471
- sprintf( entry, " %3d %3d", i+1, 2 );
3472
- strcat( str_m, entry );
3473
- num_m ++;
3474
- } else
3475
- if ( 0 == strcmp( at[i].elname, "T" ) ) {
3476
- sprintf( entry, " %3d %3d", i+1, 3 );
3613
+ if ( ANY_ISO(i,bAtomsDT) && !ALIASED_AT(i) ) {
3614
+ if ( IS_DEUTERIUM(i) ) {
3615
+ iso = 1;
3616
+ el_num = 1;
3617
+ } else
3618
+ if ( IS_TRITIUM(i) ) {
3619
+ iso = 2;
3620
+ el_num = 1;
3621
+ } else {
3622
+ iso = at[i].iso_atw_diff > 0? at[i].iso_atw_diff-1 : at[i].iso_atw_diff;
3623
+ el_num = at[i].el_number;
3624
+ }
3625
+ iso += get_atw_from_elnum( el_num );
3626
+
3627
+ sprintf( entry, " %3d %3d", i+1, iso );
3477
3628
  strcat( str_m, entry );
3478
3629
  num_m ++;
3479
3630
  }
3631
+
3480
3632
  if ( i == num_atoms-1 && num_m || num_m == 8 ) {
3481
3633
  inchi_print_nodisplay( fcb, "M ISO%3d%s\n", num_m, str_m );
3482
3634
  str_m[0] = 0;
@@ -3488,11 +3640,11 @@ int WriteToSDfile( const INP_ATOM_DATA *inp_at_data, INCHI_FILE* fcb, const char
3488
3640
  }
3489
3641
  if ( szValue && szValue[0] ) {
3490
3642
  if ( szLabel && szLabel[0] ) {
3491
- inchi_print_nodisplay( fcb, "> <%s>\n", szLabel );
3643
+ inchi_print_nodisplay( fcb, "> <%s>\n", szLabel );
3492
3644
  } else {
3493
- inchi_print_nodisplay( fcb, "> <ID>\n" );
3645
+ inchi_print_nodisplay( fcb, "> <ID>\n" );
3494
3646
  }
3495
- inchi_print_nodisplay( fcb, " %s\n\n", szValue );
3647
+ inchi_print_nodisplay( fcb, "%s\n\n", szValue );
3496
3648
  }
3497
3649
  inchi_print_nodisplay(fcb, "$$$$\n");
3498
3650
 
@@ -3503,13 +3655,14 @@ int WriteToSDfile( const INP_ATOM_DATA *inp_at_data, INCHI_FILE* fcb, const char
3503
3655
  #endif
3504
3656
  /***************************************************************************************************/
3505
3657
  int WriteOrigAtomDataToSDfile( const ORIG_ATOM_DATA *inp_at_data, INCHI_FILE* fcb, const char* name, const char* comment,
3506
- int bChiralFlag, const char *szLabel, const char *szValue)
3658
+ int bChiralFlag, int bAtomsDT, const char *szLabel, const char *szValue)
3507
3659
  {
3508
3660
  int i, j, k, num_bonds=0, ret=0;
3509
3661
  int bAtomNeedsAlias;
3510
- int flag_bad_charge=0, nNumAddLines=0, nNumIso=0, nNumAddIso=0, nNumChargeLines=0, nNumRadicalLines=0, nNumAliasLines=0;
3511
- /*sp_ATOM *at; */
3512
- /* float fzero=0.0F; */
3662
+ int flag_bad_charge=0, flag_bad_iso = 0;
3663
+ int nNumAddLines=0, nNumIsoLines=0, nNumChargeLines=0, nNumRadicalLines=0, nNumAliasLines=0;
3664
+ int nNumNecessaryIsoLines = 0, nNumNecessaryChgLines = 0, nNumNecessaryRadLines = 0;
3665
+ int bV2000 = SDF_OUTPUT_V2000;
3513
3666
  double x, y, z;
3514
3667
  int bNext /*, s*/;
3515
3668
  const inp_ATOM *at = inp_at_data->at;
@@ -3563,34 +3716,43 @@ int WriteOrigAtomDataToSDfile( const ORIG_ATOM_DATA *inp_at_data, INCHI_FILE* fc
3563
3716
  num_bonds /= 2;
3564
3717
 
3565
3718
  /*find if we need "M CHG" and "M RAD"*/
3566
- for (i=0, nNumAddLines = 0, nNumIso=0; i < num_atoms; i++) {
3567
- bAtomNeedsAlias = ALIASED_AT(i); /* has isotopic implicit D or T; ignoring pure 1H */
3568
- nNumAddLines += !bAtomNeedsAlias && ABNORMAL_AT(i); /* abnormal means atom needs CHG, RAD, or ISO entry */
3569
- nNumAliasLines += 2 * bAtomNeedsAlias;
3570
- nNumIso += ( 0 == strcmp( at[i].elname, "D" ) || ( 0 == strcmp( at[i].elname, "T" ) || at[i].iso_atw_diff ) );
3571
- nNumAddIso += at[i].iso_atw_diff && (at[i].iso_atw_diff == 1 || at[i].iso_atw_diff < -3 || at[i].iso_atw_diff > 6 );
3572
- }
3573
-
3574
- /* count additional M lines*/
3575
- if ( nNumAddLines || nNumAliasLines ) {
3576
- for (i=0, nNumChargeLines=0, nNumRadicalLines=0; i < num_atoms; i++) {
3577
- nNumChargeLines += (0 != at[i].charge) && !ALIASED_AT(i);
3578
- nNumRadicalLines += (0 != at[i].radical) && !ALIASED_AT(i);
3719
+ for (i=0; i < num_atoms; i++) {
3720
+ if ( bAtomNeedsAlias = ALIASED_AT(i) ) { /* has isotopic implicit D or T; ignoring pure 1H */
3721
+ nNumAliasLines += 2 * bAtomNeedsAlias;
3722
+ } else {
3723
+ /* abnormal means atom needs CHG, RAD, or ISO entry */
3724
+ /* nNumAddLines += ABNORMAL_AT(i); */
3725
+ /* nNumIso += ( 0 == strcmp( at[i].elname, "D" ) || ( 0 == strcmp( at[i].elname, "T" ) || at[i].iso_atw_diff ) ); */
3726
+ /* nNumAddIso += at[i].iso_atw_diff && (at[i].iso_atw_diff == 1 || at[i].iso_atw_diff < -3 || at[i].iso_atw_diff > 5 ); */
3727
+ nNumNecessaryIsoLines += ABNORMAL_ISO(i);
3728
+ nNumNecessaryChgLines += ABNORMAL_CHG(i);
3729
+ nNumNecessaryRadLines += ABNORMAL_RAD(i);
3730
+ nNumIsoLines += ANY_ISO(i, bAtomsDT);
3731
+ nNumChargeLines += ANY_CHG(i);
3732
+ nNumRadicalLines += ANY_RAD(i);
3579
3733
  }
3580
3734
  }
3581
3735
  nNumChargeLines = ( nNumChargeLines + 7 ) / 8;
3582
3736
  nNumRadicalLines = ( nNumRadicalLines + 7 ) / 8;
3583
- nNumIso = ( nNumIso + 7 ) / 8;
3737
+ nNumIsoLines = ( nNumIsoLines + 7 ) / 8;
3738
+
3739
+ if ( !bV2000 ) {
3740
+ if ( !nNumNecessaryRadLines && !nNumNecessaryChgLines ) {
3741
+ nNumRadicalLines = 0;
3742
+ nNumChargeLines = 0;
3743
+ }
3744
+ if ( !nNumNecessaryIsoLines ) {
3745
+ nNumIsoLines = 0;
3746
+ }
3747
+ }
3748
+
3749
+
3584
3750
  /* recalculate number of added lines */
3585
- nNumAddLines = nNumChargeLines + nNumRadicalLines + nNumAliasLines; /* 1 for M END*/
3751
+ nNumAddLines = nNumChargeLines + nNumRadicalLines + nNumIsoLines + nNumAliasLines; /* 1 for M END*/
3586
3752
 
3587
- if ( nNumAddLines == 0 && nNumAddIso == 0 ) {
3588
- nNumIso = 0; /* keep isotopes description in CTable only*/
3589
- } else {
3590
- nNumAddLines += nNumIso+1; /* add 1 for "M END" line*/
3753
+ if ( nNumAddLines || bV2000 ) {
3754
+ nNumAddLines += 1; /* add 1 for "M END" line*/
3591
3755
  }
3592
- if ( !nNumAddLines )
3593
- nNumAddLines = 1; /* always add V2000 and M END */
3594
3756
 
3595
3757
  /* aaabbblllfffcccsssxxxrrrpppiiimmmvvvvvv*/
3596
3758
  inchi_print_nodisplay(fcb,"%3d%3d 0 0%3d 0 0 0 0 0%3d%s\n",
@@ -3601,78 +3763,57 @@ int WriteOrigAtomDataToSDfile( const ORIG_ATOM_DATA *inp_at_data, INCHI_FILE* fc
3601
3763
  int iso = 0;
3602
3764
  int charge = 0;
3603
3765
  int valence = 0;
3766
+ int nIsotopeH = IS_DEUTERIUM(i)? 1 : IS_TRITIUM(i)? 2 : 0;
3767
+ int bonds_val;
3604
3768
  bAtomNeedsAlias = ALIASED_AT(i);
3605
- /* isotope*/
3606
- iso = !strcmp( at[i].elname, "D" )? 1:
3607
- !strcmp( at[i].elname, "T" )? 2: 0;
3608
-
3609
- if ( iso ) {
3610
- /* deuterium or tritium*/
3611
- strcpy ( elname, "H" );
3612
- } else
3769
+ memset( elname, 0, sizeof(elname) );
3770
+
3613
3771
  if ( bAtomNeedsAlias ) {
3772
+ /* alias */
3614
3773
  strcpy ( elname, "C" );
3615
3774
  } else {
3616
- strncpy ( elname, at[i].elname, sizeof(elname)-1 );
3617
- }
3618
- if ( !iso && at[i].iso_atw_diff && at[i].iso_atw_diff != 1 && -3 <= at[i].iso_atw_diff && at[i].iso_atw_diff <= 5 ) {
3619
- iso = (at[i].iso_atw_diff > 0)? at[i].iso_atw_diff-1 : at[i].iso_atw_diff;
3620
- }
3621
-
3622
- /*
3623
- if ( !iso ) {
3624
- strncpy ( elname, at[i].elname, sizeof(elname)-1 );
3625
- } else
3626
- if ( ALIASED_AT(i) ) {
3627
- strcpy ( elname, "C" );
3628
- } else {
3629
- -- deuterium or tritium --
3630
- strcpy ( elname, "H" );
3631
- }
3632
- */
3633
- if ( !ABNORMAL_AT(i) && !bAtomNeedsAlias ) {
3634
-
3635
- /* Only normal atoms without alias can be here*/
3636
-
3637
- /* charge*/
3638
- switch ( at[i].charge ) {
3639
- case 3: charge = 1; break;
3640
- case 2: charge = 2; break;
3641
- case 1: charge = 3; break;
3642
- case -1: charge = 5; break;
3643
- case -2: charge = 6; break;
3644
- case -3: charge = 7; break;
3645
- case 0: charge = 0; break;
3646
- default: flag_bad_charge = 1; break;
3647
- };
3775
+ /* isotope*/
3776
+ if ( nIsotopeH ) {
3777
+ strcpy( elname, bAtomsDT? ( nIsotopeH==1? "D" : "T" ) : "H" );
3778
+ } else {
3779
+ strncpy ( elname, at[i].elname, sizeof(elname)-1 );
3780
+ }
3781
+ if ( !ABNORMAL_CHG(i) && !ANY_RAD(i) ) {
3782
+ /* charge*/
3783
+ /* Only atoms without alias can be here*/
3784
+ switch ( at[i].charge ) {
3785
+ case 3: charge = 1; break;
3786
+ case 2: charge = 2; break;
3787
+ case 1: charge = 3; break;
3788
+ case -1: charge = 5; break;
3789
+ case -2: charge = 6; break;
3790
+ case -3: charge = 7; break;
3791
+ case 0: charge = 0; break;
3792
+ default: flag_bad_charge = 1; break;
3793
+ };
3794
+ }
3648
3795
  /* radical*/
3649
- if ( at[i].radical ) {
3796
+ if ( ANY_RAD(i) && !ANY_CHG(i) ) {
3650
3797
  if ( at[i].radical == RADICAL_DOUBLET ) {
3651
- flag_bad_charge |= (charge != 0);
3652
3798
  charge = 4;
3653
- } else {
3654
- flag_bad_charge |= 2;
3655
3799
  }
3656
3800
  }
3657
- if ( flag_bad_charge ) {
3658
- charge = 0;
3659
- }
3801
+ }
3802
+ /* allow isotopic shift for aliased atoms */
3803
+ if ( NORMAL_ISO(i, bAtomsDT) ) {
3804
+ iso = at[i].iso_atw_diff > 0? at[i].iso_atw_diff-1:
3805
+ at[i].iso_atw_diff < 0? at[i].iso_atw_diff :
3806
+ nIsotopeH? nIsotopeH : (flag_bad_iso ++, 0);
3660
3807
  }
3661
3808
 
3662
3809
  x = at[i].x;
3663
3810
  y = at[i].y;
3664
3811
  z = at[i].z;
3665
- /* --- just removed --
3666
- if ( c && c->xCoeff != 0.0 && c->yCoeff != 0.0 ) {
3667
- x = (x - c->xShift)/c->xCoeff;
3668
- y = (y - c->yShift)/c->yCoeff;
3669
- } else {
3670
- y = -y;
3671
- }
3672
- ----------------------*/
3812
+
3673
3813
  /* valence -- set only if needed */
3814
+ bonds_val = nBondsValenceInpAt( at+i, NULL, NULL );
3674
3815
  valence=needed_unusual_el_valence( at[i].el_number, at[i].charge, at[i].radical,
3675
- at[i].chem_bonds_valence, at[i].num_H, at[i].valence );
3816
+ at[i].chem_bonds_valence, bonds_val, NUMH(at, i), at[i].valence );
3676
3817
  if ( valence < 0 ) {
3677
3818
  valence = 15; /* means no bonds nor H */
3678
3819
  }
@@ -3727,11 +3868,12 @@ int WriteOrigAtomDataToSDfile( const ORIG_ATOM_DATA *inp_at_data, INCHI_FILE* fc
3727
3868
  num_m ++;
3728
3869
  len = sprintf( str_m, "%s", at[i].elname );
3729
3870
  /* add isotopic H to the alias */
3730
- for ( k = 1; k < NUM_H_ISOTOPES; k ++ ) {
3731
- if ( at[i].num_iso_H[k] ) {
3732
- len += sprintf( str_m+len, "%s", k==1? "D" : k==2? "T" : "?" );
3733
- if ( at[i].num_iso_H[k] != 1 ) {
3734
- len += sprintf( str_m+len, "%d", (int)at[i].num_iso_H[k] );
3871
+ for ( k = 0; k < NUM_H_ISOTOPES; k ++ ) {
3872
+ int num_H = at[i].num_iso_H[k] + (k? 0:at[i].num_H);
3873
+ if ( num_H ) {
3874
+ len += sprintf( str_m+len, "%s", k == 0? "H" : k==1? "D" : k==2? "T" : "?" );
3875
+ if ( num_H != 1 ) {
3876
+ len += sprintf( str_m+len, "%d", num_H );
3735
3877
  }
3736
3878
  }
3737
3879
  }
@@ -3803,8 +3945,10 @@ int WriteOrigAtomDataToSDfile( const ORIG_ATOM_DATA *inp_at_data, INCHI_FILE* fc
3803
3945
  /* isotopes*/
3804
3946
  str_m[0] = 0;
3805
3947
  num_m = 0;
3806
- if ( nNumIso ) {
3948
+ if ( nNumIsoLines ) {
3949
+ int el_num, iso;
3807
3950
  for (i=0; i < num_atoms; i++) {
3951
+ /*
3808
3952
  if ( 0 == strcmp( at[i].elname, "D" ) ) {
3809
3953
  sprintf( entry, " %3d %3d", i+1, 2 );
3810
3954
  strcat( str_m, entry );
@@ -3822,6 +3966,27 @@ int WriteOrigAtomDataToSDfile( const ORIG_ATOM_DATA *inp_at_data, INCHI_FILE* fc
3822
3966
  strcat( str_m, entry );
3823
3967
  num_m ++;
3824
3968
  }
3969
+ */
3970
+ if ( ANY_ISO(i, bAtomsDT) && !ALIASED_AT(i) ) {
3971
+ if ( IS_DEUTERIUM(i) ) {
3972
+ iso = 1;
3973
+ el_num = 1;
3974
+ } else
3975
+ if ( IS_TRITIUM(i) ) {
3976
+ iso = 2;
3977
+ el_num = 1;
3978
+ } else {
3979
+ iso = at[i].iso_atw_diff > 0? at[i].iso_atw_diff-1 : at[i].iso_atw_diff;
3980
+ el_num = at[i].el_number;
3981
+ }
3982
+ iso += get_atw_from_elnum( el_num );
3983
+
3984
+ sprintf( entry, " %3d %3d", i+1, iso );
3985
+ strcat( str_m, entry );
3986
+ num_m ++;
3987
+ }
3988
+
3989
+
3825
3990
  if ( i == num_atoms-1 && num_m || num_m == 8 ) {
3826
3991
  inchi_print_nodisplay( fcb, "M ISO%3d%s\n", num_m, str_m );
3827
3992
  str_m[0] = 0;
@@ -3845,6 +4010,47 @@ int WriteOrigAtomDataToSDfile( const ORIG_ATOM_DATA *inp_at_data, INCHI_FILE* fc
3845
4010
  return ret;
3846
4011
 
3847
4012
  }
4013
+ #if( FIX_ADJ_RAD == 1 )
4014
+ /*************************************************************************/
4015
+ int FixNextRadicals( int cur_at, inp_ATOM *at );
4016
+ int FixNextRadicals( int cur_at, inp_ATOM *at )
4017
+ {
4018
+ int j, neigh, num_found = 0;
4019
+ for ( j = 0; j < at[cur_at].valence; j ++ ) {
4020
+ neigh = at[cur_at].neighbor[j];
4021
+ if ( at[neigh].radical == RADICAL_DOUBLET ) {
4022
+ at[neigh].radical = 0;
4023
+ num_found ++;
4024
+ num_found += FixNextRadicals( neigh, at );
4025
+ }
4026
+ }
4027
+ return num_found;
4028
+ }
4029
+ /*************************************************************************/
4030
+ int FixAdjacentRadicals( int num_inp_atoms, inp_ATOM *at )
4031
+ {
4032
+ int i, j;
4033
+ char *bVisited = NULL;
4034
+ int nNumFound = 0, neigh, cur_found;
4035
+ for ( i = 0; i < num_inp_atoms; i ++ ) {
4036
+ if ( at[i].radical == RADICAL_DOUBLET ) {
4037
+ cur_found = 1;
4038
+ for ( j = 0; j < at[i].valence; j ++ ) {
4039
+ neigh = at[i].neighbor[j];
4040
+ if ( at[neigh].radical == RADICAL_DOUBLET ) {
4041
+ cur_found ++;
4042
+ }
4043
+ }
4044
+ if ( cur_found >= 3 ) {
4045
+ nNumFound ++;
4046
+ at[i].radical = 0;
4047
+ nNumFound += FixNextRadicals( i, at );
4048
+ }
4049
+ }
4050
+ }
4051
+ return nNumFound;
4052
+ }
4053
+ #endif
3848
4054
 
3849
4055
  #ifdef INCHI_ANSI_ONLY
3850
4056
  #ifndef INCHI_LIBRARY