pychnosz 1.1.11__cp312-cp312-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (128) hide show
  1. pychnosz/__init__.py +129 -0
  2. pychnosz/biomolecules/__init__.py +29 -0
  3. pychnosz/biomolecules/ionize_aa.py +197 -0
  4. pychnosz/biomolecules/proteins.py +595 -0
  5. pychnosz/core/__init__.py +46 -0
  6. pychnosz/core/affinity.py +1256 -0
  7. pychnosz/core/animation.py +593 -0
  8. pychnosz/core/balance.py +334 -0
  9. pychnosz/core/basis.py +716 -0
  10. pychnosz/core/diagram.py +3336 -0
  11. pychnosz/core/equilibrate.py +813 -0
  12. pychnosz/core/equilibrium.py +554 -0
  13. pychnosz/core/info.py +821 -0
  14. pychnosz/core/retrieve.py +364 -0
  15. pychnosz/core/speciation.py +580 -0
  16. pychnosz/core/species.py +599 -0
  17. pychnosz/core/subcrt.py +1696 -0
  18. pychnosz/core/thermo.py +593 -0
  19. pychnosz/core/unicurve.py +1226 -0
  20. pychnosz/data/__init__.py +11 -0
  21. pychnosz/data/add_obigt.py +327 -0
  22. pychnosz/data/extdata/Berman/BDat17_2017.csv +2 -0
  23. pychnosz/data/extdata/Berman/Ber88_1988.csv +68 -0
  24. pychnosz/data/extdata/Berman/Ber90_1990.csv +5 -0
  25. pychnosz/data/extdata/Berman/DS10_2010.csv +6 -0
  26. pychnosz/data/extdata/Berman/FDM+14_2014.csv +2 -0
  27. pychnosz/data/extdata/Berman/Got04_2004.csv +5 -0
  28. pychnosz/data/extdata/Berman/JUN92_1992.csv +3 -0
  29. pychnosz/data/extdata/Berman/SHD91_1991.csv +12 -0
  30. pychnosz/data/extdata/Berman/VGT92_1992.csv +2 -0
  31. pychnosz/data/extdata/Berman/VPT01_2001.csv +3 -0
  32. pychnosz/data/extdata/Berman/VPV05_2005.csv +2 -0
  33. pychnosz/data/extdata/Berman/ZS92_1992.csv +11 -0
  34. pychnosz/data/extdata/Berman/sympy.R +99 -0
  35. pychnosz/data/extdata/Berman/testing/BA96.bib +12 -0
  36. pychnosz/data/extdata/Berman/testing/BA96_Berman.csv +21 -0
  37. pychnosz/data/extdata/Berman/testing/BA96_OBIGT.csv +21 -0
  38. pychnosz/data/extdata/Berman/testing/BA96_refs.csv +6 -0
  39. pychnosz/data/extdata/OBIGT/AD.csv +25 -0
  40. pychnosz/data/extdata/OBIGT/Berman_cr.csv +93 -0
  41. pychnosz/data/extdata/OBIGT/DEW.csv +211 -0
  42. pychnosz/data/extdata/OBIGT/H2O_aq.csv +4 -0
  43. pychnosz/data/extdata/OBIGT/SLOP98.csv +411 -0
  44. pychnosz/data/extdata/OBIGT/SUPCRT92.csv +178 -0
  45. pychnosz/data/extdata/OBIGT/inorganic_aq.csv +729 -0
  46. pychnosz/data/extdata/OBIGT/inorganic_cr.csv +273 -0
  47. pychnosz/data/extdata/OBIGT/inorganic_gas.csv +20 -0
  48. pychnosz/data/extdata/OBIGT/organic_aq.csv +1104 -0
  49. pychnosz/data/extdata/OBIGT/organic_cr.csv +481 -0
  50. pychnosz/data/extdata/OBIGT/organic_gas.csv +268 -0
  51. pychnosz/data/extdata/OBIGT/organic_liq.csv +533 -0
  52. pychnosz/data/extdata/OBIGT/testing/GEMSFIT.csv +43 -0
  53. pychnosz/data/extdata/OBIGT/testing/IGEM.csv +17 -0
  54. pychnosz/data/extdata/OBIGT/testing/Sandia.csv +8 -0
  55. pychnosz/data/extdata/OBIGT/testing/SiO2.csv +4 -0
  56. pychnosz/data/extdata/misc/AD03_Fig1a.csv +69 -0
  57. pychnosz/data/extdata/misc/AD03_Fig1b.csv +43 -0
  58. pychnosz/data/extdata/misc/AD03_Fig1c.csv +89 -0
  59. pychnosz/data/extdata/misc/AD03_Fig1d.csv +30 -0
  60. pychnosz/data/extdata/misc/BZA10.csv +5 -0
  61. pychnosz/data/extdata/misc/HW97_Cp.csv +90 -0
  62. pychnosz/data/extdata/misc/HWM96_V.csv +229 -0
  63. pychnosz/data/extdata/misc/LA19_test.csv +7 -0
  64. pychnosz/data/extdata/misc/Mer75_Table4.csv +42 -0
  65. pychnosz/data/extdata/misc/OBIGT_check.csv +423 -0
  66. pychnosz/data/extdata/misc/PM90.csv +7 -0
  67. pychnosz/data/extdata/misc/RH95.csv +23 -0
  68. pychnosz/data/extdata/misc/RH98_Table15.csv +17 -0
  69. pychnosz/data/extdata/misc/SC10_Rainbow.csv +19 -0
  70. pychnosz/data/extdata/misc/SK95.csv +55 -0
  71. pychnosz/data/extdata/misc/SOJSH.csv +61 -0
  72. pychnosz/data/extdata/misc/SS98_Fig5a.csv +81 -0
  73. pychnosz/data/extdata/misc/SS98_Fig5b.csv +84 -0
  74. pychnosz/data/extdata/misc/TKSS14_Fig2.csv +25 -0
  75. pychnosz/data/extdata/misc/bluered.txt +1000 -0
  76. pychnosz/data/extdata/protein/Cas/Cas_aa.csv +177 -0
  77. pychnosz/data/extdata/protein/Cas/Cas_uniprot.csv +186 -0
  78. pychnosz/data/extdata/protein/Cas/download.R +34 -0
  79. pychnosz/data/extdata/protein/Cas/mkaa.R +34 -0
  80. pychnosz/data/extdata/protein/POLG.csv +12 -0
  81. pychnosz/data/extdata/protein/TBD+05.csv +393 -0
  82. pychnosz/data/extdata/protein/TBD+05_aa.csv +393 -0
  83. pychnosz/data/extdata/protein/rubisco.csv +28 -0
  84. pychnosz/data/extdata/protein/rubisco.fasta +239 -0
  85. pychnosz/data/extdata/protein/rubisco_aa.csv +28 -0
  86. pychnosz/data/extdata/src/H2O92D.f.orig +3457 -0
  87. pychnosz/data/extdata/src/README.txt +5 -0
  88. pychnosz/data/extdata/taxonomy/names.dmp +215 -0
  89. pychnosz/data/extdata/taxonomy/nodes.dmp +63 -0
  90. pychnosz/data/extdata/thermo/Bdot_acirc.csv +60 -0
  91. pychnosz/data/extdata/thermo/buffer.csv +40 -0
  92. pychnosz/data/extdata/thermo/element.csv +135 -0
  93. pychnosz/data/extdata/thermo/groups.csv +6 -0
  94. pychnosz/data/extdata/thermo/opt.csv +2 -0
  95. pychnosz/data/extdata/thermo/protein.csv +506 -0
  96. pychnosz/data/extdata/thermo/refs.csv +343 -0
  97. pychnosz/data/extdata/thermo/stoich.csv.xz +0 -0
  98. pychnosz/data/loader.py +431 -0
  99. pychnosz/data/mod_obigt.py +322 -0
  100. pychnosz/data/obigt.py +471 -0
  101. pychnosz/data/worm.py +228 -0
  102. pychnosz/fortran/__init__.py +16 -0
  103. pychnosz/fortran/h2o92.dll +0 -0
  104. pychnosz/fortran/h2o92_interface.py +527 -0
  105. pychnosz/geochemistry/__init__.py +21 -0
  106. pychnosz/geochemistry/minerals.py +514 -0
  107. pychnosz/geochemistry/redox.py +500 -0
  108. pychnosz/models/__init__.py +47 -0
  109. pychnosz/models/archer_wang.py +165 -0
  110. pychnosz/models/berman.py +309 -0
  111. pychnosz/models/cgl.py +381 -0
  112. pychnosz/models/dew.py +997 -0
  113. pychnosz/models/hkf.py +523 -0
  114. pychnosz/models/hkf_helpers.py +231 -0
  115. pychnosz/models/iapws95.py +1113 -0
  116. pychnosz/models/supcrt92_fortran.py +238 -0
  117. pychnosz/models/water.py +480 -0
  118. pychnosz/utils/__init__.py +27 -0
  119. pychnosz/utils/expression.py +1074 -0
  120. pychnosz/utils/formula.py +830 -0
  121. pychnosz/utils/formula_ox.py +227 -0
  122. pychnosz/utils/reset.py +33 -0
  123. pychnosz/utils/units.py +259 -0
  124. pychnosz-1.1.11.dist-info/METADATA +197 -0
  125. pychnosz-1.1.11.dist-info/RECORD +128 -0
  126. pychnosz-1.1.11.dist-info/WHEEL +5 -0
  127. pychnosz-1.1.11.dist-info/licenses/LICENSE.txt +19 -0
  128. pychnosz-1.1.11.dist-info/top_level.txt +1 -0
@@ -0,0 +1,177 @@
1
+ "protein","organism","ref","abbrv","chains","Ala","Cys","Asp","Glu","Phe","Gly","His","Ile","Lys","Leu","Met","Asn","Pro","Gln","Arg","Ser","Thr","Val","Trp","Tyr"
2
+ "Cas6",ARCFU,O28420,Y1859,1,11,5,9,21,18,26,7,21,17,31,4,8,14,5,22,25,10,18,1,16
3
+ "Cas11",ARCFU,O28409,Y1870,1,12,0,7,14,3,7,1,4,14,15,2,2,1,3,5,9,4,8,0,5
4
+ "Cas7",ARCFU,O28408,O28408,1,22,3,12,31,21,25,3,14,25,17,7,13,13,4,15,21,17,39,2,15
5
+ "Cas5",ARCFU,O28407,O28407,1,18,3,11,23,12,21,1,22,23,22,3,7,14,5,12,21,10,22,5,11
6
+ "Cas8a1",ARCFU,O28406,Y1873,1,21,0,17,31,22,19,4,27,35,55,5,18,10,15,18,33,21,18,2,19
7
+ "Cas3'",ARCFU,O28405,O28405,1,33,3,33,50,35,26,6,40,47,55,11,23,16,6,31,42,21,41,5,22
8
+ "Cas3''",ARCFU,O28404,O28404,1,20,2,7,25,16,13,6,13,25,26,5,12,3,1,16,16,7,14,1,13
9
+ "Cas2",ARCFU,O28403,CAS2A,1,5,3,7,6,5,6,1,6,5,10,1,3,2,4,9,6,2,11,0,2
10
+ "Cas4",ARCFU,O28402,O28402,1,12,4,7,17,8,13,0,11,21,22,4,2,5,5,15,11,5,15,1,11
11
+ "Cas1",ARCFU,O28401,CAS1A,1,30,4,20,25,9,24,5,24,28,45,7,14,7,9,30,18,4,27,2,13
12
+ "Cas4",ARCFU,O28400,O28400,1,15,9,11,27,11,12,6,14,14,28,9,15,15,3,17,22,9,24,5,15
13
+ "Cas6",CLOK5,A5N0X4,A5N0X4,1,6,4,12,21,12,9,3,26,24,18,6,20,6,6,10,12,15,8,1,17
14
+ "Cas8b1",CLOK5,A5N0X3,A5N0X3,1,15,4,37,51,31,22,4,49,79,51,11,53,11,10,19,48,17,27,4,46
15
+ "Cas7",CLOK5,A5N0X2,A5N0X2,1,17,2,23,23,17,21,2,25,36,21,5,21,8,10,6,19,20,14,2,17
16
+ "Cas5",CLOK5,A5N0X1,A5N0X1,1,11,0,12,22,16,11,6,14,25,22,6,20,6,8,8,10,10,17,3,19
17
+ "Cas3",CLOK5,A5N0X0,A5N0X0,1,20,8,57,79,55,27,12,89,92,69,23,86,11,17,31,61,26,39,4,54
18
+ "Cas4",CLOK5,A5N0W9,A5N0W9,1,4,5,9,20,3,8,4,19,25,13,3,10,2,6,5,3,5,8,2,10
19
+ "Cas1",CLOK5,A5N0W8,A5N0W8,1,11,2,16,23,21,15,10,29,36,27,8,24,10,6,17,20,9,24,2,24
20
+ "Cas2",CLOK5,A5N0W7,A5N0W7,1,2,2,5,6,9,3,3,9,13,7,1,11,1,2,2,4,4,7,0,3
21
+ "Cas3",HALH5,Q9KFY4,Q9KFY4,1,55,11,43,73,34,40,28,44,54,83,16,37,20,43,36,47,44,53,5,34
22
+ "Cas5",HALH5,Q9KFY3,Q9KFY3,1,11,3,14,18,13,24,9,12,13,19,6,8,14,12,12,9,8,15,2,14
23
+ "Cas8c",HALH5,Q9KFY2,Q9KFY2,1,48,6,45,53,24,34,14,32,51,55,9,28,23,19,39,42,32,32,12,29
24
+ "Cas7",HALH5,Q9KFY1,Q9KFY1,1,18,1,26,16,12,18,7,18,24,17,7,13,9,8,15,29,14,22,3,6
25
+ "Cas4",HALH5,Q9KFY0,Q9KFY0,1,7,6,8,14,5,10,5,10,20,12,8,6,8,8,10,11,4,15,0,7
26
+ "Cas1",HALH5,Q9KFX9,Q9KFX9,1,26,2,16,24,15,22,5,11,23,44,13,18,11,15,24,16,18,21,5,14
27
+ "Cas2",HALH5,Q9KFX8,CAS2,1,3,2,6,6,2,5,1,7,8,12,2,4,2,5,5,8,7,7,0,4
28
+ "Cas3",GEOSL,Q74H42,Q74H42,1,93,11,74,63,32,60,22,30,39,99,15,18,55,27,78,54,45,71,16,19
29
+ "Cas8u2",GEOSL,Q74H41,Q74H41,1,23,6,20,16,23,19,2,10,18,36,7,8,18,10,21,22,11,13,8,5
30
+ "Cas7",GEOSL,Q74H40,Q74H40,1,42,3,26,20,13,30,6,16,23,36,3,18,19,12,23,14,17,31,2,9
31
+ "Cas5-Cas6",GEOSL,Q74H39,Q74H39,1,59,4,33,24,15,47,18,32,18,48,12,15,39,16,45,34,17,35,14,14
32
+ "Cas4-Cas1",GEOSL,Q74H36,CS4F1,1,48,10,27,40,22,49,14,23,11,56,16,13,35,13,59,31,29,38,6,19
33
+ "Cas2",GEOSL,Q74H35,CAS2,1,5,2,5,5,4,4,4,11,5,9,3,4,3,4,9,4,2,8,2,2
34
+ "Cas3'",RIPO1,B7JVN0,B7JVN0,1,44,11,31,52,33,36,13,41,41,92,14,32,23,49,45,42,32,35,10,25
35
+ "Cas3''-Cas10d",RIPO1,B7JVM9,B7JVM9,1,70,7,53,67,38,44,17,65,58,134,13,40,39,66,46,69,50,44,21,33
36
+ "Cas7",RIPO1,B7JVM8,B7JVM8,1,17,6,18,27,17,26,8,22,23,33,8,19,12,18,18,30,29,13,0,12
37
+ "Cas5",RIPO1,B7JVM7,B7JVM7,1,17,4,8,9,9,15,8,16,16,28,3,19,17,17,8,18,11,9,5,21
38
+ "Cas6",RIPO1,B7JVM6,B7JVM6,1,16,3,6,12,14,17,7,17,16,40,6,6,15,26,8,22,13,12,4,9
39
+ "Cas4",RIPO1,B7JVM5,B7JVM5,1,9,7,9,17,5,7,10,18,13,17,5,12,5,10,15,11,7,8,3,8
40
+ "Cas1",RIPO1,B7JVM4,B7JVM4,1,21,3,12,19,13,19,11,15,21,44,3,12,15,21,18,21,14,22,2,19
41
+ "Cas2",RIPO1,B7JVM3,B7JVM3,1,5,2,4,5,4,5,0,6,6,14,2,6,3,5,6,5,7,5,1,6
42
+ "Cas3",ECOLI,P38036,CAS3,1,71,17,56,51,39,52,26,43,36,100,16,41,39,54,43,59,38,56,25,26
43
+ "Cas8e",ECOLI,Q46901,CSE1,1,37,11,18,27,25,39,14,30,25,51,9,32,29,29,25,25,25,33,8,10
44
+ "Cas11",ECOLI,P76632,CSE2,1,15,2,10,9,5,7,3,8,5,19,6,7,6,12,19,6,7,7,5,2
45
+ "Cas7",ECOLI,Q46899,CASC,1,42,2,25,18,13,23,8,21,21,31,14,18,8,24,17,27,14,25,5,7
46
+ "Cas5",ECOLI,Q46898,CAS5,1,14,5,12,13,7,18,6,8,6,28,5,1,13,10,19,16,18,12,4,9
47
+ "Cas6",ECOLI,Q46897,CAS6,1,16,4,9,8,7,13,5,13,13,23,3,7,12,14,13,10,7,15,3,4
48
+ "Cas1",ECOLI,Q46896,CAS1,1,35,4,18,15,9,25,4,22,14,29,4,4,22,10,23,15,13,26,4,9
49
+ "Cas2",ECOLI,P45956,CAS2,1,7,0,3,8,4,8,0,4,1,11,4,4,4,2,8,3,6,13,3,1
50
+ "Cas1",A0A0H3B3U5,A0A0H3B3U5,NA,1,33,5,16,18,15,25,9,18,10,35,6,15,7,17,20,17,19,24,6,11
51
+ "Cas2-Cas3",A0A0H3B287,A0A0H3B287,NA,1,90,15,71,49,34,55,44,44,53,131,20,48,48,77,79,67,55,62,27,26
52
+ "Cas8f1",A0A0H3B0Q7,A0A0H3B0Q7,NA,1,48,4,25,32,17,21,10,18,29,51,5,21,17,33,23,31,14,24,13,12
53
+ "Cas5f1",A0A0H3B1X5,A0A0H3B1X5,NA,1,30,7,14,18,8,24,14,15,8,40,4,12,17,12,20,18,21,17,4,13
54
+ "Cas7f1",A0A0H3B2X2,A0A0H3B2X2,NA,1,35,2,18,19,15,22,2,16,21,30,6,18,11,21,15,24,19,24,5,11
55
+ "Cas6f",A0A0H3B3V0,A0A0H3B3V0,NA,1,14,3,9,10,8,12,5,7,8,26,1,1,9,10,17,17,10,10,3,4
56
+ "TnsA",UPI000637FC9D,UPI000637FC9D,NA,1,9,5,11,11,13,7,9,12,16,21,4,8,6,14,9,17,12,11,2,10
57
+ "Cas8f3-Cas5f3",UPI00063793EA,UPI00063793EA,NA,1,49,10,35,45,28,26,23,36,41,81,9,29,29,34,40,75,29,43,13,20
58
+ "Cas7f3",UPI000633A6D2,UPI000633A6D2,NA,1,26,8,18,28,10,24,5,15,21,34,6,16,12,11,20,22,17,21,8,21
59
+ "Cas6f",UPI000631609A,UPI000631609A,NA,1,12,3,8,14,13,12,2,12,7,11,8,9,7,9,15,21,12,20,1,7
60
+ "Cas1",SHEPC,A4Y6F9,A4Y6F9,1,37,5,18,18,19,19,9,14,17,35,9,13,8,24,14,10,22,16,6,12
61
+ "Cas2-Cas3",SHEPC,A4Y6G0,A4Y6G0,1,79,13,61,60,48,41,27,61,73,98,13,58,35,49,37,65,57,63,10,30
62
+ "Cas7f2",SHEPC,A4Y6G1,A4Y6G1,1,20,3,21,19,15,23,8,22,24,26,5,18,8,16,13,20,20,17,1,16
63
+ "Cas5f2",SHEPC,A4Y6G2,A4Y6G2,1,16,4,17,22,17,24,0,28,32,31,10,21,9,15,11,28,19,16,3,13
64
+ "Cas6f",SHEPC,A4Y6G3,A4Y6G3,1,11,1,13,6,11,13,6,9,16,21,4,4,7,8,9,18,6,11,2,7
65
+ "DinG",THISK,D3SGE4,D3SGE4,1,102,4,43,54,16,61,16,23,9,100,10,14,44,34,81,50,37,43,11,14
66
+ "Cas6",THISK,D3SGE5,D3SGE5,1,24,2,9,18,8,22,9,14,9,21,5,1,21,9,20,12,9,12,3,5
67
+ "Cas8-like",THISK,D3SGE6,D3SGE6,1,30,5,13,19,11,22,3,5,6,27,5,4,20,14,25,6,11,14,7,5
68
+ "Cas7",THISK,D3SGE7,D3SGE7,1,35,3,26,25,11,29,5,15,8,29,8,10,17,11,32,25,26,22,6,6
69
+ "Cas5",THISK,D3SGE8,D3SGE8,1,20,1,11,18,4,20,2,12,4,19,8,6,14,11,22,13,11,9,9,4
70
+ "cysH-like",RHOJR,Q0RWS5,Q0RWS5,1,21,3,20,17,7,23,17,11,5,34,3,8,19,8,26,12,20,14,8,8
71
+ "Cas8-like",RHOJR,Q0RWS4,Q0RWS4,1,37,7,10,10,6,16,5,5,8,19,5,0,19,5,26,11,18,27,8,3
72
+ "Cas11",RHOJR,Q0RWS3,Q0RWS3,1,14,0,6,10,0,9,2,3,3,19,1,2,9,2,17,9,13,9,6,4
73
+ "Cas7",RHOJR,Q0RWS2,Q0RWS2,1,16,2,18,21,12,34,2,12,6,29,6,3,15,7,33,21,21,24,4,3
74
+ "Cas5",RHOJR,Q0RWS1,Q0RWS1,1,29,5,14,13,4,23,13,6,4,28,6,0,21,3,21,12,16,10,9,4
75
+ "Cas11",A0A3M1DV11,A0A3M1DV11,NA,1,61,10,42,35,31,32,16,32,27,78,16,23,32,28,54,47,39,52,6,14
76
+ "Cas7",A0A3M1DW31,A0A3M1DW31,NA,1,27,3,13,28,10,28,3,21,14,38,12,10,21,18,20,18,26,21,3,9
77
+ "Cas5",A0A3M1DYX3,A0A3M1DYX3,NA,1,25,5,19,12,13,26,6,17,10,38,3,14,29,18,27,14,20,22,7,14
78
+ "Cas6",STAEQ,Q5HK95,Q5HK95,1,3,1,12,14,20,9,5,23,23,27,10,14,7,13,8,19,8,14,1,13
79
+ "Cas10",STAEQ,Q5HK89,Q5HK89,1,37,8,52,55,38,39,16,61,63,79,18,57,8,26,26,66,34,22,9,43
80
+ "Cas11",STAEQ,Q5HK90,Q5HK90,1,6,1,7,14,10,3,1,6,19,12,3,8,1,4,5,5,5,9,0,9
81
+ "Cas7",STAEQ,Q5HK91,Q5HK91,1,9,0,13,19,11,20,5,20,16,14,4,13,4,9,12,15,13,12,0,5
82
+ "Cas5",STAEQ,Q5HK92,Q5HK92,1,17,0,16,22,19,19,8,18,27,34,5,19,9,10,5,26,19,12,2,17
83
+ "Cas7",STAEQ,Q5HK93,Q5HK93,1,8,2,16,20,13,22,4,25,44,26,7,28,12,18,11,22,17,20,2,23
84
+ "csm6",STAEQ,Q5HK94,Q5HK94,1,12,4,25,44,16,12,9,41,43,46,7,37,13,9,17,23,16,23,4,21
85
+ "Cas1",STAEQ,Q5HK87,Q5HK87,1,8,5,16,21,20,11,8,35,33,25,5,24,5,8,14,19,8,21,1,14
86
+ "Cas2",STAEQ,Q5HK88,Q5HK88,1,2,0,5,8,4,3,0,11,11,15,2,4,4,4,6,7,4,6,0,5
87
+ "Cas10",SYNY3,Q6ZED1,Q6ZED1,1,55,6,39,38,26,31,10,36,43,58,2,32,16,22,26,39,26,30,4,19
88
+ "Cas7-Cas5",SYNY3,Q6ZED2,Q6ZED2,1,56,7,44,45,19,73,10,57,55,82,7,45,35,54,43,47,46,34,14,18
89
+ "Cas11",SYNY3,Q6ZED5,Q6ZED5,1,48,6,53,50,29,66,22,44,46,61,8,43,47,39,55,46,40,42,13,37
90
+ "Cas7-Cas7",SYNY3,Q6ZED3,Q6ZED3,1,48,5,36,24,12,47,7,37,35,62,9,26,25,23,20,26,30,29,10,11
91
+ "Csx19",SYNY3,Q6ZED4,Q6ZED4,1,13,4,12,16,8,13,4,10,13,26,2,14,8,7,8,9,6,10,3,7
92
+ "TPR+Caspase",A0A0B0EKL4,A0A0B0EKL4,NA,1,43,14,48,58,33,35,16,45,70,71,12,42,24,21,34,46,35,32,13,24
93
+ "Cas7(3),Cas11",A0A0B0EGF3,A0A0B0EGF3,NA,1,62,27,120,138,76,135,39,112,160,150,17,100,76,41,110,107,86,73,28,60
94
+ "Cas1",A0A0B0EEW2,A0A0B0EEW2,NA,1,39,8,40,47,34,67,16,52,68,84,11,31,22,23,54,54,31,48,11,28
95
+ "Cas2",A0A0B0ELU0,A0A0B0ELU0,NA,1,2,3,7,3,5,3,0,8,13,5,2,6,1,4,7,3,6,10,0,3
96
+ "Cas10",PSELT,A8F3D4,A8F3D4,1,25,10,37,62,37,28,16,50,61,62,5,39,25,27,33,40,23,37,15,28
97
+ "Cas5",PSELT,A8F3D5,A8F3D5,1,7,1,11,21,14,17,2,19,27,20,2,5,9,3,5,13,9,13,0,10
98
+ "Cas11",PSELT,A8F3D6,A8F3D6,1,4,1,10,22,9,8,1,14,18,19,2,15,5,6,15,12,9,9,5,10
99
+ "Cas7",PSELT,A8F3D7,A8F3D7,1,22,4,12,30,13,26,4,29,31,20,4,12,15,9,11,11,9,15,2,3
100
+ "Cas7",METTH,O26428,O26428,1,10,9,11,24,12,26,3,17,17,26,4,13,11,5,24,13,9,7,3,7
101
+ "Cas7",METTH,O26427,O26427,1,9,1,16,20,19,25,4,14,23,26,9,12,15,4,12,14,13,10,2,13
102
+ "Cas10",METTH,O26426,O26426,1,45,11,54,93,55,43,20,89,103,104,10,61,37,16,52,50,38,30,15,31
103
+ "Cas7",METTH,O26425,O26425,1,19,1,16,34,13,24,2,19,20,21,2,14,13,3,19,13,16,23,4,11
104
+ "Cas11",METTH,O26424,O26424,1,8,3,8,11,5,3,2,5,11,16,4,3,2,3,11,6,2,5,1,4
105
+ "Cas5",METTH,O26423,O26423,1,12,1,19,19,18,25,8,25,26,19,5,15,14,5,12,20,20,16,5,16
106
+ "Cas7",PYRFU,Q8U1S5,CMR1A,1,10,0,11,24,22,26,4,21,27,32,3,12,14,5,28,35,17,27,10,10
107
+ "Cas10",PYRFU,Q8U1S6,CMR2,1,44,4,52,91,37,46,17,61,97,91,10,32,30,14,45,54,22,71,15,38
108
+ "Cas5",PYRFU,Q8U1S7,CMR3,1,12,2,10,36,17,30,1,28,36,32,3,6,19,2,14,17,15,25,3,14
109
+ "Cas7",PYRFU,Q8U1S9,CMR4,1,18,0,14,27,11,26,4,26,25,29,3,11,12,8,11,11,21,27,2,9
110
+ "Cas11",PYRFU,Q8U1T0,CMR5,1,13,0,8,17,3,5,3,8,18,24,5,10,4,5,9,10,5,11,3,8
111
+ "Cas6",PYRFU,Q8U1S4,CAS6,1,12,1,10,24,18,18,5,12,29,24,7,10,16,5,12,11,11,20,2,17
112
+ "Cas7",PYRFU,Q8U1T1,CMR6,1,15,2,16,27,15,23,3,24,33,38,4,13,22,10,16,17,14,27,1,20
113
+ "Cas9",A0AAN5PKD3,A0AAN5PKD3,NA,1,64,11,68,99,70,46,44,111,142,137,22,94,65,73,56,103,64,47,15,41
114
+ "Cas1",A0A2U8U9X9,A0A2U8U9X9,NA,1,28,5,7,19,14,19,7,25,31,36,8,15,8,15,18,22,16,17,7,13
115
+ "Cas2",A0A4T1YC98,A0A4T1YC98,NA,1,3,1,6,9,3,3,1,10,9,12,1,5,4,4,7,3,5,5,1,7
116
+ "Cas4",A0A2U8U9S0,A0A2U8U9S0,NA,1,8,8,6,15,9,11,4,20,20,16,6,5,9,11,7,11,9,9,1,12
117
+ "Cas9",STRTR,G3ECR1,CAS9,1,64,4,99,105,74,72,25,110,159,152,16,90,34,40,75,100,53,61,7,69
118
+ "Cas1",STRTR,G3ECR2,CAS1,1,14,5,14,20,17,10,8,27,17,29,8,21,6,10,15,19,15,16,3,15
119
+ "Cas2",STRTR,G3ECR3,CAS2,1,7,1,6,7,7,5,2,6,9,11,8,8,2,2,8,6,8,4,0,7
120
+ "csn2",STRTR,G3ECR4,CSN2,1,4,2,16,21,11,4,1,28,14,31,4,9,4,11,6,17,10,16,0,10
121
+ "Cas9",NEIL0,E4ZF34,E4ZF34,1,87,9,60,90,50,61,24,46,102,109,18,49,44,42,90,46,50,64,11,30
122
+ "Cas1",NEIL0,E4ZF35,E4ZF35,1,31,2,11,19,11,14,9,19,16,48,2,16,12,22,18,11,18,11,4,10
123
+ "Cas2",NEIL0,E4ZF36,E4ZF36,1,8,2,4,6,6,4,2,6,11,16,4,4,2,9,7,6,3,5,0,3
124
+ "Cas9",A0A1L9GUT8,A0A1L9GUT8,NA,1,62,7,55,71,32,44,25,63,111,80,19,44,38,31,74,50,48,50,11,34
125
+ "Cas4",A0A1L9GUY1,A0A1L9GUY1,NA,1,4,3,10,11,8,7,0,8,14,16,3,4,5,3,11,11,7,9,0,9
126
+ "Cas2",A0A1L9GUR0,A0A1L9GUR0,NA,1,7,2,6,3,6,4,4,11,7,10,2,5,1,3,7,7,6,6,0,4
127
+ "Cas1",A0A1L9GUR2,A0A1L9GUR2,NA,1,29,3,13,16,11,18,2,28,26,37,13,19,10,13,23,33,11,20,4,11
128
+ "Cas12a",FRATN,A0Q7Q2,CS12A,1,63,9,99,97,87,53,19,106,173,109,13,100,28,54,35,79,51,50,8,67
129
+ "Cas4",FRATN,A0Q7Q1,A0Q7Q1,1,7,4,14,6,6,8,5,16,21,9,4,8,3,8,4,11,8,6,0,15
130
+ "Cas1",FRATN,A0Q7Q0,A0Q7Q0,1,12,8,17,23,28,15,4,24,34,30,7,19,6,15,16,20,12,25,3,16
131
+ "Cas2",FRATN,A0Q7P9,A0Q7P9,1,6,0,6,6,7,2,1,7,7,9,2,6,2,2,4,6,1,9,0,7
132
+ "Cas12e",A0A357BT59,A0A357BT59,NA,1,71,11,57,77,38,66,14,44,101,96,17,49,35,37,65,53,47,53,19,36
133
+ "Cas4",A0A357BV01,A0A357BV01,NA,1,13,7,8,18,10,7,6,15,5,15,6,8,8,7,18,9,12,13,2,7
134
+ "Cas1",A0A357BT84,A0A357BT84,NA,1,15,3,22,23,15,18,7,29,29,31,7,10,14,14,22,17,17,17,4,14
135
+ "Cas2",A0A357BT86,A0A357BT86,NA,1,7,1,7,9,4,5,1,7,6,11,2,2,1,1,6,6,1,7,2,6
136
+ "Cas12b1",ALIAG,T0D7A2,CS12B,1,88,11,69,98,45,73,29,40,70,108,21,41,44,57,111,60,35,72,25,32
137
+ "Cas4-Cas1",A0A9E6ZHP2,A0A9E6ZHP2,NA,1,36,6,36,32,18,43,13,28,22,61,15,16,25,18,50,33,27,36,5,16
138
+ "Cas2",ALIAG,T0C2X2,T0C2X2,1,5,2,8,3,4,5,2,6,5,10,4,1,1,8,9,5,2,11,1,5
139
+ "Cas4-Cas1",A0A1G2YUS0,A0A1G2YUS0,NA,1,67,10,33,36,22,31,11,40,24,56,17,16,38,21,40,32,26,33,4,17
140
+ "Cas2",A0A1G2YUR9,A0A1G2YUR9,NA,1,6,4,8,8,4,5,1,11,5,8,4,1,3,5,9,2,3,6,1,3
141
+ "Cas12b2",A0A1G2YUT2,A0A1G2YUT2,NA,1,43,13,37,57,29,48,22,41,94,59,13,37,32,19,58,41,28,31,17,24
142
+ "Cas12i",Ga0208225_100001036,Ga0208225_100001036,NA,1,64,14,62,74,46,53,23,63,126,98,27,72,38,39,48,78,47,60,12,49
143
+ "Cas12h",Ga0180438_100006283,Ga0180438_100006283,NA,1,59,11,53,69,34,54,21,43,78,93,16,28,35,28,61,55,31,52,12,37
144
+ "Cas1",UPI0007C2850C,UPI0007C2850C,NA,1,14,2,16,21,12,17,15,21,21,38,4,10,15,10,27,32,19,19,8,17
145
+ "Cas12c",UPI0007C30103,UPI0007C30103,NA,1,91,13,65,74,59,63,33,77,86,140,17,66,51,58,67,125,58,63,13,33
146
+ "Cas1",A0A1L9GZC2,A0A1L9GZC2,NA,1,14,3,13,21,23,25,9,24,28,29,7,9,19,11,23,28,17,16,8,20
147
+ "Cas12d",A0A1L9GZ17,A0A1L9GZ17,NA,1,49,15,60,95,55,50,12,83,124,127,14,58,33,31,65,92,36,60,17,49
148
+ "Cas1",A0A482D445,A0A482D445,NA,1,15,5,18,26,18,17,2,32,34,33,6,14,7,13,21,16,19,12,1,18
149
+ "Cas2",A0A482D3L8,A0A482D3L8,NA,1,5,4,7,8,7,3,1,9,11,6,3,5,2,2,5,11,2,6,1,2
150
+ "Cas4",A0A482D482,A0A482D482,NA,1,7,6,11,14,15,5,2,19,30,13,5,9,6,8,8,8,9,8,1,11
151
+ "Cas12f1",A0A482D2S9,A0A482D2S9,NA,1,24,5,22,38,28,28,8,40,63,37,7,34,15,22,30,37,22,20,7,20
152
+ "C2c10",A0A9W3JGX3,A0A9W3JGX3,NA,1,16,10,13,29,15,22,10,45,50,29,8,38,8,33,21,30,23,20,6,24
153
+ "Cas12f",A0A482D2T6,A0A482D2T6,NA,1,17,9,24,44,27,22,7,41,90,47,7,47,11,17,34,33,14,18,3,22
154
+ "Cas1",A0A482D3Z6,A0A482D3Z6,NA,1,11,4,17,29,18,19,2,38,34,32,5,17,8,8,17,26,13,13,1,14
155
+ "Cas2",A0A482D4I6,A0A482D4I6,NA,1,2,4,8,7,6,4,0,18,8,6,2,6,1,3,6,6,3,3,1,4
156
+ "Cas4",A0A482D3B0,A0A482D3B0,NA,1,9,4,7,15,7,4,1,14,28,19,7,11,9,7,11,5,9,8,0,16
157
+ "C2c8",RIPO1,B7K606,B7K606,1,28,11,20,31,19,23,18,23,52,56,7,39,16,28,28,27,29,14,8,17
158
+ "C2c9",A0A7D4LAR1,A0A7D4LAR1,NA,1,57,5,27,33,17,34,10,9,39,46,7,28,18,19,58,40,30,33,8,22
159
+ "Cas1",A0A2H9L0L4,A0A2H9L0L4,NA,1,30,4,15,23,11,30,7,24,22,36,7,15,9,11,28,21,5,17,4,11
160
+ "Cas2",A0A2H9KZH3,A0A2H9KZH3,NA,1,10,5,6,9,6,5,3,12,16,4,3,4,3,2,5,12,0,7,1,1
161
+ "Cas4",A0A2H9KYT0,A0A2H9KYT0,NA,1,10,8,7,19,13,14,1,11,20,11,7,3,7,6,15,8,10,9,4,10
162
+ "Cas12f3",A0A2H9KYV8,A0A2H9KYV8,NA,1,25,7,27,43,12,32,8,31,60,44,10,32,21,17,43,37,24,24,5,16
163
+ "TnsB",RIPO1,B7JWD5,B7JWD5,1,38,4,28,58,18,30,21,54,61,50,14,41,26,46,42,43,37,31,9,26
164
+ "TnsC",RIPO1,B7JWD6,B7JWD6,1,18,4,15,23,10,20,8,16,19,40,6,11,12,8,19,18,15,14,4,16
165
+ "tniQ",RIPO1,B7JWD7,B7JWD7,1,7,8,1,16,7,16,5,6,13,18,6,1,10,7,11,14,3,6,4,7
166
+ "Cas12k",RIPO1,B7JWD9,B7JWD9,1,39,9,20,30,20,20,18,49,54,77,6,45,31,73,28,43,46,16,11,25
167
+ "Cas13a",LEPSD,P0DOC6,CS13A,1,28,7,102,145,72,37,11,176,212,116,16,148,10,24,54,60,47,49,5,70
168
+ "Cas1",A0A510JKT1,A0A510JKT1,NA,1,14,3,20,27,13,5,8,36,26,28,6,25,6,13,12,22,4,17,2,17
169
+ "Cas2",A0A510JQG4,A0A510JQG4,NA,1,3,0,3,12,6,4,0,9,12,10,7,2,2,7,5,8,5,4,0,6
170
+ "Cas13d",UPI0003D9600D,UPI0003D9600D,NA,1,56,12,65,65,46,43,10,69,101,84,28,68,15,18,48,61,34,48,1,46
171
+ "Cas1",UPI0003D863A8,UPI0003D863A8,NA,1,19,7,18,24,14,19,4,24,24,42,6,14,11,10,20,28,17,18,1,20
172
+ "Cas2",UPI0003D92D81,UPI0003D92D81,NA,1,0,1,8,5,3,6,3,5,11,8,2,5,2,3,5,5,7,9,1,7
173
+ "Cas13c",UPI0004817C16,UPI0004817C16,NA,1,20,7,67,119,52,28,9,121,191,116,15,95,9,25,42,63,39,42,2,59
174
+ "Cas13b1",9BACT,E6K398,E6K398,1,52,6,61,87,70,36,28,71,118,120,30,89,41,50,66,49,52,40,14,47
175
+ "Csx28",9BACT,E6K399,E6K399,1,13,2,4,18,8,4,3,19,18,14,5,11,1,9,10,11,8,5,1,13
176
+ "Csx27",9FLAO,K1MDP1,K1MDP1,1,12,3,8,10,18,12,2,27,15,26,6,10,5,4,6,16,12,13,3,11
177
+ "Cas13b2",9FLAO,K1LVU1,K1LVU1,1,57,7,70,137,67,42,20,80,175,103,13,73,32,64,52,46,45,65,9,67
@@ -0,0 +1,186 @@
1
+ Class,Subtype,Organism,"Locus tag",Accession,Legacy,Systematic,Effector,UniProt,Protein,Note
2
+ 1,I-A,"Archaeoglobus fulgidus",AF1859,,,cas6,FALSE,O28420,Y1859_ARCFU,
3
+ 1,I-A,"Archaeoglobus fulgidus",AF1870,,csa5,cas11,TRUE,O28409,Y1870_ARCFU,
4
+ 1,I-A,"Archaeoglobus fulgidus",AF1871,,,cas7,TRUE,O28408,O28408_ARCFU,"NCBI: incomplete sequence, no link out; found by interpolating UniProt accessions"
5
+ 1,I-A,"Archaeoglobus fulgidus",AF1872,,,cas5,TRUE,O28407,O28407_ARCFU,
6
+ 1,I-A,"Archaeoglobus fulgidus",AF1873,,,cas8a1,TRUE,O28406,Y1873_ARCFU,
7
+ 1,I-A,"Archaeoglobus fulgidus",AF1874,,,cas3',FALSE,O28405,O28405_ARCFU,
8
+ 1,I-A,"Archaeoglobus fulgidus",AF1875,,,cas3'',FALSE,O28404,O28404_ARCFU,
9
+ 1,I-A,"Archaeoglobus fulgidus",AF1876,,,cas2,FALSE,O28403,CAS2A_ARCFU,
10
+ 1,I-A,"Archaeoglobus fulgidus",AF1877,,,cas4,FALSE,O28402,O28402_ARCFU,
11
+ 1,I-A,"Archaeoglobus fulgidus",AF1878,,,cas1,FALSE,O28401,CAS1A_ARCFU,
12
+ 1,I-A,"Archaeoglobus fulgidus",AF1879,,csa1,cas4,FALSE,O28400,O28400_ARCFU,
13
+ 1,I-B,"Clostridium kluyveri",CKL_2758,,,cas6,FALSE,A5N0X4,A5N0X4_CLOK5,
14
+ 1,I-B,"Clostridium kluyveri",CKL_2757,,csh1,cas8b1,TRUE,A5N0X3,A5N0X3_CLOK5,
15
+ 1,I-B,"Clostridium kluyveri",CKL_2756,,csh2,cas7,TRUE,A5N0X2,A5N0X2_CLOK5,
16
+ 1,I-B,"Clostridium kluyveri",CKL_2755,,,cas5,TRUE,A5N0X1,A5N0X1_CLOK5,
17
+ 1,I-B,"Clostridium kluyveri",CKL_2754,,,cas3,FALSE,A5N0X0,A5N0X0_CLOK5,
18
+ 1,I-B,"Clostridium kluyveri",CKL_2753,,,cas4,FALSE,A5N0W9,A5N0W9_CLOK5,
19
+ 1,I-B,"Clostridium kluyveri",CKL_2752,,,cas1,FALSE,A5N0W8,A5N0W8_CLOK5,
20
+ 1,I-B,"Clostridium kluyveri",CKL_2751,,,cas2,FALSE,A5N0W7,A5N0W7_CLOK5,
21
+ 1,I-C,"Halalkalibacterium halodurans C-125",BH0336,,,cas3,FALSE,Q9KFY4,Q9KFY4_HALH5,
22
+ 1,I-C,"Halalkalibacterium halodurans C-125",BH0337,,,cas5,TRUE,Q9KFY3,Q9KFY3_HALH5,
23
+ 1,I-C,"Halalkalibacterium halodurans C-125",BH0338,,,cas8c,TRUE,Q9KFY2,Q9KFY2_HALH5,
24
+ 1,I-C,"Halalkalibacterium halodurans C-125",BH0339,,,cas7,TRUE,Q9KFY1,Q9KFY1_HALH5,
25
+ 1,I-C,"Halalkalibacterium halodurans C-125",BH0340,,,cas4,FALSE,Q9KFY0,Q9KFY0_HALH5,
26
+ 1,I-C,"Halalkalibacterium halodurans C-125",BH0341,,,cas1,FALSE,Q9KFX9,Q9KFX9_HALH5,
27
+ 1,I-C,"Halalkalibacterium halodurans C-125",BH0342,,,cas2,FALSE,Q9KFX8,CAS2_HALH5,
28
+ 1,I-G,"Geobacter sulfurreducens",GSU0051,,,cas3,FALSE,Q74H42,Q74H42_GEOSL,
29
+ 1,I-G,"Geobacter sulfurreducens",GSU0052,,,cas8u2,TRUE,Q74H41,Q74H41_GEOSL,
30
+ 1,I-G,"Geobacter sulfurreducens",GSU0053,,csb1,cas7,TRUE,Q74H40,Q74H40_GEOSL,
31
+ 1,I-G,"Geobacter sulfurreducens",GSU0054,,csb2,cas5-cas6,TRUE,Q74H39,Q74H39_GEOSL,
32
+ 1,I-G,"Geobacter sulfurreducens",GSU0057,,,cas4-cas1,FALSE,Q74H36,CS4F1_GEOSL,
33
+ 1,I-G,"Geobacter sulfurreducens",GSU0058,,,cas2,FALSE,Q74H35,CAS2_GEOSL,
34
+ 1,I-D,"Rippkaea orientalis (strain PCC 8801 / RF-1)",Cyan8802_0527,"WP_015783167, WP_012593878",,cas3',FALSE,B7JVN0,B7JVN0_RIPO1,"RefSeq and UniProt have active records for strain 8801 (not 8802)"
35
+ 1,I-D,"Rippkaea orientalis (strain PCC 8801 / RF-1)",Cyan8802_0526,"WP_015783166, WP_012593877",,cas3''-cas10d,TRUE,B7JVM9,B7JVM9_RIPO1,
36
+ 1,I-D,"Rippkaea orientalis (strain PCC 8801 / RF-1)",Cyan8802_0525,"WP_015783165, WP_012593876",csc2,cas7,TRUE,B7JVM8,B7JVM8_RIPO1,
37
+ 1,I-D,"Rippkaea orientalis (strain PCC 8801 / RF-1)",Cyan8802_0524,"WP_015783164, WP_012593875",csc1,cas5,TRUE,B7JVM7,B7JVM7_RIPO1,
38
+ 1,I-D,"Rippkaea orientalis (strain PCC 8801 / RF-1)",Cyan8802_0523,"WP_015783163, WP_012593874",,cas6,FALSE,B7JVM6,B7JVM6_RIPO1,
39
+ 1,I-D,"Rippkaea orientalis (strain PCC 8801 / RF-1)",Cyan8802_0522,"WP_015783162, WP_012593873",,cas4,FALSE,B7JVM5,B7JVM5_RIPO1,
40
+ 1,I-D,"Rippkaea orientalis (strain PCC 8801 / RF-1)",Cyan8802_0521,"WP_015783161, WP_012593872",,cas1,FALSE,B7JVM4,B7JVM4_RIPO1,
41
+ 1,I-D,"Rippkaea orientalis (strain PCC 8801 / RF-1)",Cyan8802_0520,"WP_015783160, WP_012593871",,cas2,FALSE,B7JVM3,B7JVM3_RIPO1,
42
+ 1,I-E,"Escherichia coli K12",ygcB,,,cas3,FALSE,P38036,CAS3_ECOLI,
43
+ 1,I-E,"Escherichia coli K12","ygcL (casA)",,cse1,cas8e,TRUE,Q46901,CSE1_ECOLI,
44
+ 1,I-E,"Escherichia coli K12","ygcK (casB)",,cse2,cas11,TRUE,P76632,CSE2_ECOLI,
45
+ 1,I-E,"Escherichia coli K12","ygcJ (casC)",,,cas7,TRUE,Q46899,CASC_ECOLI,
46
+ 1,I-E,"Escherichia coli K12","ygcI (casD)",,,cas5,TRUE,Q46898,CAS5_ECOLI,
47
+ 1,I-E,"Escherichia coli K12","ygcH (casE)",,,cas6,TRUE,Q46897,CAS6_ECOLI,
48
+ 1,I-E,"Escherichia coli K12",ygbT,,,cas1,FALSE,Q46896,CAS1_ECOLI,
49
+ 1,I-E,"Escherichia coli K12",ygbF,,,cas2,FALSE,P45956,CAS2_ECOLI,
50
+ 1,I-F1,"Yersinia pseudo-tuberculosis",YPK_1644,,,cas1,FALSE,A0A0H3B3U5,A0A0H3B3U5_YERPY,
51
+ 1,I-F1,"Yersinia pseudo-tuberculosis",YPK_1645,,,cas2-cas3,FALSE,A0A0H3B287,A0A0H3B287_YERPY,
52
+ 1,I-F1,"Yersinia pseudo-tuberculosis",YPK_1646,,csy1,cas8f1,TRUE,A0A0H3B0Q7,A0A0H3B0Q7_YERPY,
53
+ 1,I-F1,"Yersinia pseudo-tuberculosis",YPK_1647,,csy2,cas5f1,TRUE,A0A0H3B1X5,A0A0H3B1X5_YERPY,
54
+ 1,I-F1,"Yersinia pseudo-tuberculosis",YPK_1648,,csy3,cas7f1,TRUE,A0A0H3B2X2,A0A0H3B2X2_YERPY,
55
+ 1,I-F1,"Yersinia pseudo-tuberculosis",YPK_1649,,csy4,cas6f,TRUE,A0A0H3B3V0,A0A0H3B3V0_YERPY,
56
+ 1,I-F3,"Vibrio crassostreae J5 20",VCR20J5_310108,CDT46946,,tnsA,FALSE,,UPI000637FC9D,"UniParc sequence"
57
+ 1,I-F3,"Vibrio crassostreae J5 20",,,,tnsB,FALSE,,,
58
+ 1,I-F3,"Vibrio crassostreae J5 20",,,,tnsC,FALSE,,,
59
+ 1,I-F3,"Vibrio crassostreae J5 20",,,,tnsD,FALSE,,,
60
+ 1,I-F3,"Vibrio crassostreae J5 20",VCR20J5_310090,CDT46719,csy2,cas8f3-cas5f3,TRUE,,UPI00063793EA,"UniParc sequence"
61
+ 1,I-F3,"Vibrio crassostreae J5 20",VCR20J5_310089,CDT46710,csy3,cas7f3,TRUE,,UPI000633A6D2,"UniParc sequence"
62
+ 1,I-F3,"Vibrio crassostreae J5 20",VCR20J5_310088,CDT46699,,cas6f,TRUE,,UPI000631609A,"UniParc sequence"
63
+ 1,I-F2,"Shewenella putrefaciens CN-32",Sputcn32_1819,,,cas1,FALSE,A4Y6F9,A4Y6F9_SHEPC,
64
+ 1,I-F2,"Shewenella putrefaciens CN-32",Sputcn32_1820,,,cas2-cas3,FALSE,A4Y6G0,A4Y6G0_SHEPC,
65
+ 1,I-F2,"Shewenella putrefaciens CN-32",Sputcn32_1821,,PBPRB1993,cas7f2,TRUE,A4Y6G1,A4Y6G1_SHEPC,
66
+ 1,I-F2,"Shewenella putrefaciens CN-32",Sputcn32_1822,,PBPRB1992,cas5f2,TRUE,A4Y6G2,A4Y6G2_SHEPC,
67
+ 1,I-F2,"Shewenella putrefaciens CN-32",Sputcn32_1823,,,cas6f,TRUE,A4Y6G3,A4Y6G3_SHEPC,
68
+ 1,IV-A,"Thioalkalivibrio sp. K90mix",TK90_2699,,csf4,dinG,FALSE,D3SGE4,D3SGE4_THISK,
69
+ 1,IV-A,"Thioalkalivibrio sp. K90mix",TK90_2700,,csf5,cas6,FALSE,D3SGE5,D3SGE5_THISK,
70
+ 1,IV-A,"Thioalkalivibrio sp. K90mix",TK90_2701,,csf1,cas8-like,TRUE,D3SGE6,D3SGE6_THISK,
71
+ 1,IV-A,"Thioalkalivibrio sp. K90mix",TK90_2702,,csf2,cas7,TRUE,D3SGE7,D3SGE7_THISK,
72
+ 1,IV-A,"Thioalkalivibrio sp. K90mix",TK90_2703,,csf3,cas5,TRUE,D3SGE8,D3SGE8_THISK,
73
+ 1,IV-B,"Rhodococcus jostii RHA1",RHA1_ro10068,,,cysH-like,FALSE,Q0RWS5,Q0RWS5_RHOJR,
74
+ 1,IV-B,"Rhodococcus jostii RHA1",RHA1_ro10069,,csf1,cas8-like,TRUE,Q0RWS4,Q0RWS4_RHOJR,
75
+ 1,IV-B,"Rhodococcus jostii RHA1",RHA1_ro10070,,,cas11,TRUE,Q0RWS3,Q0RWS3_RHOJR,
76
+ 1,IV-B,"Rhodococcus jostii RHA1",RHA1_ro10071,,csf2,cas7,TRUE,Q0RWS2,Q0RWS2_RHOJR,"Pfam: RAMP superfamily protein"
77
+ 1,IV-B,"Rhodococcus jostii RHA1",RHA1_ro10072,,csf3,cas5,TRUE,Q0RWS1,Q0RWS1_RHOJR,
78
+ 1,IV-C,"Thermoflexia bacterium",,,,LS,TRUE,,,
79
+ 1,IV-C,"Thermoflexia bacterium",D6793_05715,,,cas11,TRUE,A0A3M1DV11,A0A3M1DV11_9CHLR,
80
+ 1,IV-C,"Thermoflexia bacterium",D6793_05705,,csf2,cas7,TRUE,A0A3M1DW31,A0A3M1DW31_9CHLR,
81
+ 1,IV-C,"Thermoflexia bacterium",D6793_05700,,,cas5,TRUE,A0A3M1DYX3,A0A3M1DYX3_9CHLR,"UniProt: Type III-B CRISPR module RAMP protein Cmr6"
82
+ 1,III-A,"Staphylococcus epidermidis",SERP2455,,,cas6,FALSE,Q5HK95,Q5HK95_STAEQ,
83
+ 1,III-A,"Staphylococcus epidermidis",SERP2461,,,cas10,TRUE,Q5HK89,Q5HK89_STAEQ,
84
+ 1,III-A,"Staphylococcus epidermidis",SERP2460,,csm2,cas11,TRUE,Q5HK90,Q5HK90_STAEQ,
85
+ 1,III-A,"Staphylococcus epidermidis",SERP2459,,csm3,cas7,TRUE,Q5HK91,Q5HK91_STAEQ,"Pfam: RAMPs 1 hit"
86
+ 1,III-A,"Staphylococcus epidermidis",SERP2458,,csm4,cas5,TRUE,Q5HK92,Q5HK92_STAEQ,
87
+ 1,III-A,"Staphylococcus epidermidis",SERP2457,,csm5,cas7,TRUE,Q5HK93,Q5HK93_STAEQ,
88
+ 1,III-A,"Staphylococcus epidermidis",SERP2456,,,csm6,FALSE,Q5HK94,Q5HK94_STAEQ,
89
+ 1,III-A,"Staphylococcus epidermidis",SERP2463,,,cas1,FALSE,Q5HK87,Q5HK87_STAEQ,
90
+ 1,III-A,"Staphylococcus epidermidis",SERP2462,,,cas2,FALSE,Q5HK88,Q5HK88_STAEQ,
91
+ 1,III-D,"Synechocystis sp. 6803",sll7067,,,cas10,TRUE,Q6ZED1,Q6ZED1_SYNY3,
92
+ 1,III-D,"Synechocystis sp. 6803",sll7066,,csx10,cas7-cas5,TRUE,Q6ZED2,Q6ZED2_SYNY3,"GenBank: Regions Cas7, Cas5"
93
+ 1,III-D,"Synechocystis sp. 6803",sll7063,,csm2,cas11,TRUE,Q6ZED5,Q6ZED5_SYNY3,
94
+ 1,III-D,"Synechocystis sp. 6803",sll7065,,csm5,cas7-cas7,TRUE,Q6ZED3,Q6ZED3_SYNY3,"GenBank: Regions Cas7, Cas7"
95
+ 1,III-D,"Synechocystis sp. 6803",sll7064,,all1473,csx19,TRUE,Q6ZED4,Q6ZED4_SYNY3,
96
+ 1,III-D,"Synechocystis sp. 6803",,,csm5,cas7,TRUE,,,
97
+ 1,III-E,"Candidatus Scalindua brodae",SCABRO_02601,,,TPR+caspase,FALSE,A0A0B0EKL4,A0A0B0EKL4_9BACT,"UniProt: CHAT domain protein"
98
+ 1,III-E,"Candidatus Scalindua brodae",SCABRO_02597,,csm3+csm2+csm5(3),"cas7(3),cas11",TRUE,A0A0B0EGF3,A0A0B0EGF3_9BACT,"Pfam: RAMPs 4 hits"
99
+ 1,III-E,"Candidatus Scalindua brodae",,,,RT,FALSE,,,
100
+ 1,III-E,"Candidatus Scalindua brodae",SCABRO_02595,,,cas1,FALSE,A0A0B0EEW2,A0A0B0EEW2_9BACT,
101
+ 1,III-E,"Candidatus Scalindua brodae",SCABRO_02593,,,cas2,FALSE,A0A0B0ELU0,A0A0B0ELU0_9BACT,
102
+ 1,III-F,"Thermotoga lettingae TMO",Tlet_0097,,,cas10,TRUE,A8F3D4,A8F3D4_PSELT,
103
+ 1,III-F,"Thermotoga lettingae TMO",Tlet_0098,,,cas5,TRUE,A8F3D5,A8F3D5_PSELT,
104
+ 1,III-F,"Thermotoga lettingae TMO",Tlet_0099,,,cas11,TRUE,A8F3D6,A8F3D6_PSELT,
105
+ 1,III-F,"Thermotoga lettingae TMO",Tlet_0100,,csm3,cas7,TRUE,A8F3D7,A8F3D7_PSELT,"Pfam: RAMPs 1 hit"
106
+ 1,III-C,"Methanothermobacter thermoautotrophicus",MTH328,,cmr1,cas7,TRUE,O26428,O26428_METTH,"Pfam: RAMPs 1 hit"
107
+ 1,III-C,"Methanothermobacter thermoautotrophicus",MTH327,,cmr6,cas7,TRUE,O26427,O26427_METTH,"Pfam: RAMPs 1 hit"
108
+ 1,III-C,"Methanothermobacter thermoautotrophicus",MTH326,,,cas10,TRUE,O26426,O26426_METTH,
109
+ 1,III-C,"Methanothermobacter thermoautotrophicus",MTH325,,cmr4,cas7,TRUE,O26425,O26425_METTH,"Pfam: RAMPs 1 hit"
110
+ 1,III-C,"Methanothermobacter thermoautotrophicus",MTH324,,cmr5,cas11,TRUE,O26424,O26424_METTH,
111
+ 1,III-C,"Methanothermobacter thermoautotrophicus",MTH323,,cmr3,cas5,TRUE,O26423,O26423_METTH,
112
+ 1,III-B,"Pyrococcus furiosus",PF1130,,cmr1,cas7,TRUE,Q8U1S5,CMR1A_PYRFU,
113
+ 1,III-B,"Pyrococcus furiosus",PF1129,,,cas10,TRUE,Q8U1S6,CMR2_PYRFU,
114
+ 1,III-B,"Pyrococcus furiosus",PF1128,,cmr3,cas5,TRUE,Q8U1S7,CMR3_PYRFU,
115
+ 1,III-B,"Pyrococcus furiosus",PF1126,,cmr4,cas7,TRUE,Q8U1S9,CMR4_PYRFU,
116
+ 1,III-B,"Pyrococcus furiosus",PF1125,,cmr5,cas11,TRUE,Q8U1T0,CMR5_PYRFU,
117
+ 1,III-B,"Pyrococcus furiosus",PF1131,,,cas6,FALSE,Q8U1S4,CAS6_PYRFU,
118
+ 1,III-B,"Pyrococcus furiosus",PF1124,,cmr6,cas7,TRUE,Q8U1T1,CMR6_PYRFU,
119
+ 2,II-B,"Legionella pneumophila str. Paris",lpp0160,,,cas9,TRUE,A0AAN5PKD3,A0AAN5PKD3_LEGPN,
120
+ 2,II-B,"Legionella pneumophila str. Paris",lpp0161,,,cas1,FALSE,A0A2U8U9X9,A0A2U8U9X9_LEGPN,
121
+ 2,II-B,"Legionella pneumophila str. Paris",lpp0162,,,cas2,FALSE,A0A4T1YC98,A0A4T1YC98_LEGPN,
122
+ 2,II-B,"Legionella pneumophila str. Paris",lpp0163,,,cas4,FALSE,A0A2U8U9S0,A0A2U8U9S0_LEGPN,
123
+ 2,II-A,"Streptococcus thermophilus",str0657,,,cas9,TRUE,G3ECR1,CAS9_STRTR,
124
+ 2,II-A,"Streptococcus thermophilus",str0658,,,cas1,FALSE,G3ECR2,CAS1_STRTR,
125
+ 2,II-A,"Streptococcus thermophilus",str0659,,,cas2,FALSE,G3ECR3,CAS2_STRTR,
126
+ 2,II-A,"Streptococcus thermophilus",str0660,,,csn2,FALSE,G3ECR4,CSN2_STRTR,
127
+ 2,II-C1,"Neisseria lactamica 020-06",NLA_17660,,,cas9,TRUE,E4ZF34,E4ZF34_NEIL0,
128
+ 2,II-C1,"Neisseria lactamica 020-06",NLA_17670,,,cas1,FALSE,E4ZF35,E4ZF35_NEIL0,
129
+ 2,II-C1,"Neisseria lactamica 020-06",NLA_17680,,,cas2,FALSE,E4ZF36,E4ZF36_NEIL0,
130
+ 2,II-C2,"Micrarchaeum acidiphilum ARMAN-1",BK997_03320,,,cas9,TRUE,A0A1L9GUT8,A0A1L9GUT8_9ARCH,
131
+ 2,II-C2,"Micrarchaeum acidiphilum ARMAN-1",BK997_03325,,,cas4,FALSE,A0A1L9GUY1,A0A1L9GUY1_9ARCH,
132
+ 2,II-C2,"Micrarchaeum acidiphilum ARMAN-1",BK997_03330,,,cas2,FALSE,A0A1L9GUR0,A0A1L9GUR0_9ARCH,
133
+ 2,II-C2,"Micrarchaeum acidiphilum ARMAN-1",BK997_03335,,,cas1,FALSE,A0A1L9GUR2,A0A1L9GUR2_9ARCH,
134
+ 2,V-A,"Francisella cf. novicida Fx1",FNFX1_1431,,cpf1,cas12a,TRUE,A0Q7Q2,CS12A_FRATN,
135
+ 2,V-A,"Francisella cf. novicida Fx1",FNFX1_1430,,,cas4,FALSE,A0Q7Q1,A0Q7Q1_FRATN,
136
+ 2,V-A,"Francisella cf. novicida Fx1",FNFX1_1429,,,cas1,FALSE,A0Q7Q0,A0Q7Q0_FRATN,
137
+ 2,V-A,"Francisella cf. novicida Fx1",FNFX1_1428,,,cas2,FALSE,A0Q7P9,A0Q7P9_FRATN,
138
+ 2,V-E,"Deltaproteobacteria bacterium",A2Z89_08250,,casX,cas12e,TRUE,A0A357BT59,A0A357BT59_UNCDE,
139
+ 2,V-E,"Deltaproteobacteria bacterium",A2Z89_08255,,,cas4,FALSE,A0A357BV01,A0A357BV01_UNCDE,
140
+ 2,V-E,"Deltaproteobacteria bacterium",A2Z89_08260,,,cas1,FALSE,A0A357BT84,A0A357BT84_UNCDE,
141
+ 2,V-E,"Deltaproteobacteria bacterium",A2Z89_08265,,,cas2,FALSE,A0A357BT86,A0A357BT86_UNCDE,
142
+ 2,V-B1,"Alicyclobacillus acidoterrestris",N007_06525,,c2c1,cas12b1,TRUE,T0D7A2,CS12B_ALIAG,
143
+ 2,V-B1,"Alicyclobacillus acidoterrestris",N007_06530,,,cas4-cas1,FALSE,A0A9E6ZHP2,A0A9E6ZHP2_ALIAG,
144
+ 2,V-B1,"Alicyclobacillus acidoterrestris",N007_06535,,,cas2,FALSE,T0C2X2,T0C2X2_ALIAG,
145
+ 2,V-B2,"Planctomycetes bacterium RBG_13_46_10",A2167_01675,,,cas4-cas1,FALSE,A0A1G2YUS0,A0A1G2YUS0_9BACT,
146
+ 2,V-B2,"Planctomycetes bacterium RBG_13_46_10",A2167_01680,,,cas2,FALSE,A0A1G2YUR9,A0A1G2YUR9_9BACT,
147
+ 2,V-B2,"Planctomycetes bacterium RBG_13_46_10",A2167_01685,,,cas12b2,TRUE,A0A1G2YUT2,A0A1G2YUT2_9BACT,
148
+ 2,V-I,"Freshwater metagenome (JGI)",Ga0208225_100001036,,,cas12i,TRUE,,Ga0208225_100001036,"Downloaded from JGI Data Portal (taxon ID: 3300020508)"
149
+ 2,V-H,"Hypersaline lake sediment metagenome (JGI )",Ga0180438_100006283,,,cas12h,TRUE,,Ga0180438_100006283,"Downloaded from JGI Data Portal (taxon ID: 3300017971)"
150
+ 2,V-C,"Oleiphilus sp.",A3715_16890,,,cas1,FALSE,,UPI0007C2850C,"UniParc sequence"
151
+ 2,V-C,"Oleiphilus sp.",A3715_16885,,c2c3,cas12c,TRUE,,UPI0007C30103,"UniParc sequence"
152
+ 2,V-D,"Bacterium CG09_39_24",BK003_02070,,,cas1,FALSE,A0A1L9GZC2,A0A1L9GZC2_UNCXX,
153
+ 2,V-D,"Bacterium CG09_39_24",BK003_02075,,casY,cas12d,TRUE,A0A1L9GZ17,A0A1L9GZ17_UNCXX,
154
+ 2,V-F1,"Uncultured archaeon",NDOCEIEL_00008,QBM01133,,cas1,FALSE,A0A482D445,A0A482D445_UNCAX,
155
+ 2,V-F1,"Uncultured archaeon",NDOCEIEL_00009,QBM01134,,cas2,FALSE,A0A482D3L8,A0A482D3L8_UNCAX,
156
+ 2,V-F1,"Uncultured archaeon",NDOCEIEL_00010,QBM01135,,cas4,FALSE,A0A482D482,A0A482D482_UNCAX,
157
+ 2,V-F1,"Uncultured archaeon",NDOCEIEL_00011,QBM01136,cas14a,cas12f1,TRUE,A0A482D2S9,A0A482D2S9_UNCAX,
158
+ 2,V-U3,"Bacillus thuringiensis HD-771",BTG_31928,,,c2c10,TRUE,A0A9W3JGX3,A0A9W3JGX3_BACTU,
159
+ 2,V-F2,"Uncultured archaeon",ICDLJNLD_00052,QBM01237,cas14b,cas12f,TRUE,A0A482D2T6,A0A482D2T6_UNCAX,
160
+ 2,V-F2,"Uncultured archaeon",ICDLJNLD_00049,QBM01234,,cas1,FALSE,A0A482D3Z6,A0A482D3Z6_UNCAX,
161
+ 2,V-F2,"Uncultured archaeon",ICDLJNLD_00050,QBM01235,,cas2,FALSE,A0A482D4I6,A0A482D4I6_UNCAX,
162
+ 2,V-F2,"Uncultured archaeon",ICDLJNLD_00051,QBM01236,,cas4,FALSE,A0A482D3B0,A0A482D3B0_UNCAX,
163
+ 2,V-U2,"Rippkaea orientalis (strain PCC 8801 / RF-1)",PCC8801_4127,ACK68059,,c2c8,TRUE,B7K606,B7K606_RIPO1,
164
+ 2,V-U4,"Rothia dentocariosa M567",HMPREF0734_01291,EFJ78236,,c2c9,TRUE,A0A7D4LAR1,A0A7D4LAR1_9MICC,
165
+ 2,V-F3,"Candidatus Micrarchaeota archaeon",COU37_03065,PIT84483,,cas1,FALSE,A0A2H9L0L4,A0A2H9L0L4_9ARCH,
166
+ 2,V-F3,"Candidatus Micrarchaeota archaeon",COU37_03060,PIT84482,,cas2,FALSE,A0A2H9KZH3,A0A2H9KZH3_9ARCH,
167
+ 2,V-F3,"Candidatus Micrarchaeota archaeon",COU37_03055,PIT84481,,cas4,FALSE,A0A2H9KYT0,A0A2H9KYT0_9ARCH,
168
+ 2,V-F3,"Candidatus Micrarchaeota archaeon",COU37_03050,PIT84480,cas14c,cas12f3,TRUE,A0A2H9KYV8,A0A2H9KYV8_9ARCH,
169
+ 2,V-U1,"Gordonia otitidis",GOOTI_RS19525,,,c2c4,TRUE,,,
170
+ 2,V-G,"Hot springs metagenome",FLYL01000025.1,,,cas12g,TRUE,,,https://www.ncbi.nlm.nih.gov/nuccore/FLYL01000025.1
171
+ 2,V-U5,"Rippkaea orientalis (strain PCC 8801 / RF-1)",PCC8801_2993,ACK66980,,tnsB,FALSE,B7JWD5,B7JWD5_RIPO1,"Transposase-like Mu"
172
+ 2,V-U5,"Rippkaea orientalis (strain PCC 8801 / RF-1)",PCC8801_2994,ACK66981,,tnsC,FALSE,B7JWD6,B7JWD6_RIPO1,
173
+ 2,V-U5,"Rippkaea orientalis (strain PCC 8801 / RF-1)",PCC8801_2995,ACK66982,,tniQ,FALSE,B7JWD7,B7JWD7_RIPO1,"Pfam: TniQ 1 hit"
174
+ 2,V-U5,"Rippkaea orientalis (strain PCC 8801 / RF-1)",PCC8801_2997,ACK66984,c2c5,cas12k,TRUE,B7JWD9,B7JWD9_RIPO1,"NCBIfam: V_Cas12k 1 hit"
175
+ 2,VI-A,"Leptotrichia shahii",B031_RS0110445,WP_018451595,c2c2,cas13a,TRUE,P0DOC6,CS13A_LEPSD,
176
+ 2,VI-A,"Leptotrichia shahii",B031_RS0110440,WP_018451594,,cas1,FALSE,A0A510JKT1,A0A510JKT1_9FUSO,
177
+ 2,VI-A,"Leptotrichia shahii",B031_RS10965,WP_018451593,,cas2,FALSE,A0A510JQG4,A0A510JQG4_9FUSO,
178
+ 2,VI-D,"Ruminococcus bicirculans",RBI_RS11595,,,WYL,FALSE,,,
179
+ 2,VI-D,"Ruminococcus bicirculans",RBI_RS12820,WP_041337480,,cas13d,TRUE,,UPI0003D9600D,"UniParc sequence"
180
+ 2,VI-D,"Ruminococcus bicirculans",RBI_RS11600,WP_041337297,,cas1,FALSE,,UPI0003D863A8,"UniParc sequence"
181
+ 2,VI-D,"Ruminococcus bicirculans",RBI_RS11605,WP_041337298,,cas2,FALSE,,UPI0003D92D81,"UniParc sequence"
182
+ 2,VI-C,"Fusobacterium perfoetens",T364_RS0105110,WP_027128616,c2c7,cas13c,TRUE,,UPI0004817C16,"UniParc sequence"
183
+ 2,VI-B1,"Segatella buccae ATCC 33574",HMPREF6485_RS00335,WP_004343973,c2c6,cas13b1,TRUE,E6K398,E6K398_9BACT,
184
+ 2,VI-B1,"Segatella buccae ATCC 33574",HMPREF6485_RS00340,WP_004343974,,csx28,FALSE,E6K399,E6K399_9BACT,
185
+ 2,VI-B2,"Bergeyella zoohelcum",HMPREF9699_02006,EKB54194,,csx27,FALSE,K1MDP1,K1MDP1_9FLAO,
186
+ 2,VI-B2,"Bergeyella zoohelcum",HMPREF9699_02005,EKB54193,c2c6,cas13b2,TRUE,K1LVU1,K1LVU1_9FLAO,
@@ -0,0 +1,34 @@
# Cas/download.R
# Download Cas sequences from UniProt
# 20250522

dat <- read.csv("Cas_uniprot.csv")

# Directory that receives the downloaded FASTA files
fasta_dir <- "fasta"
# Create it up front; without it no downloaded file can be stored
if(!dir.exists(fasta_dir)) dir.create(fasta_dir)

# Download one FASTA file per ID from a UniProt REST endpoint
#   ids: vector of accessions; NA, "" and duplicated values are skipped
#   endpoint: base URL ending in "/"; "<ID>.fasta" is appended to it
# Files that already exist in fasta_dir are not downloaded again.
# A failed download gives a warning and the loop continues with the next ID.
download_fasta <- function(ids, endpoint) {
  ids <- as.character(ids)
  ids <- unique(ids[!is.na(ids) & ids != ""])
  for(id in ids) {
    file <- paste0(id, ".fasta")
    dest <- file.path(fasta_dir, file)
    # Skip if already downloaded
    if(file.exists(dest)) next
    URL <- paste0(endpoint, file)
    print(URL)
    # Write directly to the destination, so there is no separate move step
    # and no shell command is built from text read from the CSV file
    status <- tryCatch(
      download.file(URL, dest, quiet = TRUE, mode = "wb"),
      error = function(e) {
        warning("download failed for ", URL, ": ", conditionMessage(e), call. = FALSE)
        1L
      }
    )
    # download.file() returns 0 on success; remove a partial file so that
    # the "already downloaded" check does not skip it on the next run
    if(status != 0 && file.exists(dest)) file.remove(dest)
  }
}

# UniProt entries
download_fasta(dat$UniProt, "https://rest.uniprot.org/uniprotkb/")

# UniParc sequences: entries in the Protein column that start with UPI
download_fasta(grep("^UPI", dat$Protein, value = TRUE), "https://rest.uniprot.org/uniparc/")
@@ -0,0 +1,34 @@
# Cas/mkaa.R
# Generate amino acid compositions from protein sequences
# 20250522

dat <- read.csv("Cas_uniprot.csv")
# Use UniProt ID as the file name
ID <- dat$UniProt
# In case UniProt ID is missing, use alternate ID
no_uniprot <- ID == ""
ID[no_uniprot] <- dat$Protein[no_uniprot]
# Store ID in data frame
dat$ID <- ID
# Remove missing IDs
dat <- subset(dat, ID != "")
# Stop early instead of looping over 1:0 and writing a malformed file
if(nrow(dat) == 0) stop("no rows with a UniProt or Protein ID in Cas_uniprot.csv")

# Each protein is read from fasta/<ID>.fasta; report all missing files at once
files <- file.path("fasta", paste0(dat$ID, ".fasta"))
absent <- files[!file.exists(files)]
if(length(absent) > 0) stop("missing FASTA file(s): ", paste(absent, collapse = ", "))

# Get amino acid composition for each protein
# (seq_len is used so that the index is never c(1, 0))
aalist <- lapply(seq_len(nrow(dat)), function(i) {
  aa <- canprot::read_fasta(files[i])
  # Store systematic name and ID
  aa$protein <- dat$Systematic[i]
  aa$ref <- dat$ID[i]
  aa
})

# Convert list to data frame
aa <- do.call(rbind, aalist)
# Capitalize protein names; replacements are applied in this order
# to every occurrence of the lower-case text
capitalized <- c(cas = "Cas", csx = "Csx", c2c = "C2c", din = "Din", tns = "Tns")
for(lower in names(capitalized)) {
  aa$protein <- gsub(lower, capitalized[[lower]], aa$protein)
}
# Save results (only the first column is quoted)
write.csv(aa, "Cas_aa.csv", row.names = FALSE, quote = 1)
@@ -0,0 +1,12 @@
1
+ protein,organism,ref,abbrv,chains,Ala,Cys,Asp,Glu,Phe,Gly,His,Ile,Lys,Leu,Met,Asn,Pro,Gln,Arg,Ser,Thr,Val,Trp,Tyr
2
+ POLG.VP4,POL1M,UniProt,P03300,1,7,0,4,2,2,4,1,4,5,2,1,6,3,4,2,9,5,3,0,4
3
+ POLG.VP2,POL1M,UniProt,P03300,1,21,9,10,12,8,18,5,11,5,25,8,20,18,10,13,22,24,14,8,10
4
+ POLG.VP3,POL1M,UniProt,P03300,1,15,5,14,10,10,13,5,14,10,23,12,8,18,7,9,21,20,13,3,8
5
+ POLG.VP1,POL1M,UniProt,P03300,1,23,2,17,11,12,17,7,12,15,20,5,14,21,9,15,24,32,26,4,16
6
+ POLG.2A,POL1M,UniProt,P03300,1,12,6,7,9,5,16,6,8,3,8,4,8,4,7,8,8,6,10,1,13
7
+ POLG.P2B,POL1M,UniProt,P03300,1,6,2,4,5,2,5,0,12,7,12,1,4,2,4,2,7,12,5,2,3
8
+ POLG.P2C,POL1M,UniProt,P03300,1,22,10,15,20,14,12,8,25,23,25,13,20,15,19,19,27,15,16,4,7
9
+ POLG.P3A,POL1M,UniProt,P03300,1,7,2,5,4,2,4,1,7,6,6,2,4,4,7,3,3,6,9,1,4
10
+ POLG.VPg,POL1M,UniProt,P03300,1,2,0,0,0,0,2,0,1,3,1,0,2,3,1,1,0,3,2,0,1
11
+ POLG.3C,POL1M,UniProt,P03300,1,15,3,8,9,8,18,4,13,9,11,5,9,8,7,9,9,19,13,0,5
12
+ POLG.RNA,POL1M,UniProt,P03300,1,31,5,32,30,19,28,12,26,38,46,17,18,22,7,18,30,26,24,7,25