gsrap 0.7.2__tar.gz → 0.8.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (95) hide show
  1. {gsrap-0.7.2 → gsrap-0.8.0}/PKG-INFO +1 -1
  2. {gsrap-0.7.2 → gsrap-0.8.0}/pyproject.toml +1 -1
  3. {gsrap-0.7.2 → gsrap-0.8.0}/src/gsrap/.ipynb_checkpoints/__init__-checkpoint.py +2 -0
  4. {gsrap-0.7.2 → gsrap-0.8.0}/src/gsrap/__init__.py +2 -0
  5. {gsrap-0.7.2 → gsrap-0.8.0}/src/gsrap/commons/.ipynb_checkpoints/__init__-checkpoint.py +1 -0
  6. {gsrap-0.7.2 → gsrap-0.8.0}/src/gsrap/commons/.ipynb_checkpoints/downloads-checkpoint.py +1 -1
  7. {gsrap-0.7.2 → gsrap-0.8.0}/src/gsrap/commons/.ipynb_checkpoints/escherutils-checkpoint.py +1 -1
  8. {gsrap-0.7.2 → gsrap-0.8.0}/src/gsrap/commons/.ipynb_checkpoints/excelhub-checkpoint.py +70 -37
  9. {gsrap-0.7.2 → gsrap-0.8.0}/src/gsrap/commons/.ipynb_checkpoints/figures-checkpoint.py +15 -1
  10. gsrap-0.8.0/src/gsrap/commons/.ipynb_checkpoints/keggutils-checkpoint.py +145 -0
  11. {gsrap-0.7.2 → gsrap-0.8.0}/src/gsrap/commons/__init__.py +1 -0
  12. {gsrap-0.7.2 → gsrap-0.8.0}/src/gsrap/commons/downloads.py +1 -1
  13. {gsrap-0.7.2 → gsrap-0.8.0}/src/gsrap/commons/escherutils.py +1 -1
  14. {gsrap-0.7.2 → gsrap-0.8.0}/src/gsrap/commons/excelhub.py +70 -37
  15. {gsrap-0.7.2 → gsrap-0.8.0}/src/gsrap/commons/figures.py +15 -1
  16. gsrap-0.8.0/src/gsrap/commons/keggutils.py +145 -0
  17. {gsrap-0.7.2 → gsrap-0.8.0}/src/gsrap/mkmodel/.ipynb_checkpoints/mkmodel-checkpoint.py +63 -19
  18. {gsrap-0.7.2 → gsrap-0.8.0}/src/gsrap/mkmodel/.ipynb_checkpoints/pruner-checkpoint.py +72 -7
  19. {gsrap-0.7.2 → gsrap-0.8.0}/src/gsrap/mkmodel/mkmodel.py +63 -19
  20. {gsrap-0.7.2 → gsrap-0.8.0}/src/gsrap/mkmodel/pruner.py +72 -7
  21. {gsrap-0.7.2 → gsrap-0.8.0}/src/gsrap/parsedb/.ipynb_checkpoints/completeness-checkpoint.py +33 -6
  22. {gsrap-0.7.2 → gsrap-0.8.0}/src/gsrap/parsedb/.ipynb_checkpoints/introduce-checkpoint.py +8 -0
  23. {gsrap-0.7.2 → gsrap-0.8.0}/src/gsrap/parsedb/.ipynb_checkpoints/parsedb-checkpoint.py +10 -2
  24. {gsrap-0.7.2 → gsrap-0.8.0}/src/gsrap/parsedb/completeness.py +33 -6
  25. {gsrap-0.7.2 → gsrap-0.8.0}/src/gsrap/parsedb/introduce.py +8 -0
  26. {gsrap-0.7.2 → gsrap-0.8.0}/src/gsrap/parsedb/parsedb.py +10 -2
  27. {gsrap-0.7.2 → gsrap-0.8.0}/LICENSE.txt +0 -0
  28. {gsrap-0.7.2 → gsrap-0.8.0}/README.md +0 -0
  29. {gsrap-0.7.2 → gsrap-0.8.0}/src/gsrap/assets/.ipynb_checkpoints/PM1-checkpoint.csv +0 -0
  30. {gsrap-0.7.2 → gsrap-0.8.0}/src/gsrap/assets/.ipynb_checkpoints/PM2A-checkpoint.csv +0 -0
  31. {gsrap-0.7.2 → gsrap-0.8.0}/src/gsrap/assets/.ipynb_checkpoints/PM3B-checkpoint.csv +0 -0
  32. {gsrap-0.7.2 → gsrap-0.8.0}/src/gsrap/assets/.ipynb_checkpoints/PM4A-checkpoint.csv +0 -0
  33. {gsrap-0.7.2 → gsrap-0.8.0}/src/gsrap/assets/PM1.csv +0 -0
  34. {gsrap-0.7.2 → gsrap-0.8.0}/src/gsrap/assets/PM2A.csv +0 -0
  35. {gsrap-0.7.2 → gsrap-0.8.0}/src/gsrap/assets/PM3B.csv +0 -0
  36. {gsrap-0.7.2 → gsrap-0.8.0}/src/gsrap/assets/PM4A.csv +0 -0
  37. {gsrap-0.7.2 → gsrap-0.8.0}/src/gsrap/assets/__init__.py +0 -0
  38. {gsrap-0.7.2 → gsrap-0.8.0}/src/gsrap/assets/kegg_compound_to_others.pickle +0 -0
  39. {gsrap-0.7.2 → gsrap-0.8.0}/src/gsrap/assets/kegg_reaction_to_others.pickle +0 -0
  40. {gsrap-0.7.2 → gsrap-0.8.0}/src/gsrap/commons/.ipynb_checkpoints/biomass-checkpoint.py +0 -0
  41. {gsrap-0.7.2 → gsrap-0.8.0}/src/gsrap/commons/.ipynb_checkpoints/coeffs-checkpoint.py +0 -0
  42. {gsrap-0.7.2 → gsrap-0.8.0}/src/gsrap/commons/.ipynb_checkpoints/fluxbal-checkpoint.py +0 -0
  43. {gsrap-0.7.2 → gsrap-0.8.0}/src/gsrap/commons/.ipynb_checkpoints/logutils-checkpoint.py +0 -0
  44. {gsrap-0.7.2 → gsrap-0.8.0}/src/gsrap/commons/.ipynb_checkpoints/medium-checkpoint.py +0 -0
  45. {gsrap-0.7.2 → gsrap-0.8.0}/src/gsrap/commons/.ipynb_checkpoints/metrics-checkpoint.py +0 -0
  46. {gsrap-0.7.2 → gsrap-0.8.0}/src/gsrap/commons/.ipynb_checkpoints/sbmlutils-checkpoint.py +0 -0
  47. {gsrap-0.7.2 → gsrap-0.8.0}/src/gsrap/commons/biomass.py +0 -0
  48. {gsrap-0.7.2 → gsrap-0.8.0}/src/gsrap/commons/coeffs.py +0 -0
  49. {gsrap-0.7.2 → gsrap-0.8.0}/src/gsrap/commons/fluxbal.py +0 -0
  50. {gsrap-0.7.2 → gsrap-0.8.0}/src/gsrap/commons/logutils.py +0 -0
  51. {gsrap-0.7.2 → gsrap-0.8.0}/src/gsrap/commons/medium.py +0 -0
  52. {gsrap-0.7.2 → gsrap-0.8.0}/src/gsrap/commons/metrics.py +0 -0
  53. {gsrap-0.7.2 → gsrap-0.8.0}/src/gsrap/commons/sbmlutils.py +0 -0
  54. {gsrap-0.7.2 → gsrap-0.8.0}/src/gsrap/getmaps/.ipynb_checkpoints/__init__-checkpoint.py +0 -0
  55. {gsrap-0.7.2 → gsrap-0.8.0}/src/gsrap/getmaps/.ipynb_checkpoints/getmaps-checkpoint.py +0 -0
  56. {gsrap-0.7.2 → gsrap-0.8.0}/src/gsrap/getmaps/.ipynb_checkpoints/kdown-checkpoint.py +0 -0
  57. {gsrap-0.7.2 → gsrap-0.8.0}/src/gsrap/getmaps/__init__.py +0 -0
  58. {gsrap-0.7.2 → gsrap-0.8.0}/src/gsrap/getmaps/getmaps.py +0 -0
  59. {gsrap-0.7.2 → gsrap-0.8.0}/src/gsrap/getmaps/kdown.py +0 -0
  60. {gsrap-0.7.2 → gsrap-0.8.0}/src/gsrap/mkmodel/.ipynb_checkpoints/__init__-checkpoint.py +0 -0
  61. {gsrap-0.7.2 → gsrap-0.8.0}/src/gsrap/mkmodel/.ipynb_checkpoints/biologcuration-checkpoint.py +0 -0
  62. {gsrap-0.7.2 → gsrap-0.8.0}/src/gsrap/mkmodel/.ipynb_checkpoints/gapfill-checkpoint.py +0 -0
  63. {gsrap-0.7.2 → gsrap-0.8.0}/src/gsrap/mkmodel/.ipynb_checkpoints/gapfillutils-checkpoint.py +0 -0
  64. {gsrap-0.7.2 → gsrap-0.8.0}/src/gsrap/mkmodel/.ipynb_checkpoints/polishing-checkpoint.py +0 -0
  65. {gsrap-0.7.2 → gsrap-0.8.0}/src/gsrap/mkmodel/__init__.py +0 -0
  66. {gsrap-0.7.2 → gsrap-0.8.0}/src/gsrap/mkmodel/biologcuration.py +0 -0
  67. {gsrap-0.7.2 → gsrap-0.8.0}/src/gsrap/mkmodel/gapfill.py +0 -0
  68. {gsrap-0.7.2 → gsrap-0.8.0}/src/gsrap/mkmodel/gapfillutils.py +0 -0
  69. {gsrap-0.7.2 → gsrap-0.8.0}/src/gsrap/mkmodel/polishing.py +0 -0
  70. {gsrap-0.7.2 → gsrap-0.8.0}/src/gsrap/parsedb/.ipynb_checkpoints/__init__-checkpoint.py +0 -0
  71. {gsrap-0.7.2 → gsrap-0.8.0}/src/gsrap/parsedb/.ipynb_checkpoints/annotation-checkpoint.py +0 -0
  72. {gsrap-0.7.2 → gsrap-0.8.0}/src/gsrap/parsedb/.ipynb_checkpoints/manual-checkpoint.py +0 -0
  73. {gsrap-0.7.2 → gsrap-0.8.0}/src/gsrap/parsedb/.ipynb_checkpoints/repeating-checkpoint.py +0 -0
  74. {gsrap-0.7.2 → gsrap-0.8.0}/src/gsrap/parsedb/__init__.py +0 -0
  75. {gsrap-0.7.2 → gsrap-0.8.0}/src/gsrap/parsedb/annotation.py +0 -0
  76. {gsrap-0.7.2 → gsrap-0.8.0}/src/gsrap/parsedb/manual.py +0 -0
  77. {gsrap-0.7.2 → gsrap-0.8.0}/src/gsrap/parsedb/repeating.py +0 -0
  78. {gsrap-0.7.2 → gsrap-0.8.0}/src/gsrap/runsims/.ipynb_checkpoints/__init__-checkpoint.py +0 -0
  79. {gsrap-0.7.2 → gsrap-0.8.0}/src/gsrap/runsims/.ipynb_checkpoints/biosynth-checkpoint.py +0 -0
  80. {gsrap-0.7.2 → gsrap-0.8.0}/src/gsrap/runsims/.ipynb_checkpoints/cnps-checkpoint.py +0 -0
  81. {gsrap-0.7.2 → gsrap-0.8.0}/src/gsrap/runsims/.ipynb_checkpoints/essentialgenes-checkpoint.py +0 -0
  82. {gsrap-0.7.2 → gsrap-0.8.0}/src/gsrap/runsims/.ipynb_checkpoints/growthfactors-checkpoint.py +0 -0
  83. {gsrap-0.7.2 → gsrap-0.8.0}/src/gsrap/runsims/.ipynb_checkpoints/precursors-checkpoint.py +0 -0
  84. {gsrap-0.7.2 → gsrap-0.8.0}/src/gsrap/runsims/.ipynb_checkpoints/runsims-checkpoint.py +0 -0
  85. {gsrap-0.7.2 → gsrap-0.8.0}/src/gsrap/runsims/.ipynb_checkpoints/simplegrowth-checkpoint.py +0 -0
  86. {gsrap-0.7.2 → gsrap-0.8.0}/src/gsrap/runsims/.ipynb_checkpoints/singleomission-checkpoint.py +0 -0
  87. {gsrap-0.7.2 → gsrap-0.8.0}/src/gsrap/runsims/__init__.py +0 -0
  88. {gsrap-0.7.2 → gsrap-0.8.0}/src/gsrap/runsims/biosynth.py +0 -0
  89. {gsrap-0.7.2 → gsrap-0.8.0}/src/gsrap/runsims/cnps.py +0 -0
  90. {gsrap-0.7.2 → gsrap-0.8.0}/src/gsrap/runsims/essentialgenes.py +0 -0
  91. {gsrap-0.7.2 → gsrap-0.8.0}/src/gsrap/runsims/growthfactors.py +0 -0
  92. {gsrap-0.7.2 → gsrap-0.8.0}/src/gsrap/runsims/precursors.py +0 -0
  93. {gsrap-0.7.2 → gsrap-0.8.0}/src/gsrap/runsims/runsims.py +0 -0
  94. {gsrap-0.7.2 → gsrap-0.8.0}/src/gsrap/runsims/simplegrowth.py +0 -0
  95. {gsrap-0.7.2 → gsrap-0.8.0}/src/gsrap/runsims/singleomission.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: gsrap
3
- Version: 0.7.2
3
+ Version: 0.8.0
4
4
  Summary:
5
5
  License: GNU General Public License v3.0
6
6
  Author: Gioele Lazzari
@@ -1,6 +1,6 @@
1
1
  [tool.poetry]
2
2
  name = "gsrap"
3
- version = "0.7.2"
3
+ version = "0.8.0"
4
4
  description = ""
5
5
  authors = ["Gioele Lazzari"]
6
6
  license = "GNU General Public License v3.0"
@@ -72,6 +72,7 @@ def main():
72
72
  parsedb_parser.add_argument("--precursors", action='store_true', help="Verify biosynthesis of biomass precursors and show blocked ones.")
73
73
  parsedb_parser.add_argument("--biosynth", action='store_true', help="Check biosynthesis of all metabolites and detect dead-ends.")
74
74
  parsedb_parser.add_argument("-e", "--eggnog", nargs='+', metavar='', type=str, default='-', help="Path to the optional eggnog-mapper annotation table(s).")
75
+ parsedb_parser.add_argument("-k", "--keggorg", metavar='', type=str, default='-', help="A single KEGG Organism code.")
75
76
  parsedb_parser.add_argument("--goodbefore", metavar='', type=str, default='-', help="Syntax is {pure_mid}-{rid1}-{rid2}. From top to bottom, build the universe until reaction {rid1}, transport {rid2} and metabolite {pure_mid} are reached.")
76
77
  parsedb_parser.add_argument("--onlyauthor", metavar='', type=str, default='-', help="Build the universe by parsing contents of the specified author ID only. Contents affected by --goodbefore are parsed anyway.")
77
78
  parsedb_parser.add_argument("--nofigs", action='store_true', help="Do not generate figures.")
@@ -84,6 +85,7 @@ def main():
84
85
  mkmodel_parser.add_argument("-c", "--cores", metavar='', type=int, default=0, help="Number of cores to use (if 0, use all available cores).")
85
86
  mkmodel_parser.add_argument("-o", "--outdir", metavar='', type=str, default='./', help="Main output directory (will be created if not existing).")
86
87
  mkmodel_parser.add_argument("-e", "--eggnog", nargs='+', metavar='', type=str, default='-', help="Path to the eggnog-mapper annotation table(s).")
88
+ mkmodel_parser.add_argument("-k", "--keggorg", metavar='', type=str, default='-', help="A single KEGG Organism code.")
87
89
  mkmodel_parser.add_argument("-u", "--universe", metavar='', type=str, default='-', help="Path to the universe model (SBML format).")
88
90
  mkmodel_parser.add_argument("-i", "--force_inclusion", metavar='', type=str, default='-', help="Force the inclusion of the specified reactions (comma-separated IDs).")
89
91
  mkmodel_parser.add_argument("-f", "--gap_fill", metavar='', type=str, default='-', help="Media to use during gap-filling (comma-separated IDs); if not provided, gap-filling will be skipped.")
@@ -72,6 +72,7 @@ def main():
72
72
  parsedb_parser.add_argument("--precursors", action='store_true', help="Verify biosynthesis of biomass precursors and show blocked ones.")
73
73
  parsedb_parser.add_argument("--biosynth", action='store_true', help="Check biosynthesis of all metabolites and detect dead-ends.")
74
74
  parsedb_parser.add_argument("-e", "--eggnog", nargs='+', metavar='', type=str, default='-', help="Path to the optional eggnog-mapper annotation table(s).")
75
+ parsedb_parser.add_argument("-k", "--keggorg", metavar='', type=str, default='-', help="A single KEGG Organism code.")
75
76
  parsedb_parser.add_argument("--goodbefore", metavar='', type=str, default='-', help="Syntax is {pure_mid}-{rid1}-{rid2}. From top to bottom, build the universe until reaction {rid1}, transport {rid2} and metabolite {pure_mid} are reached.")
76
77
  parsedb_parser.add_argument("--onlyauthor", metavar='', type=str, default='-', help="Build the universe by parsing contents of the specified author ID only. Contents affected by --goodbefore are parsed anyway.")
77
78
  parsedb_parser.add_argument("--nofigs", action='store_true', help="Do not generate figures.")
@@ -84,6 +85,7 @@ def main():
84
85
  mkmodel_parser.add_argument("-c", "--cores", metavar='', type=int, default=0, help="Number of cores to use (if 0, use all available cores).")
85
86
  mkmodel_parser.add_argument("-o", "--outdir", metavar='', type=str, default='./', help="Main output directory (will be created if not existing).")
86
87
  mkmodel_parser.add_argument("-e", "--eggnog", nargs='+', metavar='', type=str, default='-', help="Path to the eggnog-mapper annotation table(s).")
88
+ mkmodel_parser.add_argument("-k", "--keggorg", metavar='', type=str, default='-', help="A single KEGG Organism code.")
87
89
  mkmodel_parser.add_argument("-u", "--universe", metavar='', type=str, default='-', help="Path to the universe model (SBML format).")
88
90
  mkmodel_parser.add_argument("-i", "--force_inclusion", metavar='', type=str, default='-', help="Force the inclusion of the specified reactions (comma-separated IDs).")
89
91
  mkmodel_parser.add_argument("-f", "--gap_fill", metavar='', type=str, default='-', help="Media to use during gap-filling (comma-separated IDs); if not provided, gap-filling will be skipped.")
@@ -7,3 +7,4 @@ from .metrics import *
7
7
  from .sbmlutils import *
8
8
  from .escherutils import *
9
9
  from .logutils import *
10
+ from .keggutils import *
@@ -236,7 +236,7 @@ def format_expansion(logger, eggnog):
236
236
 
237
237
 
238
238
  if eggnog == [] or eggnog == ['-']:
239
- eggnog = '-' # return always a list except for ths case
239
+ eggnog = '-' # return always a list except for this case
240
240
 
241
241
 
242
242
  return eggnog
@@ -31,7 +31,7 @@ def count_undrawn_rids(logger, universe, lastmap):
31
31
  filename = lastmap['filename']
32
32
  logger.debug(f"Last universal map version detected: '{filename}'.")
33
33
  if len(remainings) > 0:
34
- logger.info(f"Our universal map is {len(remainings)} reactions behind. Please draw!")
34
+ logger.warning(f"Our universal map is {len(remainings)} reactions behind. Please draw!")
35
35
  else:
36
36
  logger.info(f"Our universal map is {len(remainings)} reactions behind. Thank you ♥")
37
37
 
@@ -16,12 +16,6 @@ def write_excel_model(model, filepath, nofigs, df_E, df_B, df_P, df_S, df_C=None
16
16
 
17
17
 
18
18
 
19
- df_M = []
20
- df_R = []
21
- df_T = []
22
- df_A = []
23
-
24
-
25
19
  # format df_E: # biomass precursors biosynthesis
26
20
  if df_E is not None:
27
21
  df_E.insert(0, 'mid', '') # new columns as first
@@ -53,56 +47,93 @@ def write_excel_model(model, filepath, nofigs, df_E, df_B, df_P, df_S, df_C=None
53
47
  df_C = df_C.reset_index(drop=True)
54
48
 
55
49
 
50
+
51
+ # define dict-lists, future dataframes
52
+ df_M = []
53
+ df_R = []
54
+ df_T = []
55
+ df_G = []
56
+ df_A = []
57
+
56
58
  for m in model.metabolites:
59
+ row_dict = {'mid': m.id, 'name': m.name, 'formula': m.formula, 'charge': m.charge,}
57
60
 
58
- # get kc codes:
59
- if 'kegg.compound' not in m.annotation.keys(): kc_ids = ''
60
- else:
61
- kc_ids = m.annotation['kegg.compound']
62
- if type(kc_ids) == str: kc_ids = [kc_ids]
63
- kc_ids = '; '.join([i for i in kc_ids if i!='CXXXXX'])
64
-
65
- df_M.append({'mid': m.id, 'formula': m.formula, 'charge': m.charge, 'kc': kc_ids, 'name': m.name})
66
-
61
+ for db in m.annotation.keys():
62
+ annots = m.annotation[db]
63
+ if type(annots) == str: annots = [annots]
64
+ annots = '; '.join([i for i in annots])
65
+ row_dict[db] = annots
66
+ df_M.append(row_dict)
67
67
 
68
68
  for r in model.reactions:
69
+ row_dict = {'rid': r.id, 'name': r.name, 'rstring': r.reaction, 'gpr': "Not applicable", 'bounds': r.bounds}
70
+
71
+ for db in r.annotation.keys():
72
+ annots = r.annotation[db]
73
+ if type(annots) == str: annots = [annots]
74
+ annots = '; '.join([i for i in annots])
75
+ row_dict[db] = annots
69
76
 
70
77
  # handle artificial reactions
71
78
  if r.id == 'Biomass':
72
- df_A.append({'rid': r.id, 'rstring': r.reaction, 'type': 'biomass', 'name': r.name})
79
+ # commented as the type is inplicit in the ID
80
+ #row_dict['type'] = 'biomass'
81
+ df_A.append(row_dict)
73
82
 
74
83
  elif len(r.metabolites) == 1:
84
+ # commented as the type is inplicit in the ID
85
+ """
75
86
  if len(r.metabolites)==1 and list(r.metabolites)[0].id.rsplit('_',1)[-1] == 'e':
76
- df_A.append({'rid': r.id, 'rstring': r.reaction, 'type': 'exchange', 'name': r.name})
87
+ row_dict['type'] = 'exchange'
77
88
  elif r.lower_bound < 0 and r.upper_bound > 0:
78
- df_A.append({'rid': r.id, 'rstring': r.reaction, 'type': 'sink', 'name': r.name})
89
+ row_dict['type'] = 'sink'
79
90
  elif r.lower_bound == 0 and r.upper_bound > 0:
80
- df_A.append({'rid': r.id, 'rstring': r.reaction, 'type': 'demand', 'name': r.name})
91
+ row_dict['type'] = 'demand'
92
+ """
93
+ df_A.append(row_dict)
81
94
 
82
95
  else: # more than 1 metabolite involved
96
+ row_dict['gpr'] = r.gene_reaction_rule
83
97
 
84
- # get kr codes:
85
- if 'kegg.reaction' not in r.annotation.keys(): kr_ids = ''
86
- else:
87
- kr_ids = r.annotation['kegg.reaction']
88
- if type(kr_ids) == str: kr_ids = [kr_ids]
89
- kr_ids = '; '.join([i for i in kr_ids if i!='RXXXXX'])
90
-
91
98
  # introduce reaction in the correct table:
92
- r_dict = {'rid': r.id, 'rstring': r.reaction, 'kr': kr_ids, 'gpr': r.gene_reaction_rule, 'name': r.name}
93
99
  if len(set([m.id.rsplit('_',1)[-1] for m in r.metabolites])) == 1:
94
- df_R.append(r_dict)
95
- else: df_T.append(r_dict)
96
-
100
+ df_R.append(row_dict)
101
+ else: df_T.append(row_dict)
102
+
103
+ for g in model.genes:
104
+ row_dict = {'gid': g.id, 'involved_in': '; '.join([r.id for r in g.reactions])}
105
+
106
+ for db in g.annotation.keys():
107
+ annots = g.annotation[db]
108
+ if type(annots) == str: annots = [annots]
109
+ annots = '; '.join([i for i in annots])
110
+ row_dict[db] = annots
111
+ df_G.append(row_dict)
97
112
 
113
+ # create dataframes from dict-lists
98
114
  df_M = pnd.DataFrame.from_records(df_M)
99
115
  df_R = pnd.DataFrame.from_records(df_R)
100
116
  df_T = pnd.DataFrame.from_records(df_T)
101
117
  df_A = pnd.DataFrame.from_records(df_A)
118
+ df_G = pnd.DataFrame.from_records(df_G)
119
+
120
+ # sort columns
121
+ df_M_first_cols = ['mid', 'name', 'formula', 'charge']
122
+ df_M = df_M[df_M_first_cols + sorted([c for c in df_M.columns if c not in df_M_first_cols])]
123
+ df_R_first_cols = ['rid', 'name', 'rstring', 'gpr', 'bounds']
124
+ df_R = df_R[df_R_first_cols + sorted([c for c in df_R.columns if c not in df_R_first_cols])]
125
+ df_T = df_T[df_R_first_cols + sorted([c for c in df_T.columns if c not in df_R_first_cols])]
126
+ df_A = df_A[df_R_first_cols + sorted([c for c in df_A.columns if c not in df_R_first_cols])]
127
+ df_G_first_cols = ['gid', 'involved_in']
128
+ df_G = df_G[df_G_first_cols + sorted([c for c in df_G.columns if c not in df_G_first_cols])]
129
+
130
+
131
+
102
132
  with pnd.ExcelWriter(filepath, engine='xlsxwriter') as writer:
103
133
  df_M.to_excel(writer, sheet_name='Metabolites', index=False)
104
134
  df_R.to_excel(writer, sheet_name='Reactions', index=False)
105
135
  df_T.to_excel(writer, sheet_name='Transporters', index=False)
136
+ df_G.to_excel(writer, sheet_name='Genes', index=False)
106
137
  df_A.to_excel(writer, sheet_name='Artificials', index=False)
107
138
  if df_E is not None and len(df_E)!=0: df_E.to_excel(writer, sheet_name='Precursors', index=False)
108
139
  if df_B is not None: df_B.to_excel(writer, sheet_name='Biomass', index=False)
@@ -112,7 +143,7 @@ def write_excel_model(model, filepath, nofigs, df_E, df_B, df_P, df_S, df_C=None
112
143
  df_C.to_excel(writer, sheet_name='Coverage', index=False)
113
144
  if nofigs == False:
114
145
  worksheet = writer.sheets['Coverage']
115
- worksheet.insert_image('A1', 'df_C_F1.png', {'image_data': df_C_F1})
146
+ worksheet.insert_image('E3', 'df_C_F1.png', {'image_data': df_C_F1})
116
147
 
117
148
 
118
149
  sheets_dict = {
@@ -139,9 +170,10 @@ def comparative_table(logger, outdir, sheets_dicts):
139
170
  for sheets_dict in sheets_dicts:
140
171
  for index, row in sheets_dict['Reactions'].iterrows():
141
172
  if row['rid'] not in df_topology.index:
142
- df_topology.loc[row['rid'], 'rstring'] = row['rstring']
143
- df_topology.loc[row['rid'], 'kr'] = row['kr']
144
- df_topology.loc[row['rid'], 'name'] = row['name']
173
+ df_topology.loc[row['rid'], 'rid'] = row['rid']
174
+ for key, value in row.to_dict().items():
175
+ # force string to avoid errors with bounds
176
+ df_topology.loc[row['rid'], key] = '' if pnd.isna(value) else str(value)
145
177
  df_topology.loc[row['rid'], sheets_dict['model_id']] = 1
146
178
  for sheets_dict in sheets_dicts: # replace missing values:
147
179
  df_topology = df_topology.fillna({sheets_dict['model_id']: 0})
@@ -152,9 +184,10 @@ def comparative_table(logger, outdir, sheets_dicts):
152
184
  for sheets_dict in sheets_dicts:
153
185
  for index, row in sheets_dict['Reactions'].iterrows():
154
186
  if row['rid'] not in df_gprs.index:
155
- df_gprs.loc[row['rid'], 'rstring'] = row['rstring']
156
- df_gprs.loc[row['rid'], 'kr'] = row['kr']
157
- df_gprs.loc[row['rid'], 'name'] = row['name']
187
+ df_gprs.loc[row['rid'], 'rid'] = row['rid']
188
+ for key, value in row.to_dict().items():
189
+ # force string to avoid errors with bounds
190
+ df_gprs.loc[row['rid'], key] = '' if pnd.isna(value) else str(value)
158
191
  df_gprs.loc[row['rid'], sheets_dict['model_id']] = row['gpr']
159
192
  for sheets_dict in sheets_dicts: # replace missing values:
160
193
  df_gprs = df_gprs.fillna({sheets_dict['model_id']: 'missing'})
@@ -13,9 +13,23 @@ from matplotlib.patches import Patch
13
13
 
14
14
  def figure_df_C_F1(df_coverage):
15
15
 
16
+
17
+
18
+ # prepare the binary matrix:
19
+ modeled_rs = df_coverage[df_coverage['modeled']==True].index
20
+ unmodeled_rs = df_coverage[df_coverage['modeled']==False].index
21
+ # remove useless columns
16
22
  bin_matrix = df_coverage[[i for i in df_coverage.columns if i not in ['map_ids', 'modeled']]]
23
+ # sort rows: upper rows are present in more strains
24
+ bin_matrix = bin_matrix.loc[bin_matrix.sum(axis=1).sort_values(ascending=False).index]
25
+ # split in 2: modeled above, non-modeled below:
26
+ bin_matrix = pnd.concat([
27
+ bin_matrix.loc[[i for i in bin_matrix.index if i in modeled_rs], ],
28
+ bin_matrix.loc[[i for i in bin_matrix.index if i in unmodeled_rs], ]
29
+ ])
17
30
  strains = bin_matrix.columns
18
31
  bin_matrix = bin_matrix.T # features in column
32
+
19
33
 
20
34
  # pdist() / linkage() will loose the accession information. So here we save a dict:
21
35
  index_to_strain = {i: strain for i, strain in enumerate(bin_matrix.index)}
@@ -57,7 +71,7 @@ def figure_df_C_F1(df_coverage):
57
71
 
58
72
  ### PART 3: coverage bar
59
73
  axs[0,1].matshow(
60
- df_coverage[['modeled']].T,
74
+ df_coverage.loc[bin_matrix.T.index, ['modeled']].T,
61
75
  cmap='cool_r',
62
76
  aspect='auto', # non-squared pixels to fit the axis
63
77
  )
@@ -0,0 +1,145 @@
1
+ import time
2
+ import os
3
+ import sys
4
+ import pickle
5
+
6
+
7
+ import pandas as pnd
8
+ from Bio.KEGG import REST
9
+
10
+
11
+
12
+ def download_keggorg(logger, keggorg='lpl', outdir='./', ):
13
+
14
+
15
+ # check if already downloaded
16
+ outfile = os.path.join(outdir, f'{keggorg}.keggorg')
17
+ if os.path.exists(outfile):
18
+ logger.info(f"Organism code '{keggorg}' already downloaded ('{os.path.join(outdir, f'{keggorg}.keggorg')}').")
19
+ return 0
20
+
21
+
22
+ # donwload entire txt:
23
+ logger.info(f"Verifying existence of organism code '{keggorg}' on KEGG...")
24
+ time.sleep(0.5) # be respectful
25
+ try: response = REST.kegg_list(keggorg).read()
26
+ except:
27
+ logger.error(f"Organism code '{keggorg}' not found in KEGG database.")
28
+ return 1
29
+ # response is now a string similar to:
30
+ """
31
+ lpl:lp_0026 CDS 31317..32084 hydrolase, HAD superfamily, Cof family
32
+ lpl:lp_0027 CDS complement(32236..32907) pgmB1; beta-phosphoglucomutase
33
+ """
34
+
35
+
36
+ # extract the gene IDs list:
37
+ gene_ids = [line.split('\t')[0] for line in response.strip().split('\n')]
38
+ # example of gene_id: "lpl:lp_0005"
39
+ logger.info(f"Respectfully downloading {len(gene_ids)} genes from KEGG...")
40
+
41
+
42
+
43
+ # respectfully download in batch
44
+ # 10 is the max number of elements that can be downloaded
45
+ batch_size = 10
46
+ n_batches = len(gene_ids) // batch_size + (1 if (len(gene_ids) % batch_size) > 0 else 0)
47
+
48
+
49
+ n_attempts = 5
50
+ attempts_left = n_attempts
51
+ default_sleep = 0.5
52
+ sleep_time = default_sleep
53
+
54
+
55
+ completed_batches = 0
56
+ completed_genes = 0
57
+ res_string_list = []
58
+ while completed_batches < n_batches:
59
+
60
+ # be respectful
61
+ time.sleep(sleep_time)
62
+
63
+ # extract batch
64
+ start_index = completed_batches *batch_size
65
+ end_index = (completed_batches+1) *batch_size
66
+ if end_index > len(gene_ids): end_index = len(gene_ids)
67
+ curr_batch = gene_ids[start_index: end_index]
68
+
69
+
70
+ # download batch
71
+ try:
72
+ res_string = REST.kegg_get(curr_batch).read()
73
+ for item in res_string.split("///\n\n"):
74
+ res_string_list.append(item.replace('///\n', ''))
75
+ completed_batches += 1
76
+ completed_genes += len(curr_batch)
77
+
78
+ print(f"{completed_genes}/{len(gene_ids)} ({int(completed_genes/len(gene_ids)*100)}%) completed!", end='\r', file=sys.stderr)
79
+
80
+ attempts_left = n_attempts
81
+ sleep_time = default_sleep
82
+ except:
83
+ attempts_left -= 1
84
+ sleep_time = default_sleep *4 # increase sleep time to be more respectful
85
+ logger.warning(f"An error occurred during kegg_get() of batch {curr_batch}. Remaining attempts: {attempts_left}.")
86
+
87
+
88
+ if attempts_left == 0:
89
+ logger.error("No attemps left! Shutting down...")
90
+ return 1
91
+
92
+
93
+ # hide last progress trace ('sheets_dicts' unused if not in multi-strain mode):
94
+ last_trace = f"{completed_genes}/{len(gene_ids)} ({int(completed_genes/len(gene_ids)*100)}%) completed!"
95
+ whitewash = ''.join([' ' for i in range(len(last_trace))])
96
+ print(whitewash, end='\r', file=sys.stderr)
97
+
98
+
99
+
100
+ # extract info into a formatted df:
101
+ df = [] # list of dicts, future df
102
+ for entry in res_string_list:
103
+
104
+ entry_dict = {}
105
+ curr_header = None
106
+
107
+ for line in entry.split('\n'):
108
+ if line == '': continue
109
+
110
+ header = line[:12]
111
+ content = line[12:]
112
+ if header != ' '*12:
113
+ curr_header = header
114
+
115
+ if curr_header == 'ENTRY ':
116
+ gid = content.split(' ', 1)[0]
117
+ entry_dict['gid'] = gid
118
+
119
+ if curr_header == 'POSITION ':
120
+ entry_dict['pos'] = content.strip()
121
+
122
+ if curr_header == 'ORTHOLOGY ':
123
+ ko = content.split(' ', 1)[0]
124
+ entry_dict['ko'] = ko
125
+
126
+ if curr_header == 'MOTIF ':
127
+ db, value = content.strip().split(': ', 1)
128
+ entry_dict[db] = value.split(' ')
129
+
130
+ if curr_header == 'DBLINKS ':
131
+ db, value = content.strip().split(': ', 1)
132
+ entry_dict[db] = value.split(' ')
133
+
134
+ df.append(entry_dict)
135
+ df = pnd.DataFrame.from_records(df)
136
+
137
+
138
+ # save dataframe in the output dir:
139
+ with open(outfile, 'wb') as wb_handler:
140
+ pickle.dump(df, wb_handler)
141
+ logger.info(f"'{outfile}' created!")
142
+
143
+
144
+
145
+ return 0
@@ -7,3 +7,4 @@ from .metrics import *
7
7
  from .sbmlutils import *
8
8
  from .escherutils import *
9
9
  from .logutils import *
10
+ from .keggutils import *
@@ -236,7 +236,7 @@ def format_expansion(logger, eggnog):
236
236
 
237
237
 
238
238
  if eggnog == [] or eggnog == ['-']:
239
- eggnog = '-' # return always a list except for ths case
239
+ eggnog = '-' # return always a list except for this case
240
240
 
241
241
 
242
242
  return eggnog
@@ -31,7 +31,7 @@ def count_undrawn_rids(logger, universe, lastmap):
31
31
  filename = lastmap['filename']
32
32
  logger.debug(f"Last universal map version detected: '{filename}'.")
33
33
  if len(remainings) > 0:
34
- logger.info(f"Our universal map is {len(remainings)} reactions behind. Please draw!")
34
+ logger.warning(f"Our universal map is {len(remainings)} reactions behind. Please draw!")
35
35
  else:
36
36
  logger.info(f"Our universal map is {len(remainings)} reactions behind. Thank you ♥")
37
37
 
@@ -16,12 +16,6 @@ def write_excel_model(model, filepath, nofigs, df_E, df_B, df_P, df_S, df_C=None
16
16
 
17
17
 
18
18
 
19
- df_M = []
20
- df_R = []
21
- df_T = []
22
- df_A = []
23
-
24
-
25
19
  # format df_E: # biomass precursors biosynthesis
26
20
  if df_E is not None:
27
21
  df_E.insert(0, 'mid', '') # new columns as first
@@ -53,56 +47,93 @@ def write_excel_model(model, filepath, nofigs, df_E, df_B, df_P, df_S, df_C=None
53
47
  df_C = df_C.reset_index(drop=True)
54
48
 
55
49
 
50
+
51
+ # define dict-lists, future dataframes
52
+ df_M = []
53
+ df_R = []
54
+ df_T = []
55
+ df_G = []
56
+ df_A = []
57
+
56
58
  for m in model.metabolites:
59
+ row_dict = {'mid': m.id, 'name': m.name, 'formula': m.formula, 'charge': m.charge,}
57
60
 
58
- # get kc codes:
59
- if 'kegg.compound' not in m.annotation.keys(): kc_ids = ''
60
- else:
61
- kc_ids = m.annotation['kegg.compound']
62
- if type(kc_ids) == str: kc_ids = [kc_ids]
63
- kc_ids = '; '.join([i for i in kc_ids if i!='CXXXXX'])
64
-
65
- df_M.append({'mid': m.id, 'formula': m.formula, 'charge': m.charge, 'kc': kc_ids, 'name': m.name})
66
-
61
+ for db in m.annotation.keys():
62
+ annots = m.annotation[db]
63
+ if type(annots) == str: annots = [annots]
64
+ annots = '; '.join([i for i in annots])
65
+ row_dict[db] = annots
66
+ df_M.append(row_dict)
67
67
 
68
68
  for r in model.reactions:
69
+ row_dict = {'rid': r.id, 'name': r.name, 'rstring': r.reaction, 'gpr': "Not applicable", 'bounds': r.bounds}
70
+
71
+ for db in r.annotation.keys():
72
+ annots = r.annotation[db]
73
+ if type(annots) == str: annots = [annots]
74
+ annots = '; '.join([i for i in annots])
75
+ row_dict[db] = annots
69
76
 
70
77
  # handle artificial reactions
71
78
  if r.id == 'Biomass':
72
- df_A.append({'rid': r.id, 'rstring': r.reaction, 'type': 'biomass', 'name': r.name})
79
+ # commented as the type is inplicit in the ID
80
+ #row_dict['type'] = 'biomass'
81
+ df_A.append(row_dict)
73
82
 
74
83
  elif len(r.metabolites) == 1:
84
+ # commented as the type is inplicit in the ID
85
+ """
75
86
  if len(r.metabolites)==1 and list(r.metabolites)[0].id.rsplit('_',1)[-1] == 'e':
76
- df_A.append({'rid': r.id, 'rstring': r.reaction, 'type': 'exchange', 'name': r.name})
87
+ row_dict['type'] = 'exchange'
77
88
  elif r.lower_bound < 0 and r.upper_bound > 0:
78
- df_A.append({'rid': r.id, 'rstring': r.reaction, 'type': 'sink', 'name': r.name})
89
+ row_dict['type'] = 'sink'
79
90
  elif r.lower_bound == 0 and r.upper_bound > 0:
80
- df_A.append({'rid': r.id, 'rstring': r.reaction, 'type': 'demand', 'name': r.name})
91
+ row_dict['type'] = 'demand'
92
+ """
93
+ df_A.append(row_dict)
81
94
 
82
95
  else: # more than 1 metabolite involved
96
+ row_dict['gpr'] = r.gene_reaction_rule
83
97
 
84
- # get kr codes:
85
- if 'kegg.reaction' not in r.annotation.keys(): kr_ids = ''
86
- else:
87
- kr_ids = r.annotation['kegg.reaction']
88
- if type(kr_ids) == str: kr_ids = [kr_ids]
89
- kr_ids = '; '.join([i for i in kr_ids if i!='RXXXXX'])
90
-
91
98
  # introduce reaction in the correct table:
92
- r_dict = {'rid': r.id, 'rstring': r.reaction, 'kr': kr_ids, 'gpr': r.gene_reaction_rule, 'name': r.name}
93
99
  if len(set([m.id.rsplit('_',1)[-1] for m in r.metabolites])) == 1:
94
- df_R.append(r_dict)
95
- else: df_T.append(r_dict)
96
-
100
+ df_R.append(row_dict)
101
+ else: df_T.append(row_dict)
102
+
103
+ for g in model.genes:
104
+ row_dict = {'gid': g.id, 'involved_in': '; '.join([r.id for r in g.reactions])}
105
+
106
+ for db in g.annotation.keys():
107
+ annots = g.annotation[db]
108
+ if type(annots) == str: annots = [annots]
109
+ annots = '; '.join([i for i in annots])
110
+ row_dict[db] = annots
111
+ df_G.append(row_dict)
97
112
 
113
+ # create dataframes from dict-lists
98
114
  df_M = pnd.DataFrame.from_records(df_M)
99
115
  df_R = pnd.DataFrame.from_records(df_R)
100
116
  df_T = pnd.DataFrame.from_records(df_T)
101
117
  df_A = pnd.DataFrame.from_records(df_A)
118
+ df_G = pnd.DataFrame.from_records(df_G)
119
+
120
+ # sort columns
121
+ df_M_first_cols = ['mid', 'name', 'formula', 'charge']
122
+ df_M = df_M[df_M_first_cols + sorted([c for c in df_M.columns if c not in df_M_first_cols])]
123
+ df_R_first_cols = ['rid', 'name', 'rstring', 'gpr', 'bounds']
124
+ df_R = df_R[df_R_first_cols + sorted([c for c in df_R.columns if c not in df_R_first_cols])]
125
+ df_T = df_T[df_R_first_cols + sorted([c for c in df_T.columns if c not in df_R_first_cols])]
126
+ df_A = df_A[df_R_first_cols + sorted([c for c in df_A.columns if c not in df_R_first_cols])]
127
+ df_G_first_cols = ['gid', 'involved_in']
128
+ df_G = df_G[df_G_first_cols + sorted([c for c in df_G.columns if c not in df_G_first_cols])]
129
+
130
+
131
+
102
132
  with pnd.ExcelWriter(filepath, engine='xlsxwriter') as writer:
103
133
  df_M.to_excel(writer, sheet_name='Metabolites', index=False)
104
134
  df_R.to_excel(writer, sheet_name='Reactions', index=False)
105
135
  df_T.to_excel(writer, sheet_name='Transporters', index=False)
136
+ df_G.to_excel(writer, sheet_name='Genes', index=False)
106
137
  df_A.to_excel(writer, sheet_name='Artificials', index=False)
107
138
  if df_E is not None and len(df_E)!=0: df_E.to_excel(writer, sheet_name='Precursors', index=False)
108
139
  if df_B is not None: df_B.to_excel(writer, sheet_name='Biomass', index=False)
@@ -112,7 +143,7 @@ def write_excel_model(model, filepath, nofigs, df_E, df_B, df_P, df_S, df_C=None
112
143
  df_C.to_excel(writer, sheet_name='Coverage', index=False)
113
144
  if nofigs == False:
114
145
  worksheet = writer.sheets['Coverage']
115
- worksheet.insert_image('A1', 'df_C_F1.png', {'image_data': df_C_F1})
146
+ worksheet.insert_image('E3', 'df_C_F1.png', {'image_data': df_C_F1})
116
147
 
117
148
 
118
149
  sheets_dict = {
@@ -139,9 +170,10 @@ def comparative_table(logger, outdir, sheets_dicts):
139
170
  for sheets_dict in sheets_dicts:
140
171
  for index, row in sheets_dict['Reactions'].iterrows():
141
172
  if row['rid'] not in df_topology.index:
142
- df_topology.loc[row['rid'], 'rstring'] = row['rstring']
143
- df_topology.loc[row['rid'], 'kr'] = row['kr']
144
- df_topology.loc[row['rid'], 'name'] = row['name']
173
+ df_topology.loc[row['rid'], 'rid'] = row['rid']
174
+ for key, value in row.to_dict().items():
175
+ # force string to avoid errors with bounds
176
+ df_topology.loc[row['rid'], key] = '' if pnd.isna(value) else str(value)
145
177
  df_topology.loc[row['rid'], sheets_dict['model_id']] = 1
146
178
  for sheets_dict in sheets_dicts: # replace missing values:
147
179
  df_topology = df_topology.fillna({sheets_dict['model_id']: 0})
@@ -152,9 +184,10 @@ def comparative_table(logger, outdir, sheets_dicts):
152
184
  for sheets_dict in sheets_dicts:
153
185
  for index, row in sheets_dict['Reactions'].iterrows():
154
186
  if row['rid'] not in df_gprs.index:
155
- df_gprs.loc[row['rid'], 'rstring'] = row['rstring']
156
- df_gprs.loc[row['rid'], 'kr'] = row['kr']
157
- df_gprs.loc[row['rid'], 'name'] = row['name']
187
+ df_gprs.loc[row['rid'], 'rid'] = row['rid']
188
+ for key, value in row.to_dict().items():
189
+ # force string to avoid errors with bounds
190
+ df_gprs.loc[row['rid'], key] = '' if pnd.isna(value) else str(value)
158
191
  df_gprs.loc[row['rid'], sheets_dict['model_id']] = row['gpr']
159
192
  for sheets_dict in sheets_dicts: # replace missing values:
160
193
  df_gprs = df_gprs.fillna({sheets_dict['model_id']: 'missing'})