aiagents4pharma 1.13.1__py3-none-any.whl → 1.14.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (34) hide show
  1. aiagents4pharma/configs/config.yaml +2 -1
  2. aiagents4pharma/configs/talk2biomodels/__init__.py +1 -0
  3. aiagents4pharma/configs/talk2biomodels/agents/t2b_agent/default.yaml +2 -3
  4. aiagents4pharma/configs/talk2biomodels/tools/__init__.py +4 -0
  5. aiagents4pharma/configs/talk2biomodels/tools/ask_question/__init__.py +3 -0
  6. aiagents4pharma/talk2biomodels/__init__.py +1 -0
  7. aiagents4pharma/talk2biomodels/agents/t2b_agent.py +4 -2
  8. aiagents4pharma/talk2biomodels/api/__init__.py +6 -0
  9. aiagents4pharma/talk2biomodels/api/kegg.py +83 -0
  10. aiagents4pharma/talk2biomodels/api/ols.py +72 -0
  11. aiagents4pharma/talk2biomodels/api/uniprot.py +35 -0
  12. aiagents4pharma/talk2biomodels/states/state_talk2biomodels.py +21 -6
  13. aiagents4pharma/talk2biomodels/tests/test_api.py +57 -0
  14. aiagents4pharma/talk2biomodels/tests/test_ask_question.py +44 -0
  15. aiagents4pharma/talk2biomodels/tests/test_get_annotation.py +171 -0
  16. aiagents4pharma/talk2biomodels/tests/test_getmodelinfo.py +26 -0
  17. aiagents4pharma/talk2biomodels/tests/test_integration.py +126 -0
  18. aiagents4pharma/talk2biomodels/tests/test_param_scan.py +68 -0
  19. aiagents4pharma/talk2biomodels/tests/test_search_models.py +28 -0
  20. aiagents4pharma/talk2biomodels/tests/test_simulate_model.py +39 -0
  21. aiagents4pharma/talk2biomodels/tests/test_steady_state.py +90 -0
  22. aiagents4pharma/talk2biomodels/tools/__init__.py +1 -0
  23. aiagents4pharma/talk2biomodels/tools/ask_question.py +29 -8
  24. aiagents4pharma/talk2biomodels/tools/get_annotation.py +304 -0
  25. aiagents4pharma/talk2biomodels/tools/load_arguments.py +114 -0
  26. aiagents4pharma/talk2biomodels/tools/parameter_scan.py +91 -96
  27. aiagents4pharma/talk2biomodels/tools/simulate_model.py +14 -81
  28. aiagents4pharma/talk2biomodels/tools/steady_state.py +48 -89
  29. {aiagents4pharma-1.13.1.dist-info → aiagents4pharma-1.14.1.dist-info}/METADATA +1 -1
  30. {aiagents4pharma-1.13.1.dist-info → aiagents4pharma-1.14.1.dist-info}/RECORD +33 -17
  31. aiagents4pharma/talk2biomodels/tests/test_langgraph.py +0 -384
  32. {aiagents4pharma-1.13.1.dist-info → aiagents4pharma-1.14.1.dist-info}/LICENSE +0 -0
  33. {aiagents4pharma-1.13.1.dist-info → aiagents4pharma-1.14.1.dist-info}/WHEEL +0 -0
  34. {aiagents4pharma-1.13.1.dist-info → aiagents4pharma-1.14.1.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,304 @@
1
+ #!/usr/bin/env python3
2
+
3
+ """
4
+ This module contains the `GetAnnotationTool` for fetching species annotations
5
+ based on the provided model and species names.
6
+ """
7
+ import math
8
+ from typing import List, Annotated, Type
9
+ import logging
10
+ from dataclasses import dataclass
11
+ from pydantic import BaseModel, Field
12
+ import basico
13
+ import pandas as pd
14
+ from langgraph.types import Command
15
+ from langgraph.prebuilt import InjectedState
16
+ from langchain_core.tools.base import BaseTool
17
+ from langchain_core.tools.base import InjectedToolCallId
18
+ from langchain_core.messages import ToolMessage
19
+ from .load_biomodel import ModelData, load_biomodel
20
+ from ..api.uniprot import search_uniprot_labels
21
+ from ..api.ols import search_ols_labels
22
+ from ..api.kegg import fetch_kegg_annotations
23
+
24
# Module-level logging setup: configure the root logger at INFO level
# and create the logger used by this module.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Lower-case database prefixes that this module treats as OLS ontologies,
# both when fetching descriptions and when reformatting links.
ols_ontology_abbreviations = {
    'pato',
    'chebi',
    'sbo',
    'fma',
    'pr',
    'go',
}
29
+
30
def prepare_content_msg(species_not_found: List[str],
                        species_without_description: List[str]):
    """
    Prepare the content message.

    Args:
        species_not_found (List[str]): Names of species for which no
            annotation could be looked up.
        species_without_description (List[str]): Names of species whose
            annotation contained no descriptions.

    Returns:
        str: The success sentence, followed by one extra sentence for each
            non-empty list. Sentences are separated by a single space.
    """
    # Every sentence is a single-line literal so that neither source
    # indentation nor line breaks leak into the message, and the sentences
    # are joined with a space instead of being glued together.
    sentences = ['Successfully extracted annotations for the species.']
    if species_not_found:
        sentences.append(
            'The following species do not exist, and hence their '
            'annotations were not extracted: '
            f"{', '.join(species_not_found)}."
        )
    if species_without_description:
        sentences.append(
            'The descriptions for the following species were not found: '
            f"{', '.join(species_without_description)}."
        )
    return ' '.join(sentences)
45
+
46
@dataclass
class ArgumentData:
    """
    Dataclass for storing the argument data.
    """
    # Name of the experiment; the Annotated metadata is the instruction text
    # attached to the field.
    experiment_name: Annotated[str, "An AI assigned _ separated name of"
                               " the experiment based on human query"
                               " and the context of the experiment."
                               " This must be set before the experiment is run."]
    # Defaults to None; `GetAnnotationTool._run` then falls back to every
    # species of the loaded model. The description is written as single-line
    # literals so no newlines or source indentation end up in the schema text.
    list_species_names: List[str] = Field(
        default=None,
        description="List of species names to fetch annotations for."
                    " If not provided, annotations for all"
                    " species in the model will be fetched."
    )
61
+
62
class GetAnnotationInput(BaseModel):
    """
    Argument schema of the annotation tool.
    """
    # Regular (non-injected) arguments.
    arg_data: ArgumentData = Field(description="argument data")
    sys_bio_model: ModelData = Field(description="model data")

    # Arguments marked as injected: the tool-call id and the state.
    tool_call_id: Annotated[str, InjectedToolCallId]
    state: Annotated[dict, InjectedState]
70
+
71
class GetAnnotationTool(BaseTool):
    """
    Tool for fetching species annotations based on the provided model and species names.

    The tool loads the model, reads the MIRIAM annotations of the requested
    species, enriches them with a description looked up per database
    (UniProt, OLS ontologies, KEGG compound) and stores the resulting table
    in the state under ``dic_annotations_data``.
    """
    name: str = "get_annotation"
    # Written as single-line literals so that no newlines or source
    # indentation leak into the description text.
    description: str = (
        "A tool to extract annotations for a list of species names"
        " based on the provided model. Annotations include"
        " the species name, description, database, ID, link,"
        " and qualifier. The tool can handle multiple species"
        " in a single invoke."
    )
    args_schema: Type[BaseModel] = GetAnnotationInput
    return_direct: bool = False

    def _run(self,
             arg_data: ArgumentData,
             tool_call_id: Annotated[str, InjectedToolCallId],
             state: Annotated[dict, InjectedState],
             sys_bio_model: ModelData = None) -> str:
        """
        Run the tool.

        Args:
            arg_data (ArgumentData): Experiment name and the optional list of
                species names. ``list_species_names`` is overwritten with all
                species of the model when it is empty or None.
            tool_call_id (str): Id of the tool call; echoed in the stored
                data and in the returned ToolMessage.
            state (dict): State; ``state['sbml_file_path']`` is read and its
                last entry (if any) is used as the SBML file path.
            sys_bio_model (ModelData): Model data handed to ``load_biomodel``.

        Returns:
            A ``Command`` updating ``model_id``, ``sbml_file_path``,
            ``dic_annotations_data`` and ``messages``. When no annotation row
            could be collected, the plain content message (a string) is
            returned instead. The ``-> str`` annotation is kept unchanged
            for interface compatibility.

        Raises:
            ValueError: If no species table can be extracted from the model.
        """
        logger.info("Running the GetAnnotationTool tool for species %s, %s",
                    arg_data.list_species_names,
                    arg_data.experiment_name)

        # Prepare the model object
        sbml_file_path = state['sbml_file_path'][-1] if state['sbml_file_path'] else None
        model_object = load_biomodel(sys_bio_model, sbml_file_path=sbml_file_path)

        # Extract all the species names from the model
        df_species = basico.model_info.get_species(model=model_object.copasi_model)
        if df_species is None:
            # for example this may happen with model 20
            raise ValueError("Unable to extract species from the model.")

        # Fall back to every species of the model when none were requested
        arg_data.list_species_names = (arg_data.list_species_names
                                       or df_species.index.tolist())

        (annotations_df,
         species_not_found,
         species_without_description) = self._fetch_annotations(arg_data.list_species_names)

        # Nothing to process or store: only report what went wrong
        if annotations_df.empty:
            logger.warning("The annotations dataframe is empty.")
            return prepare_content_msg(species_not_found, species_without_description)

        # Process annotations
        annotations_df = self._process_annotations(annotations_df)

        # `sys_bio_model` defaults to None in the signature, so do not
        # dereference it unconditionally.
        # NOTE(review): whether `load_biomodel` itself accepts None is not
        # visible from this module - confirm.
        biomodel_id = sys_bio_model.biomodel_id if sys_bio_model is not None else None

        # Prepare the annotations data
        dic_annotations_data = {
            'name': arg_data.experiment_name,
            'source': biomodel_id if biomodel_id else 'upload',
            'tool_call_id': tool_call_id,
            'data': annotations_df.to_dict()
        }

        # Update the state with the annotations data. Every value is a
        # one-element list, hence always truthy: the former `if value:`
        # filter never dropped a key and has been removed.
        dic_updated_state_for_model = {
            "model_id": [biomodel_id],
            "sbml_file_path": [sbml_file_path],
            "dic_annotations_data": [dic_annotations_data],
        }

        return Command(
            update=dic_updated_state_for_model | {
                "messages": [
                    ToolMessage(
                        content=prepare_content_msg(species_not_found,
                                                    species_without_description),
                        artifact=True,
                        tool_call_id=tool_call_id
                    )
                ]
            }
        )

    def _fetch_annotations(self, list_species_names: List[str]) -> tuple:
        """
        Fetch annotations for the given species names from the model.
        In this method, we fetch the MIRIAM annotations for the species names.
        If the annotation is not found, we add the species to the list of
        species not found. If the annotation has no descriptions, we add the
        species to the list of species without description. Otherwise one
        row per description is added to the data list.

        Args:
            list_species_names (List[str]): List of species names to fetch annotations for.

        Returns:
            tuple: A tuple containing the annotations dataframe (columns
                "Species Name", "Link", "Qualifier"), species not found list,
                and description not found list.
        """
        species_not_found = []
        description_not_found = []
        data = []

        for species in list_species_names:
            # NOTE(review): no model is passed here, so basico presumably
            # resolves the name against its current model - confirm.
            annotation = basico.get_miriam_annotation(name=species)
            if annotation is None:
                species_not_found.append(species)
                continue

            # Treat a missing, None or empty description list alike
            descriptions = annotation.get("descriptions", [])
            if not descriptions:
                description_not_found.append(species)
                continue

            # One row per description of the species
            data.extend(
                {
                    "Species Name": species,
                    "Link": desc["id"],
                    "Qualifier": desc["qualifier"]
                }
                for desc in descriptions
            )

        # Create a dataframe from the data list
        annotations_df = pd.DataFrame(data)

        return annotations_df, species_not_found, description_not_found

    def _process_annotations(self, annotations_df: pd.DataFrame) -> pd.DataFrame:
        """
        Process annotations dataframe to add additional information.
        In this method, we add a new column for the ID, a new column for the database,
        and a new column for the description. We then process the link to format
        it correctly and reorder the columns.

        Args:
            annotations_df (pd.DataFrame): Annotations dataframe to process.
                It is not modified; a copy is processed.

        Returns:
            pd.DataFrame: Processed annotations dataframe with the columns
                "Species Name", "Description", "Database", "Id", "Link",
                "Qualifier".
        """
        logger.info("Processing annotations.")
        # Work on a copy so the caller's frame is left untouched
        annotations_df = annotations_df.copy()

        # The last two path segments of the link are the database and the ID
        link_parts = annotations_df['Link'].str.split('/')
        annotations_df['Id'] = link_parts.str[-1]
        annotations_df['Database'] = link_parts.str[-2]

        # Fetch descriptions for the IDs based on the database type
        # by querying the respective APIs
        identifiers = annotations_df[['Id', 'Database']].to_dict(orient='records')
        descriptions = self._fetch_descriptions(identifiers)

        # Look the description up by ID. If it is not found, use '-'
        annotations_df['Description'] = annotations_df['Id'].apply(
            lambda x: descriptions.get(x, '-'))

        # Format the link before reordering: assigning to a column of the
        # reordered (column-selected) frame would trigger pandas'
        # SettingWithCopyWarning.
        annotations_df['Link'] = annotations_df['Link'].apply(self._process_link)

        # Reorder the columns and return the processed dataframe
        return annotations_df[
            ["Species Name", "Description", "Database", "Id", "Link", "Qualifier"]
        ]

    def _process_link(self, link: str) -> str:
        """
        Process link to format it correctly.

        Every occurrence of ``<abbreviation>/`` for the OLS ontology
        abbreviations is removed, and ``kegg.compound/`` is rewritten to
        ``kegg.compound:``.

        Args:
            link (str): The link to process.

        Returns:
            str: The processed link.
        """
        # `str.replace` is a no-op when the pattern is absent, so no
        # membership test is needed beforehand.
        # NOTE(review): this is plain substring matching, not path-segment
        # matching - confirm no other link contains e.g. "go/" by accident.
        for ols_ontology_abbreviation in ols_ontology_abbreviations:
            link = link.replace(f"{ols_ontology_abbreviation}/", "")
        return link.replace("kegg.compound/", "kegg.compound:")

    def _fetch_descriptions(self, data: List[dict[str, str]]) -> dict[str, str]:
        """
        Fetch protein names or labels based on the database type.

        Args:
            data (List[dict[str, str]]): Records with the keys 'Id' and
                'Database'.

        Returns:
            dict[str, str]: Mapping from identifier to description; '-' is
                used when no description is available.
        """
        logger.info("Fetching descriptions for the IDs.")
        results = {}
        grouped_data = {}

        # Group the identifiers by lower-cased database. A missing database
        # (None, or NaN as produced by pandas) is grouped under None and its
        # identifier gets the placeholder description right away.
        for entry in data:
            identifier = entry.get('Id')
            database = entry.get('Database')
            is_missing = database is None or (
                isinstance(database, float) and math.isnan(database))
            if is_missing:
                database = None
                results[identifier or "unknown"] = "-"
            else:
                database = str(database).lower()
            grouped_data.setdefault(database, []).append(identifier)

        # Fetch the descriptions for the identifiers based on the database type
        for database, identifiers in grouped_data.items():
            if database == 'uniprot':
                results.update(search_uniprot_labels(identifiers))
            elif database in ols_ontology_abbreviations:
                annotations = search_ols_labels([
                    {"Id": id_, "Database": database}
                    for id_ in identifiers
                ])
                for identifier in identifiers:
                    results[identifier] = annotations.get(database, {}).get(identifier, "-")
            elif database == 'kegg.compound':
                # Separate name so the `data` parameter is not shadowed
                kegg_entries = [{"Id": identifier, "Database": "kegg.compound"}
                                for identifier in identifiers]
                annotations = fetch_kegg_annotations(kegg_entries)
                for identifier in identifiers:
                    results[identifier] = annotations.get(database, {}).get(identifier, "-")
            else:
                # Unknown or missing database: no lookup available
                for identifier in identifiers:
                    results[identifier] = "-"
        return results
@@ -0,0 +1,114 @@
1
+ #!/usr/bin/env python3
2
+
3
+ """
4
+ A utility module for defining the dataclasses
5
+ for the arguments to set up initial settings
6
+ before the experiment is run.
7
+ """
8
+
9
+ import logging
10
+ from dataclasses import dataclass
11
+ from typing import Union, List, Optional, Annotated
12
+ from pydantic import Field
13
+ import basico
14
+
15
# Module-level logging setup: configure the root logger at INFO level
# and create the logger used by this module.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
18
+
19
@dataclass
class TimeData:
    """
    Time settings of a simulation: its duration and its interval.
    """
    # Both fields accept an int or a float and default to 100.
    duration: Union[int, float] = Field(
        default=100,
        description="Duration of the simulation",
    )
    interval: Union[int, float] = Field(
        default=100,
        description=(
            "The interval is the time step or the step size of the simulation."
            " It is unrelated to the step size of species concentration"
            " and parameter values."
        ),
    )
32
+
33
@dataclass
class SpeciesInitialData:
    """
    Initial species data: species names and their initial concentrations,
    held in two separate lists.
    """
    # Names of the species to initialise; defaults to an empty list.
    species_name: List[str] = Field(
        default=[],
        description=(
            "List of species whose initial concentration is to be set. This does not"
            " include species that reoccur or the species whose concentration is to be"
            " determined/observed at the end of the experiment."
            " Do not hallucinate the species name."
        ),
    )
    # Initial concentrations (int or float); defaults to an empty list.
    species_concentration: List[Union[int, float]] = Field(
        default=[],
        description=(
            "List of initial concentrations of species. This does not"
            " include species that reoccur or the species whose concentration is to be"
            " determined/observed at the end of the experiment."
            " Do not hallucinate the species concentration."
        ),
    )
50
+
51
@dataclass
class TimeSpeciesNameConcentration:
    """
    One (time, species name, species concentration) record.
    """
    # When the event occurs.
    time: Union[int, float] = Field(
        description="time point where the event occurs",
    )
    # Which species is affected.
    species_name: str = Field(
        description="species name",
    )
    # Concentration of that species at the time point.
    species_concentration: Union[int, float] = Field(
        description="species concentration at the time point",
    )
61
+
62
@dataclass
class ReocurringData:
    """
    Container for species that reoccur, i.e. species whose
    concentration resets to a certain value after a certain
    time interval.
    """
    # List of time / name / concentration records; defaults to an empty list.
    data: List[TimeSpeciesNameConcentration] = Field(
        default=[],
        description=(
            "List of time, name, and concentration data of species"
            " or parameters that reoccur"
        ),
    )
73
+
74
@dataclass
class ArgumentData:
    """
    Bundle of the arguments that set up an experiment: its name plus
    optional time data, initial species data and reoccurring data.
    """
    # Name of the experiment; the Annotated metadata is the instruction text
    # attached to the field.
    experiment_name: Annotated[
        str,
        "An AI assigned _ separated name of the experiment"
        " based on human query and the context of the experiment."
        " This must be set before the experiment is run."
    ]
    # The three settings below are optional and default to None.
    time_data: Optional[TimeData] = Field(
        default=None,
        description="time data",
    )
    species_to_be_analyzed_before_experiment: Optional[SpeciesInitialData] = Field(
        default=None,
        description=(
            "Data of species whose initial concentration is to be set before"
            " the experiment. This does not include species that reoccur or the"
            " species whose concentration is to be determined at the end of"
            " the experiment."
        ),
    )
    reocurring_data: Optional[ReocurringData] = Field(
        default=None,
        description=(
            "List of concentration and time data of species that reoccur."
            " For example, a species whose concentration resets to a certain"
            " value after a certain time interval."
        ),
    )
97
+
98
def add_rec_events(model_object, reocurring_data):
    """
    Add reocurring events to the model.

    For every entry of ``reocurring_data.data`` one event is registered
    through ``basico.add_event``. The event is named
    ``<species_name>_<time>``, its trigger expression is ``Time > <time>``
    and its assignment pairs the species name with the concentration
    (as a string).

    Args:
        model_object: The model object; its ``copasi_model`` attribute is
            passed to basico as the target model.
        reocurring_data: The reocurring data, exposing a ``data`` list whose
            entries have ``time``, ``species_name`` and
            ``species_concentration``. May be None, in which case no event
            is added.

    Returns:
        None
    """
    # The `reocurring_data` field of ArgumentData is optional and defaults
    # to None, so tolerate it here instead of failing on `.data`.
    if reocurring_data is None:
        return
    for row in reocurring_data.data:
        tp, sn, sc = row.time, row.species_name, row.species_concentration
        basico.add_event(f'{sn}_{tp}',
                         f'Time > {tp}',
                         [[sn, str(sc)]],
                         model=model_object.copasi_model)