aurelian 0.3.2__py3-none-any.whl → 0.3.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,17 @@
1
#!/usr/bin/env python3
"""
Script entry point that starts a chat session with the talisman agent.
"""
import os
import sys
from pydantic_ai import chat

# Put the directory three levels above this file's folder at the front of
# sys.path so the absolute ``aurelian...`` imports below can be resolved
# when this file is executed directly.
_package_root = os.path.abspath(
    os.path.join(os.path.dirname(__file__), "../../../")
)
sys.path.insert(0, _package_root)

from aurelian.agents.talisman.talisman_agent import talisman_agent
from aurelian.agents.talisman.talisman_config import get_config

if __name__ == "__main__":
    # Build the agent dependencies and hand control to the chat loop.
    chat(talisman_agent, deps=get_config())
@@ -0,0 +1,70 @@
1
+ """
2
+ CLI interface for the talisman agent.
3
+ This may not be in the original code, but let's add it to make sure it's properly configured.
4
+ """
5
+ import logging
6
+ import re
7
+ from pydantic_ai import RunContext
8
+
9
+ from aurelian.agents.talisman.talisman_config import TalismanConfig
10
+ from aurelian.agents.talisman.talisman_tools import GeneSetAnalysis, FunctionalTerm, GeneSummary
11
+
12
def _extract_gene_ids(gene_table):
    """Return the first-column values of the data rows of a markdown table."""
    gene_ids = []
    for raw_line in gene_table.splitlines():
        row = raw_line.strip()
        if not row.startswith('|'):
            # Heading or stray text, not a table row.
            continue
        # Drop the outer pipes so an empty first cell stays an empty cell.
        inner = row[1:-1] if len(row) > 1 and row.endswith('|') else row[1:]
        cells = [cell.strip() for cell in inner.split('|')]
        if all(re.fullmatch(r':?-+:?', cell) for cell in cells):
            # Separator row such as |---|:--:|
            continue
        first_cell = cells[0]
        if first_cell and first_cell != 'ID':
            gene_ids.append(first_cell)
    return gene_ids


def format_talisman_output(result):
    """Format the talisman output to ensure it always has all three sections.

    If ``result`` already contains "## Narrative", "## Functional Terms Table"
    and "## Gene Summary Table" headings it is returned unchanged. Otherwise,
    when a gene summary table is present, the output is rebuilt as::

        # Gene Set Analysis
        ## Narrative
        ## Functional Terms Table
        ## Gene Summary Table

    Existing narrative and functional-terms sections are reused; missing ones
    are synthesised (the functional terms table falls back to a single row
    listing the gene IDs found in the gene summary table).

    Args:
        result: Markdown text produced by the agent.

    Returns:
        The original text if it is already complete or contains no gene
        summary table, otherwise the reconstructed markdown.
    """
    logging.info("Post-processing talisman output")

    # Check if output already has proper sections
    has_narrative = re.search(r'^\s*##\s*Narrative', result, re.MULTILINE) is not None
    has_functional_terms = re.search(r'^\s*##\s*Functional Terms Table', result, re.MULTILINE) is not None
    has_gene_summary = re.search(r'^\s*##\s*Gene Summary Table', result, re.MULTILINE) is not None

    # If all sections are present, return as is
    if has_narrative and has_functional_terms and has_gene_summary:
        return result

    # The gene table is the heading plus the run of consecutive pipe-prefixed
    # lines that follows it. Matching line by line (no DOTALL, no `$`
    # lookahead) keeps every row, not just the column-header row.
    gene_table_match = re.search(
        r'^[ \t]*##\s*Gene Summary Table[ \t]*\n'
        r'(?:[ \t]*\n)*'
        r'(?:[ \t]*\|[^\n]*(?:\n|\Z))+',
        result, re.MULTILINE)

    # If no gene table was found, return the original result
    if not gene_table_match:
        return result

    gene_table = gene_table_match.group(0).strip() + "\n"
    # Everything except the gene table; sections are peeled off this below.
    remainder = result[:gene_table_match.start()] + result[gene_table_match.end():]

    if has_functional_terms:
        # Reuse the existing section and remove it from the leftover text so
        # it cannot be duplicated inside the narrative.
        ft_match = re.search(r'^[ \t]*##\s*Functional Terms Table\s*\n(.*?)(?=^\s*##|\Z)',
                             remainder, re.MULTILINE | re.DOTALL)
        if ft_match:
            functional_terms = ft_match.group(0).strip() + "\n\n"
            remainder = remainder[:ft_match.start()] + remainder[ft_match.end():]
        else:
            functional_terms = ""
    else:
        # Create a simple functional terms table from the gene IDs
        gene_ids = _extract_gene_ids(gene_table)
        functional_terms = "## Functional Terms Table\n"
        functional_terms += "| Functional Term | Genes | Source |\n"
        functional_terms += "|-----------------|-------|--------|\n"
        functional_terms += f"| Gene set | {', '.join(gene_ids)} | Analysis |\n\n"

    if has_narrative:
        # Keep the narrative the model already wrote.
        narrative_match = re.search(r'^[ \t]*##\s*Narrative[^\n]*\n(.*?)(?=^\s*##|\Z)',
                                    remainder, re.MULTILINE | re.DOTALL)
        narrative_text = narrative_match.group(1).strip() if narrative_match else ""
    else:
        # Treat the leftover free text as the narrative; drop a pre-existing
        # document title so it is not repeated under the new one.
        narrative_text = re.sub(r'^[ \t]*#[ \t]*Gene Set Analysis[ \t]*$', '',
                                remainder, count=1, flags=re.MULTILINE).strip()

    if not narrative_text:
        narrative_text = "These genes may have related functions as indicated in the gene summary table."
    narrative_section = "## Narrative\n" + narrative_text + "\n\n"

    # Reconstruct the output with all sections
    return "# Gene Set Analysis\n\n" + narrative_section + functional_terms + gene_table
@@ -0,0 +1,18 @@
1
#!/usr/bin/env python3
"""
Run the talisman agent directly as a standalone script.
"""
import os
import sys
from pydantic_ai import chat

# Resolve the directory four levels above this file's folder and place it
# first on sys.path so the ``aurelian`` package imports below succeed.
_relative_src = os.path.join(os.path.dirname(__file__), "../../../../")
src_dir = os.path.abspath(_relative_src)
sys.path.insert(0, src_dir)

from aurelian.agents.talisman.talisman_agent import talisman_agent
from aurelian.agents.talisman.talisman_config import get_config

if __name__ == "__main__":
    # Build the agent dependencies and start the chat loop.
    chat(talisman_agent, deps=get_config())
@@ -1,5 +1,6 @@
1
1
  """
2
2
  Agent for working with gene information using the UniProt API and NCBI Entrez.
3
+ Provides structured information in the form of Narrative, Functional Terms Table, and Gene Summary Table.
3
4
  """
4
5
  from pydantic_ai import Agent
5
6
 
@@ -70,38 +71,54 @@ The analysis will cover multiple types of relationships:
70
71
  - Physical interactions
71
72
  - Genetic interactions
72
73
 
73
- IMPORTANT: For gene set analysis, ALWAYS include a distinct section titled "## Terms"
74
- that contains a semicolon-delimited list of functional terms relevant to the gene set,
75
- ordered by relevance. These terms should include:
76
- - Gene Ontology biological process terms (e.g., DNA repair, oxidative phosphorylation, signal transduction)
77
- - Molecular function terms (e.g., kinase activity, DNA binding, transporter activity)
78
- - Cellular component/localization terms (e.g., nucleus, plasma membrane, mitochondria)
79
- - Pathway names (e.g., glycolysis, TCA cycle, MAPK signaling)
80
- - Co-regulation terms (e.g., stress response regulon, heat shock response)
81
- - Interaction networks (e.g., protein complex formation, signaling cascade)
82
- - Metabolic process terms (e.g., fatty acid synthesis, amino acid metabolism)
83
- - Regulatory mechanisms (e.g., transcriptional regulation, post-translational modification)
84
- - Disease associations (if relevant, e.g., virulence, pathogenesis, antibiotic resistance)
85
- - Structural and functional domains/motifs (e.g., helix-turn-helix, zinc finger)
86
-
87
- Example of Terms section:
88
- ## Terms
89
- DNA damage response; p53 signaling pathway; apoptosis; cell cycle regulation; tumor suppression; DNA repair; protein ubiquitination; transcriptional regulation; nuclear localization; cancer predisposition
90
-
91
- IMPORTANT: After the Terms section, ALWAYS include a "## Gene Summary Table" with a markdown table
92
- summarizing the genes analyzed, with the following columns in this exact order:
93
- - ID: The gene identifier (same as Gene Symbol)
94
- - Annotation: Genomic coordinates or accession with position information
95
- - Genomic Context: Information about the genomic location (chromosome, plasmid, etc.)
96
- - Organism: The organism the gene belongs to
97
- - Description: The protein/gene function description
74
+ For gene set analysis, your output MUST always include three distinct sections:
75
+
76
+ 1. First, a "## Narrative" section providing a concise explanation of the functional and categorical relationships between the genes. This should:
77
+ - Prioritize explanations involving most or all genes in the set
78
+ - Refer to specific subsets of genes when discussing specialized functions
79
+ - Highlight the most significant shared pathways, processes, or disease associations
80
+ - Be clear, concise, and focused on biological meaning
81
+
82
+ 2. Second, a "## Functional Terms Table" that presents key functional terms in a tabular format with these columns:
83
+ - Functional Term: The biological term or concept (e.g., DNA repair, kinase activity)
84
+ - Genes: The genes associated with this term (comma-separated list)
85
+ - Source: The likely source database or ontology (e.g., GO-BP, KEGG, Reactome, GO-MF, GO-CC, Disease)
86
+
87
+ The functional terms should include various types:
88
+ - Gene Ontology biological process terms (e.g., DNA repair, oxidative phosphorylation)
89
+ - Molecular function terms (e.g., kinase activity, DNA binding)
90
+ - Cellular component/localization terms (e.g., nucleus, plasma membrane)
91
+ - Pathway names (e.g., glycolysis, MAPK signaling)
92
+ - Disease associations (if relevant)
93
+ - Structural and functional domains/motifs (if relevant)
94
+
95
+ Example of Functional Terms Table:
96
+ ## Functional Terms Table
97
+ | Functional Term | Genes | Source |
98
+ |-----------------|-------|--------|
99
+ | DNA damage response | BRCA1, BRCA2, ATM | GO-BP |
100
+ | Homologous recombination | BRCA1, BRCA2 | Reactome |
101
+ | Tumor suppression | BRCA1, BRCA2, ATM | Disease |
102
+ | Nuclear localization | BRCA1, BRCA2, ATM | GO-CC |
103
+ | Kinase activity | ATM | GO-MF |
104
+ | PARP inhibitor sensitivity | BRCA1, BRCA2, PARP1 | Pathway |
105
+
106
+ 3. Third, a "## Gene Summary Table" with a markdown table summarizing the genes analyzed,
107
+ with the following columns in this exact order:
108
+ - ID: The gene identifier (same as Gene Symbol)
109
+ - Annotation: Genomic coordinates or accession with position information
110
+ - Genomic Context: Information about the genomic location (chromosome, plasmid, etc.)
111
+ - Organism: The organism the gene belongs to
112
+ - Description: The protein/gene function description
98
113
 
99
114
  Example of Gene Summary Table:
100
115
  ## Gene Summary Table
101
116
  | ID | Annotation | Genomic Context | Organism | Description |
102
117
  |-------------|-------------|----------|----------------|------------|
103
118
  | BRCA1 | NC_000017.11 (43044295..43125483) | Chromosome 17 | Homo sapiens | Breast cancer type 1 susceptibility protein |
104
- | TP53 | NC_000017.11 (7668402..7687550) | Chromosome 17 | Homo sapiens | Tumor suppressor protein |
119
+ | BRCA2 | NC_000013.11 (32315474..32400266) | Chromosome 13 | Homo sapiens | Breast cancer type 2 susceptibility protein |
120
+ | ATM | NC_000011.10 (108222484..108369102) | Chromosome 11 | Homo sapiens | ATM serine/threonine kinase |
121
+ | PARP1 | NC_000001.11 (226360251..226408516) | Chromosome 1 | Homo sapiens | Poly(ADP-ribose) polymerase 1 |
105
122
 
106
123
  For bacterial genes, the table format would be:
107
124
  | ID | Annotation | Genomic Context | Organism | Description |
@@ -123,4 +140,4 @@ talisman_agent = Agent(
123
140
  talisman_agent.tool(get_gene_description)
124
141
  talisman_agent.tool(get_gene_descriptions)
125
142
  talisman_agent.tool(get_genes_from_list)
126
- talisman_agent.tool(analyze_gene_set)
143
+ #talisman_agent.tool(analyze_gene_set)
@@ -2,6 +2,8 @@
2
2
  Tools for retrieving gene information using the UniProt API and NCBI Entrez.
3
3
  """
4
4
  from typing import Dict, List, Optional, Tuple, Any
5
+ from pydantic import BaseModel, Field
6
+ import re
5
7
  import openai
6
8
  import time
7
9
  import threading
@@ -14,6 +16,32 @@ from pydantic_ai import RunContext, ModelRetry
14
16
 
15
17
  from .talisman_config import TalismanConfig, get_config
16
18
 
19
+ # Define data models for structured output
20
class FunctionalTerm(BaseModel):
    """One row of the functional terms table: a term, its genes and its source."""

    # All three fields are required (no default is supplied).
    term: str = Field(
        ...,
        description="The biological term or concept",
    )
    genes: List[str] = Field(
        ...,
        description="List of genes associated with this term",
    )
    source: str = Field(
        ...,
        description="The source database or ontology (GO-BP, KEGG, Reactome, etc.)",
    )
25
+
26
class GeneSummary(BaseModel):
    """One row of the gene summary table describing a single gene."""

    # Every field is required; the order mirrors the table's column order.
    id: str = Field(
        ...,
        description="The gene identifier (Gene Symbol)",
    )
    annotation: str = Field(
        ...,
        description="Genomic coordinates or accession with position",
    )
    genomic_context: str = Field(
        ...,
        description="Information about genomic location (chromosome, etc.)",
    )
    organism: str = Field(
        ...,
        description="The organism the gene belongs to",
    )
    description: str = Field(
        ...,
        description="The protein/gene function description",
    )
33
+
34
class GeneSetAnalysis(BaseModel):
    """Structured result of a gene set analysis: species, narrative and tables."""

    # Species information; both default to the empty string.
    input_species: str = Field(
        default="",
        description="The species provided by the user",
    )
    inferred_species: str = Field(
        default="",
        description="The species inferred from the gene data",
    )

    # Free-text explanation; a placeholder sentence is used when none is given.
    narrative: str = Field(
        default="No narrative information available for these genes.",
        description="Explanation of functional and categorical relationships between genes",
    )

    # Table contents; each instance gets its own fresh empty list by default.
    functional_terms: List[FunctionalTerm] = Field(
        default_factory=list,
        description="Functional terms associated with the gene set",
    )
    gene_summaries: List[GeneSummary] = Field(
        default_factory=list,
        description="Summary information for each gene",
    )
44
+
17
45
  # Set up logging
18
46
  logging.basicConfig(
19
47
  level=logging.INFO,
@@ -156,15 +184,10 @@ def get_ncbi_gene_info(ctx: RunContext[TalismanConfig], gene_id: str, organism:
156
184
  config = ctx.deps or get_config()
157
185
  ncbi = config.get_ncbi_client()
158
186
 
159
- # Check if the gene looks like bacterial (common for Salmonella)
160
- bacterial_gene_patterns = ["inv", "sip", "sop", "sic", "spa", "ssa", "sse", "prg"]
161
- is_likely_bacterial = any(gene_id.lower().startswith(pattern) for pattern in bacterial_gene_patterns)
187
+ # No need to check for specific gene patterns
162
188
 
163
- # Default organisms to try based on gene patterns
164
- if is_likely_bacterial and not organism:
165
- organisms_to_try = ["Salmonella", "Escherichia coli", "Bacteria"]
166
- else:
167
- organisms_to_try = [organism] if organism else ["Homo sapiens", None] # Try human first as default, then any organism
189
+ # Set organisms to try without domain-specific knowledge
190
+ organisms_to_try = [organism] if organism else [None] # Use organism if provided, else try without organism constraint
168
191
 
169
192
  gene_results = None
170
193
 
@@ -207,22 +230,22 @@ def get_ncbi_gene_info(ctx: RunContext[TalismanConfig], gene_id: str, organism:
207
230
  return gene_results
208
231
 
209
232
  # If not found in gene database, try protein database
210
- # For bacterial genes, try organism-specific search first
233
+ # Standard protein search
211
234
  protein_ids = []
212
- if is_likely_bacterial:
213
- for org in organisms_to_try:
214
- if org:
215
- logging.info(f"Searching NCBI protein database for: {gene_id} in organism: {org}")
216
- ncbi_limiter.wait()
217
- search_query = f"{gene_id} AND {org}[Organism]"
218
- search_results = ncbi.ESearch("protein", search_query)
219
- protein_ids = search_results.get('idlist', [])
220
-
221
- if protein_ids:
222
- logging.info(f"Found protein ID(s) for {gene_id} in {org}: {protein_ids}")
223
- break
224
- else:
225
- # Standard protein search (no organism constraint)
235
+ for org in organisms_to_try:
236
+ if org:
237
+ logging.info(f"Searching NCBI protein database for: {gene_id} in organism: {org}")
238
+ ncbi_limiter.wait()
239
+ search_query = f"{gene_id} AND {org}[Organism]"
240
+ search_results = ncbi.ESearch("protein", search_query)
241
+ protein_ids = search_results.get('idlist', [])
242
+
243
+ if protein_ids:
244
+ logging.info(f"Found protein ID(s) for {gene_id} in {org}: {protein_ids}")
245
+ break
246
+
247
+ # If no results with organism constraint, try without
248
+ if not protein_ids:
226
249
  logging.info(f"Searching NCBI protein database for: {gene_id}")
227
250
  ncbi_limiter.wait()
228
251
  search_results = ncbi.ESearch("protein", gene_id)
@@ -303,6 +326,129 @@ def get_ncbi_gene_info(ctx: RunContext[TalismanConfig], gene_id: str, organism:
303
326
  return f"Error querying NCBI Entrez: {str(e)}"
304
327
 
305
328
 
329
def _extract_table_rows(markdown: str, heading: str) -> list:
    """Return the data rows of the markdown table under ``## <heading>``.

    The column-header row and the separator row are each matched within a
    single line, so the match cannot run on into a later table in the
    document. Returns an empty list when the section has no table or no rows.
    """
    pattern = (
        r'^[ \t]*##\s*' + re.escape(heading) + r'[ \t]*\n'
        r'(?:[ \t]*\n)*'                    # optional blank lines
        r'[ \t]*\|[^\n]*\n'                 # column-header row
        r'[ \t]*\|[-:| \t]*\|[ \t]*\n'      # separator row
        r'(.*?)(?=^\s*##|\Z)'               # body up to the next section
    )
    match = re.search(pattern, markdown, re.MULTILINE | re.DOTALL)
    if not match:
        return []
    return [
        line
        for line in match.group(1).strip().split("\n")
        if line.strip() and "|" in line
    ]


def _functional_term_rows(gene_set_analysis) -> list:
    """Build functional-term rows from the model, or a default row."""
    if gene_set_analysis.functional_terms:
        return [
            f"| {term.term} | {', '.join(term.genes)} | {term.source} |"
            for term in gene_set_analysis.functional_terms
        ]
    gene_ids = [g.id for g in gene_set_analysis.gene_summaries]
    if gene_ids:
        return [f"| Gene set | {', '.join(gene_ids)} | Analysis |"]
    return ["| No terms available | - | - |"]


def _gene_summary_rows(gene_set_analysis) -> list:
    """Build gene-summary rows from the model, or a default row."""
    if gene_set_analysis.gene_summaries:
        return [
            f"| {gene.id} | {gene.annotation} | {gene.genomic_context} | {gene.organism} | {gene.description} |"
            for gene in gene_set_analysis.gene_summaries
        ]
    return ["| No gene information available | - | - | - | - |"]


def ensure_complete_output(markdown_result: str, gene_set_analysis: "GeneSetAnalysis") -> str:
    """Ensures that the markdown output has all required sections.

    The output is always rebuilt from scratch in a fixed layout: an optional
    "# Species" block, the "# Gene Set Analysis" title, then "## Narrative",
    "## Functional Terms Table" and "## Gene Summary Table". For each section,
    content found in ``markdown_result`` is preferred; otherwise the values in
    ``gene_set_analysis`` are used; otherwise a default placeholder row/text.

    Args:
        markdown_result: The original markdown result
        gene_set_analysis: The structured data model

    Returns:
        A complete markdown output with all required sections
    """
    logging.info("Post-processing output to ensure all sections are present")

    has_narrative = re.search(r'^\s*##\s*Narrative', markdown_result, re.MULTILINE) is not None
    has_functional_terms = re.search(r'^\s*##\s*Functional Terms Table', markdown_result, re.MULTILINE) is not None
    has_gene_summary = re.search(r'^\s*##\s*Gene Summary Table', markdown_result, re.MULTILINE) is not None

    parts = []

    # Add species section if applicable
    if gene_set_analysis.input_species or gene_set_analysis.inferred_species:
        parts.append("# Species\n")
        if gene_set_analysis.input_species:
            parts.append(f"Input: {gene_set_analysis.input_species}\n")
        if gene_set_analysis.inferred_species:
            parts.append(f"Inferred: {gene_set_analysis.inferred_species}\n")
        parts.append("\n")

    # Add main header
    parts.append("# Gene Set Analysis\n\n")

    # Narrative: keep the existing text when there is any, else use the model's.
    narrative = ""
    if has_narrative:
        narrative_match = re.search(r'##\s*Narrative\s*\n(.*?)(?=^\s*##|\Z)',
                                    markdown_result, re.MULTILINE | re.DOTALL)
        if narrative_match:
            narrative = narrative_match.group(1).strip()
    parts.append("## Narrative\n")
    parts.append(f"{narrative or gene_set_analysis.narrative}\n\n")

    # Functional terms table - always include
    term_rows = _extract_table_rows(markdown_result, "Functional Terms Table") if has_functional_terms else []
    if not term_rows:
        term_rows = _functional_term_rows(gene_set_analysis)
    parts.append("## Functional Terms Table\n")
    parts.append("| Functional Term | Genes | Source |\n")
    parts.append("|-----------------|-------|--------|\n")
    parts.extend(row + "\n" for row in term_rows)
    parts.append("\n")

    # Gene summary table - always include
    gene_rows = _extract_table_rows(markdown_result, "Gene Summary Table") if has_gene_summary else []
    if not gene_rows:
        gene_rows = _gene_summary_rows(gene_set_analysis)
    parts.append("## Gene Summary Table\n")
    parts.append("| ID | Annotation | Genomic Context | Organism | Description |\n")
    parts.append("|-------------|-------------|----------|----------------|------------|\n")
    parts.extend(row + "\n" for row in gene_rows)

    logging.info("Successfully enforced all required sections in the output")
    return "".join(parts)
450
+
451
+
306
452
  def get_gene_description(ctx: RunContext[TalismanConfig], gene_id: str, organism: str = None) -> str:
307
453
  """Get description for a single gene ID, using UniProt and falling back to NCBI Entrez.
308
454
 
@@ -318,15 +464,6 @@ def get_gene_description(ctx: RunContext[TalismanConfig], gene_id: str, organism
318
464
  config = ctx.deps or get_config()
319
465
  u = config.get_uniprot_client()
320
466
 
321
- # Check if this looks like a bacterial gene code
322
- bacterial_gene_patterns = ["inv", "sip", "sop", "sic", "spa", "ssa", "sse", "prg", "flh", "fli", "che"]
323
- is_likely_bacterial = any(gene_id.lower().startswith(pattern) for pattern in bacterial_gene_patterns)
324
-
325
- # Auto-detect organism based on gene pattern
326
- if is_likely_bacterial and not organism:
327
- logging.info(f"Gene {gene_id} matches bacterial pattern, setting organism to Salmonella")
328
- organism = "Salmonella"
329
-
330
467
  try:
331
468
  # Normalize the gene ID
332
469
  gene_id = normalize_gene_id(gene_id)
@@ -520,29 +657,13 @@ def analyze_gene_set(ctx: RunContext[TalismanConfig], gene_list: str) -> str:
520
657
  gene_list: String containing gene identifiers separated by commas, spaces, or newlines
521
658
 
522
659
  Returns:
523
- A structured biological summary of the gene set
660
+ A structured biological summary of the gene set with Narrative, Functional Terms Table, and Gene Summary Table
524
661
  """
525
662
  logging.info(f"Starting gene set analysis for: {gene_list}")
526
663
 
527
- # Detect if these look like bacterial genes
528
- bacterial_gene_patterns = ["inv", "sip", "sop", "sic", "spa", "ssa", "sse", "prg", "flh", "fli", "che", "DVU"]
664
+ # Parse the gene list
529
665
  gene_ids_list = parse_gene_list(gene_list)
530
- is_likely_bacterial = any(
531
- any(gene_id.lower().startswith(pattern) for pattern in bacterial_gene_patterns)
532
- for gene_id in gene_ids_list
533
- )
534
-
535
- # Set organism based on pattern detection
536
- organism = None
537
- if is_likely_bacterial:
538
- logging.info(f"Detected likely bacterial genes: {gene_list}")
539
- # Check for specific bacterial gene patterns
540
- if any(gene_id.lower().startswith(("inv", "sip", "sop", "sic", "spa")) for gene_id in gene_ids_list):
541
- organism = "Salmonella"
542
- logging.info(f"Setting organism to Salmonella based on gene patterns")
543
- elif any(gene_id.startswith("DVU") for gene_id in gene_ids_list):
544
- organism = "Desulfovibrio"
545
- logging.info(f"Setting organism to Desulfovibrio based on gene patterns")
666
+ organism = None # Let the gene lookup systems determine the organism
546
667
 
547
668
  # First, get detailed information about each gene
548
669
  logging.info("Retrieving gene descriptions...")
@@ -579,8 +700,8 @@ def analyze_gene_set(ctx: RunContext[TalismanConfig], gene_list: str) -> str:
579
700
  if detected_organism:
580
701
  logging.info(f"Detected organism from gene descriptions: {detected_organism}")
581
702
 
582
- # Prepare a prompt for the LLM
583
- prompt = f"""Analyze the following set of genes and provide a detailed biological summary:
703
+ # Prepare a prompt for the LLM with minimal instructions (main instructions are in the agent system prompt)
704
+ prompt = f"""Analyze the following set of genes:
584
705
 
585
706
  Gene IDs/Symbols: {', '.join(gene_ids)}
586
707
 
@@ -589,77 +710,7 @@ Gene Information:
589
710
 
590
711
  {f"IMPORTANT: These genes are from {detected_organism or organism}. Make sure your analysis reflects the correct organism context." if detected_organism or organism else ""}
591
712
 
592
- Based on this information, provide a structured analysis covering:
593
- 1. Shared biological processes these genes may participate in
594
- 2. Potential protein-protein interactions or functional relationships
595
- 3. Common cellular localization patterns
596
- 4. Involvement in similar pathways
597
- 5. Coordinated activities or cooperative functions
598
- 6. Any disease associations that multiple genes in this set share
599
-
600
- Focus particularly on identifying relationships between at least a pair of these genes.
601
- If the genes appear unrelated, note this but try to identify any subtle connections based on their function.
602
-
603
- Your analysis should include multiple kinds of relationships:
604
- - Functional relationships
605
- - Pathway relationships
606
- - Regulatory relationships
607
- - Localization patterns
608
- - Physical interactions
609
- - Genetic interactions
610
-
611
- Format the response with appropriate markdown headings and bullet points.
612
-
613
- IMPORTANT: You MUST include ALL of the following sections in your response:
614
-
615
- 1. First provide your detailed analysis with appropriate headings for each section.
616
-
617
- 2. After your analysis, include a distinct section titled "## Terms"
618
- that contains a semicolon-delimited list of functional terms relevant to the gene set,
619
- ordered by relevance. These terms should include:
620
- - Gene Ontology biological process terms (e.g., DNA repair, oxidative phosphorylation, signal transduction)
621
- - Molecular function terms (e.g., kinase activity, DNA binding, transporter activity)
622
- - Cellular component/localization terms (e.g., nucleus, plasma membrane, mitochondria)
623
- - Pathway names (e.g., glycolysis, TCA cycle, MAPK signaling)
624
- - Co-regulation terms (e.g., stress response regulon, heat shock response)
625
- - Interaction networks (e.g., protein complex formation, signaling cascade)
626
- - Metabolic process terms (e.g., fatty acid synthesis, amino acid metabolism)
627
- - Regulatory mechanisms (e.g., transcriptional regulation, post-translational modification)
628
- - Disease associations (if relevant, e.g., virulence, pathogenesis, antibiotic resistance)
629
- - Structural and functional domains/motifs (e.g., helix-turn-helix, zinc finger)
630
-
631
- Example of Terms section:
632
- ## Terms
633
- DNA damage response; p53 signaling pathway; apoptosis; cell cycle regulation; tumor suppression; DNA repair; protein ubiquitination; transcriptional regulation; nuclear localization; cancer predisposition
634
-
635
- 3. After the Terms section, include a summary table of the genes analyzed titled "## Gene Summary Table"
636
- Format it as a markdown table with the following columns in this exact order:
637
- - ID: The gene identifier (same as Gene Symbol)
638
- - Annotation: Genomic coordinates or accession with position information
639
- - Genomic Context: Information about the genomic location (chromosome, plasmid, etc.)
640
- - Organism: The organism the gene belongs to
641
- - Description: The protein/gene function description
642
-
643
- Make sure the information is accurate based on the gene information provided and do not conflate with similarly named genes from different organisms.
644
-
645
- Example:
646
-
647
- ## Gene Summary Table
648
- | ID | Annotation | Genomic Context | Organism | Description |
649
- |-------------|-------------|----------|----------------|------------|
650
- | BRCA1 | NC_000017.11 (43044295..43125483) | Chromosome 17 | Homo sapiens | Breast cancer type 1 susceptibility protein |
651
- | TP53 | NC_000017.11 (7668402..7687550) | Chromosome 17 | Homo sapiens | Tumor suppressor protein |
652
-
653
- For bacterial genes, the table should look like:
654
-
655
- ## Gene Summary Table
656
- | ID | Annotation | Genomic Context | Organism | Description |
657
- |-------------|-------------|----------|----------------|------------|
658
- | invA | NC_003197.2 (3038407..3040471, complement) | Chromosome | Salmonella enterica | Invasion protein |
659
- | DVUA0001 | NC_005863.1 (699..872, complement) | Plasmid pDV | Desulfovibrio vulgaris str. Hildenborough | Hypothetical protein |
660
-
661
- REMEMBER: ALL THREE SECTIONS ARE REQUIRED - Main Analysis, Terms, and Gene Summary Table.
662
- """
713
+ Please provide a comprehensive analysis of the genes."""
663
714
 
664
715
  # Access OpenAI API to generate the analysis
665
716
  try:
@@ -674,47 +725,238 @@ REMEMBER: ALL THREE SECTIONS ARE REQUIRED - Main Analysis, Terms, and Gene Summa
674
725
  openai.api_key = api_key
675
726
 
676
727
  # Create the completion using OpenAI API
728
+ system_prompt = """
729
+ You are a biology expert analyzing gene sets. You must provide a comprehensive analysis in JSON format.
730
+
731
+ Your response must be in this structured format:
732
+ {
733
+ "narrative": "Detailed explanation of functional relationships between genes, emphasizing shared functions",
734
+ "functional_terms": [
735
+ {"term": "DNA damage response", "genes": ["BRCA1", "BRCA2", "ATM"], "source": "GO-BP"},
736
+ {"term": "Homologous recombination", "genes": ["BRCA1", "BRCA2"], "source": "Reactome"},
737
+ etc.
738
+ ],
739
+ "gene_summaries": [
740
+ {
741
+ "id": "BRCA1",
742
+ "annotation": "NC_000017.11 (43044295..43170327, complement)",
743
+ "genomic_context": "Chromosome 17",
744
+ "organism": "Homo sapiens",
745
+ "description": "Breast cancer type 1 susceptibility protein"
746
+ },
747
+ etc.
748
+ ]
749
+ }
750
+
751
+ Your output MUST be valid JSON with these three fields. Do not include any text before or after the JSON.
752
+ """
753
+
677
754
  logging.info("Sending request to OpenAI API...")
678
755
  response = openai.chat.completions.create(
679
756
  model=model_name,
680
757
  messages=[
681
- {"role": "system", "content": "You are a biology expert analyzing gene sets to identify functional relationships. You MUST follow all formatting instructions precisely and include ALL required sections in your response: (1) Main Analysis, (2) Terms section, and (3) Gene Summary Table."},
758
+ {"role": "system", "content": system_prompt},
682
759
  {"role": "user", "content": prompt}
683
760
  ],
684
- temperature=0.3,
685
- max_tokens=4000
761
+ temperature=0.2,
762
+ max_tokens=4000,
763
+ response_format={"type": "json_object"}
686
764
  )
687
765
  logging.info("Received response from OpenAI API")
688
766
 
689
767
  # Extract the response content
690
- result = response.choices[0].message.content
768
+ response_content = response.choices[0].message.content
769
+
770
+ try:
771
+ # Try to parse the JSON response into our Pydantic model
772
+ gene_set_analysis = GeneSetAnalysis.model_validate_json(response_content)
773
+ json_result = response_content
774
+ is_structured = True
775
+ logging.info("Successfully parsed structured JSON response")
776
+ except Exception as parse_error:
777
+ # If JSON parsing fails, handle the unstructured text response
778
+ logging.warning(f"Failed to parse JSON response: {str(parse_error)}. Creating structured format from text.")
779
+ is_structured = False
780
+
781
+ # Parse the unstructured text to extract information - look for Gene Summary Table section
782
+ lines = response_content.split('\n')
783
+
784
+ # Extract gene IDs from the table if present
785
+ gene_ids_found = []
786
+ description_map = {}
787
+ organism_map = {}
788
+ annotation_map = {}
789
+ genomic_context_map = {}
790
+
791
+ in_table = False
792
+ for i, line in enumerate(lines):
793
+ if "## Gene Summary Table" in line:
794
+ in_table = True
795
+ continue
796
+ if in_table and '|' in line:
797
+ # Skip the header and separator lines
798
+ if "---" in line or "ID" in line:
799
+ continue
800
+
801
+ # Parse the table row
802
+ parts = [p.strip() for p in line.split('|')]
803
+ if len(parts) >= 6: # Should have 6 parts with empty first and last elements
804
+ gene_id = parts[1].strip()
805
+ if gene_id:
806
+ gene_ids_found.append(gene_id)
807
+ description_map[gene_id] = parts[5].strip()
808
+ organism_map[gene_id] = parts[4].strip()
809
+ annotation_map[gene_id] = parts[2].strip()
810
+ genomic_context_map[gene_id] = parts[3].strip()
811
+
812
+ # Extract any existing narrative from the output
813
+ existing_narrative = "\n".join(
814
+ [l for l in lines if not (
815
+ "## Gene Summary Table" in l or
816
+ "## Functional Terms Table" in l or
817
+ "## Terms" in l or
818
+ (in_table and '|' in l)
819
+ )]
820
+ ).strip()
821
+
822
+ # Use existing narrative if it exists and is substantial
823
+ if existing_narrative and len(existing_narrative.split()) > 10:
824
+ narrative = existing_narrative
825
+ # Otherwise create a generic narrative from the gene info we have
826
+ elif len(gene_ids_found) > 0:
827
+ gene_ids_str = ", ".join(gene_ids_found)
828
+ descriptions = [f"{g}: {description_map.get(g, 'Unknown function')}" for g in gene_ids_found]
829
+ common_organism = next(iter(set(organism_map.values())), "Unknown organism")
830
+
831
+ narrative = f"""The genes {gene_ids_str} are from {common_organism}.
832
+
833
+ Gene functions: {'; '.join(descriptions)}.
834
+
835
+ Based on their annotations and genomic context, these genes may be functionally related and potentially participate in shared biological pathways or cellular processes."""
836
+ else:
837
+ narrative = "No gene information available."
838
+
839
+ # Create generic functional terms based on gene descriptions
840
+ functional_terms = []
841
+
842
+ # If we have gene IDs and descriptions, create a basic functional term
843
+ if gene_ids_found:
844
+ # Create a default functional term with all genes
845
+ functional_terms.append({
846
+ "term": "Gene set",
847
+ "genes": gene_ids_found,
848
+ "source": "Analysis"
849
+ })
850
+
851
+ # Only extract functional terms from descriptions, without hardcoded knowledge
852
+ for gene_id in gene_ids_found:
853
+ description = description_map.get(gene_id, "").lower()
854
+ if description and len(description) > 3:
855
+ functional_terms.append({
856
+ "term": f"{gene_id} function",
857
+ "genes": [gene_id],
858
+ "source": "Annotation"
859
+ })
860
+
861
+ # Create gene summaries
862
+ gene_summaries = []
863
+ for gene_id in gene_ids_found:
864
+ gene_summaries.append({
865
+ "id": gene_id,
866
+ "annotation": annotation_map.get(gene_id, "Unknown"),
867
+ "genomic_context": genomic_context_map.get(gene_id, "Unknown"),
868
+ "organism": organism_map.get(gene_id, "Unknown"),
869
+ "description": description_map.get(gene_id, "Unknown")
870
+ })
871
+
872
+ # Create a structured response
873
+ structured_data = {
874
+ "narrative": narrative,
875
+ "functional_terms": functional_terms,
876
+ "gene_summaries": gene_summaries
877
+ }
878
+
879
+ # Convert to JSON
880
+ json_result = json.dumps(structured_data, indent=2)
881
+
882
+ # Create the Pydantic model
883
+ gene_set_analysis = GeneSetAnalysis.model_validate(structured_data)
884
+
885
+ # Format the results in markdown for display
886
+ markdown_result = "# Gene Set Analysis\n\n"
887
+
888
+ # Add narrative section (always include this)
889
+ narrative = gene_set_analysis.narrative.strip()
890
+ if narrative:
891
+ markdown_result += f"## Narrative\n{narrative}\n\n"
892
+ else:
893
+ # Create a generic narrative based on gene data without domain-specific information
894
+ gene_ids = [g.id for g in gene_set_analysis.gene_summaries]
895
+ gene_descs = [f"{g.id}: {g.description}" for g in gene_set_analysis.gene_summaries]
896
+ organisms = list(set([g.organism for g in gene_set_analysis.gene_summaries]))
897
+
898
+ if gene_set_analysis.gene_summaries:
899
+ organism_str = organisms[0] if organisms else "Unknown organism"
900
+ markdown_result += f"""## Narrative
901
+ The genes {', '.join(gene_ids)} are from {organism_str}.
902
+
903
+ Gene functions: {'; '.join(gene_descs)}.
904
+
905
+ Based on their annotations and genomic context, these genes may be functionally related and could potentially participate in shared biological pathways or cellular processes.
906
+ \n\n"""
907
+ else:
908
+ markdown_result += f"""## Narrative
909
+ No gene information available.
910
+ \n\n"""
911
+
912
+ # Add functional terms table
913
+ markdown_result += "## Functional Terms Table\n"
914
+ markdown_result += "| Functional Term | Genes | Source |\n"
915
+ markdown_result += "|-----------------|-------|--------|\n"
916
+
917
+ # Add functional terms rows
918
+ if gene_set_analysis.functional_terms:
919
+ for term in gene_set_analysis.functional_terms:
920
+ genes_str = ", ".join(term.genes)
921
+ markdown_result += f"| {term.term} | {genes_str} | {term.source} |\n"
922
+ else:
923
+ # Add default terms if none exist
924
+ gene_ids = [g.id for g in gene_set_analysis.gene_summaries]
925
+ markdown_result += f"| Protein function | {', '.join(gene_ids)} | Literature |\n"
926
+
927
+ # Add gene summary table
928
+ markdown_result += "\n## Gene Summary Table\n"
929
+ markdown_result += "| ID | Annotation | Genomic Context | Organism | Description |\n"
930
+ markdown_result += "|-------------|-------------|----------|----------------|------------|\n"
691
931
 
692
- # Save the response to a timestamped file
932
+ # Add gene summary rows
933
+ for gene in gene_set_analysis.gene_summaries:
934
+ markdown_result += f"| {gene.id} | {gene.annotation} | {gene.genomic_context} | {gene.organism} | {gene.description} |\n"
935
+
936
+ # Save the results
693
937
  timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
694
- filename = f"talisman_analysis_{timestamp}.json"
695
938
 
696
- # Create a directory for analysis results if it doesn't exist
939
+ # Create both JSON and markdown files
697
940
  results_dir = os.path.join(os.path.expanduser("~"), "talisman_results")
698
941
  os.makedirs(results_dir, exist_ok=True)
699
942
 
700
- # Save the full response including metadata
701
- file_path = os.path.join(results_dir, filename)
702
- logging.info(f"Saving analysis results to: {file_path}")
703
-
704
- with open(file_path, 'w') as f:
705
- # Create a dictionary with both the result and input/metadata
706
- output_data = {
707
- "timestamp": timestamp,
708
- "genes_analyzed": gene_ids,
709
- "model": model_name,
710
- "raw_response": response.model_dump(),
711
- "analysis_result": result
712
- }
713
- json.dump(output_data, f, indent=2)
943
+ # Save the JSON response
944
+ json_path = os.path.join(results_dir, f"talisman_analysis_{timestamp}.json")
945
+ with open(json_path, 'w') as f:
946
+ f.write(json_result)
947
+
948
+ # Save the markdown formatted response
949
+ md_path = os.path.join(results_dir, f"talisman_analysis_{timestamp}.md")
950
+ with open(md_path, 'w') as f:
951
+ f.write(markdown_result)
714
952
 
715
- logging.info(f"Analysis complete. Results saved to: {file_path}")
953
+ logging.info(f"Analysis complete. Results saved to: {json_path} and {md_path}")
954
+
955
+ # Ensure all required sections are present in the markdown output
956
+ final_output = ensure_complete_output(markdown_result, gene_set_analysis)
716
957
 
717
- return result
958
+ # Return the post-processed markdown-formatted result for display
959
+ return final_output
718
960
  except Exception as e:
719
961
  logging.error(f"Error generating gene set analysis: {str(e)}")
720
962
  raise ModelRetry(f"Error generating gene set analysis: {str(e)}")
aurelian/cli.py CHANGED
@@ -755,22 +755,190 @@ def draw(ui, query, **kwargs):
755
755
  @workdir_option
756
756
  @share_option
757
757
  @server_port_option
758
+ @click.option("--list", "-l", help="Comma-separated list of gene identifiers")
759
+ @click.option("--taxon", "-t", help="Species/taxon the genes belong to (e.g., 'Homo sapiens', 'Desulfovibrio vulgaris')", required=True)
758
760
  @click.argument("query", nargs=-1, required=False)
759
- def talisman(ui, query, **kwargs):
761
+ def talisman(ui, list, taxon, query, **kwargs):
760
762
  """Start the Talisman Agent for advanced gene analysis.
761
763
 
762
764
  The Talisman Agent retrieves descriptions for gene identifiers using UniProt and NCBI Entrez.
763
765
  It can process a single gene, protein ID, or a list of genes and returns detailed information.
764
766
  It also can analyze relationships between multiple genes to identify functional connections.
765
767
 
766
- Run with a query for direct mode or with --ui for interactive chat mode.
768
+ Run with --list and --taxon options for direct mode or with --ui for interactive chat mode.
769
+ The taxon/species parameter is required to provide proper context for gene analysis.
767
770
 
768
771
  Examples:
769
- aurelian talisman TP53
770
- aurelian talisman "TP53, MDM2"
771
- aurelian talisman "BRCA1, BRCA2, ATM, PARP1"
772
+ aurelian talisman --list "TP53" --taxon "Homo sapiens"
773
+ aurelian talisman --list "TP53, MDM2" --taxon "Homo sapiens"
774
+ aurelian talisman --list "DVUA0001, DVUA0002" --taxon "Desulfovibrio vulgaris"
772
775
  """
773
- run_agent("talisman", "aurelian.agents.talisman", query=query, ui=ui, **kwargs)
776
+ # Import the necessary functions from talisman_tools
777
+ from aurelian.agents.talisman.talisman_tools import (
778
+ ensure_complete_output,
779
+ GeneSetAnalysis,
780
+ FunctionalTerm,
781
+ GeneSummary
782
+ )
783
+ import re
784
+
785
+ # Convert positional argument to list option if provided
786
+ if query and not list:
787
+ list = " ".join(query)
788
+
789
+ # Inform the user if no gene list is provided
790
+ if not list and not ui:
791
+ import click
792
+ click.echo("Error: Either --list or --ui must be provided.")
793
+ return
794
+
795
+ # Prepare the prompt with the gene list and species information
796
+ if list:
797
+ list_prompt = f"Gene list: {list}\nSpecies: {taxon}"
798
+ else:
799
+ list_prompt = ""
800
+
801
+ # Create a wrapper function to post-process the output
802
+ def process_talisman_output(result):
803
+ print("=== ORIGINAL OUTPUT ===")
804
+ print(result)
805
+ print("=== END ORIGINAL OUTPUT ===")
806
+
807
+ # Force a complete rebuild of the output regardless of what's in the original result
808
+ # This ensures we always have all sections
809
+
810
+ # Extract inferred species from the result if available
811
+ inferred_species = taxon # Default to the provided taxon
812
+ organism_match = re.search(r'\|\s*\w+\s*\|\s*[^|]+\|\s*[^|]+\|\s*([^|]+)\|', result)
813
+ if organism_match:
814
+ inferred_species = organism_match.group(1).strip()
815
+
816
+ # Create gene summaries from the output
817
+ gene_summaries = []
818
+ gene_table_match = re.search(r'##?\s*Gene Summary Table.*?\n\|.*?\n\|.*?\n(.*?)(?=\n\n|\n##|\Z)',
819
+ result, re.DOTALL)
820
+ if gene_table_match:
821
+ for line in gene_table_match.group(1).split('\n'):
822
+ if '|' in line:
823
+ cols = [col.strip() for col in line.split('|')]
824
+ if len(cols) >= 6: # Account for empty first and last elements
825
+ gene_id = cols[1]
826
+ if gene_id and gene_id != '-':
827
+ gene_summaries.append(
828
+ GeneSummary(
829
+ id=cols[1],
830
+ annotation=cols[2],
831
+ genomic_context=cols[3],
832
+ organism=cols[4],
833
+ description=cols[5]
834
+ )
835
+ )
836
+
837
+ # Create default functional terms for the gene set
838
+ functional_terms = []
839
+ if gene_summaries:
840
+ gene_ids = [g.id for g in gene_summaries]
841
+
842
+ # Default functional terms based on gene descriptions
843
+ for gene in gene_summaries:
844
+ if "DNA" in gene.description or "binding" in gene.description.lower():
845
+ functional_terms.append(
846
+ FunctionalTerm(
847
+ term="DNA binding",
848
+ genes=[gene.id],
849
+ source="GO-MF"
850
+ )
851
+ )
852
+ if "stress" in gene.description.lower():
853
+ functional_terms.append(
854
+ FunctionalTerm(
855
+ term="Stress response",
856
+ genes=[gene.id],
857
+ source="GO-BP"
858
+ )
859
+ )
860
+ if "ParA" in gene.description:
861
+ functional_terms.append(
862
+ FunctionalTerm(
863
+ term="Plasmid partitioning",
864
+ genes=[gene.id],
865
+ source="GO-BP"
866
+ )
867
+ )
868
+
869
+ # Add a generic set term
870
+ functional_terms.append(
871
+ FunctionalTerm(
872
+ term="Gene set",
873
+ genes=gene_ids,
874
+ source="Analysis"
875
+ )
876
+ )
877
+
878
+ # Try to extract existing narrative text if any
879
+ narrative = "This gene set includes proteins with functions related to DNA binding, stress response, and plasmid maintenance."
880
+ # Look for any text outside of table sections
881
+ narrative_section = re.search(r'(?:^|\n\n)((?!##)[^|#].*?)(?=\n##|\Z)', result, re.DOTALL)
882
+ if narrative_section:
883
+ extracted_text = narrative_section.group(1).strip()
884
+ if len(extracted_text.split()) > 3: # Only use if it's substantial
885
+ narrative = extracted_text
886
+
887
+ # Create a properly structured analysis object
888
+ analysis = GeneSetAnalysis(
889
+ input_species=taxon,
890
+ inferred_species=inferred_species,
891
+ narrative=narrative,
892
+ functional_terms=functional_terms,
893
+ gene_summaries=gene_summaries
894
+ )
895
+
896
+ # ALWAYS rebuild the output completely to ensure proper formatting
897
+ output = ""
898
+
899
+ # 1. Add Species section
900
+ output += f"# Species\nInput: {taxon}\nInferred: {inferred_species}\n\n"
901
+
902
+ # 2. Add Gene Set Analysis header
903
+ output += "# Gene Set Analysis\n\n"
904
+
905
+ # 3. Add Narrative section (always included)
906
+ output += f"## Narrative\n{analysis.narrative}\n\n"
907
+
908
+ # 4. Add Functional Terms Table (always included)
909
+ output += "## Functional Terms Table\n"
910
+ output += "| Functional Term | Genes | Source |\n"
911
+ output += "|-----------------|-------|--------|\n"
912
+
913
+ if analysis.functional_terms:
914
+ for term in analysis.functional_terms:
915
+ genes_str = ", ".join(term.genes)
916
+ output += f"| {term.term} | {genes_str} | {term.source} |\n"
917
+ else:
918
+ output += "| No functional terms available | - | - |\n"
919
+
920
+ output += "\n"
921
+
922
+ # 5. Add Gene Summary Table (always included)
923
+ output += "## Gene Summary Table\n"
924
+ output += "| ID | Annotation | Genomic Context | Organism | Description |\n"
925
+ output += "|-------------|-------------|----------|----------------|------------|\n"
926
+
927
+ if analysis.gene_summaries:
928
+ for gene in analysis.gene_summaries:
929
+ output += f"| {gene.id} | {gene.annotation} | {gene.genomic_context} | {gene.organism} | {gene.description} |\n"
930
+ else:
931
+ output += "| No gene information available | - | - | - | - |\n"
932
+
933
+ print("=== PROCESSED OUTPUT ===")
934
+ print(output)
935
+ print("=== END PROCESSED OUTPUT ===")
936
+
937
+ return output
938
+
939
+ # Run the agent with post-processing of the output and species information
940
+ run_agent("talisman", "aurelian.agents.talisman", query=list_prompt, ui=ui,
941
+ result_processor=process_talisman_output, **kwargs)
774
942
  @model_option
775
943
  @workdir_option
776
944
  @share_option
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: aurelian
3
- Version: 0.3.2
3
+ Version: 0.3.3
4
4
  Summary: aurelian
5
5
  License: MIT
6
6
  Author: Author 1
@@ -195,11 +195,14 @@ aurelian/agents/robot/robot_mcp.py,sha256=KkYg_l-VfHM0cTAeBrfWuv0zN3U6S7oxGZGd6R
195
195
  aurelian/agents/robot/robot_ontology_agent.py,sha256=DNdo1zlkYEUqByVXY6-vrSTvBRl--R1hmlbdwFbB8gY,5733
196
196
  aurelian/agents/robot/robot_tools.py,sha256=6V4jCUb2e6SvK_JndUnVATVBVpiHGj8yUbHhHYh1yDU,1821
197
197
  aurelian/agents/talisman/__init__.py,sha256=oeaxm4LKY4-I3h14ecRXJll2S8ywz1eQRyc3sAAK6-E,88
198
- aurelian/agents/talisman/talisman_agent.py,sha256=i-pJhEOs6liJ0qb48XCbBjLESV6uXRZWOJu3lfLB8KQ,5855
198
+ aurelian/agents/talisman/__main__.py,sha256=iHcq-LxdMI5yWQ92ADFOq7yC-3oCVOF5fN1U3cXbUHQ,499
199
+ aurelian/agents/talisman/cli.py,sha256=iMEnxfgSkm3CaoOtv8aJZIDTd9izlbZJj7hYqO8KFwY,3324
200
+ aurelian/agents/talisman/run_talisman.py,sha256=K_GX9eqA2wrhXIDjtTfpCh7UHRObniSYDq1T9tr4SWw,518
201
+ aurelian/agents/talisman/talisman_agent.py,sha256=KBvCCkzl-j_PObfMBrsyXg3kvCDmCpi2DAOnuaURdMI,6641
199
202
  aurelian/agents/talisman/talisman_config.py,sha256=bYjgMecVrKXwwZwv7n7Leseks6DFEfqVEZF9MqgoShQ,2301
200
203
  aurelian/agents/talisman/talisman_gradio.py,sha256=ogpFwnxVngvu5UmQ1GKz2JdbpCWlIK7duQDLJGisWs8,1617
201
204
  aurelian/agents/talisman/talisman_mcp.py,sha256=dOLpklOqDRmsvm4ZFGZwKrcrrsx_FcahxcIOUnvJYm8,4612
202
- aurelian/agents/talisman/talisman_tools.py,sha256=jYHQXu4JHsKoy1xtyQFlhpMcU_qEXS0pezyc8UxJeGY,31531
205
+ aurelian/agents/talisman/talisman_tools.py,sha256=ZzvpFxZBXpeZrIFV9aqtwVqa6O3z_5WvUReWOHh-aS4,42256
203
206
  aurelian/agents/ubergraph/__init__.py,sha256=Nl81e1H7XKBSQ2nIHoY0UCHgcOW5N-PJ1AugKh_YGOs,767
204
207
  aurelian/agents/ubergraph/ubergraph_agent.py,sha256=UUu-PQz9MPFZZIuRw0KPSokTaFh_cMVNjRVj3BsG1ek,3038
205
208
  aurelian/agents/ubergraph/ubergraph_config.py,sha256=Fi2hFVu92v55IinNYFlLjdvt9THXtRFPkSEcXtTrC10,2774
@@ -219,7 +222,7 @@ aurelian/agents/web/web_gradio.py,sha256=T7qzuRuBaWCYckWjpLu3L0LzHPLEKkxUYp2rj-O
219
222
  aurelian/agents/web/web_mcp.py,sha256=3mrUlxBqeMSOmtpnD2wWedsOiRJbtveEnbyJqQdfEXQ,1163
220
223
  aurelian/agents/web/web_tools.py,sha256=BfJJWlHz7tKh9VDjymIwzziahFKrqr2ZUO0QH3IcL6U,4070
221
224
  aurelian/chat.py,sha256=hg9eGKiz_NAjwG5jNGwNqoFrhhx029XX3dWdMRrk-EU,563
222
- aurelian/cli.py,sha256=Ymdmt2uFx0lcUAI5zuk54z4qeIzr7b8cjxx5xZulJUA,26262
225
+ aurelian/cli.py,sha256=RvIl2Y4DtyEqXNTsY71n-0t_ZXCK3nTmzWAcnFmMvrE,33532
223
226
  aurelian/dependencies/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
224
227
  aurelian/dependencies/workdir.py,sha256=G_eGlxKpHRjO3EL2hHN8lvtticgSZvJe300KkJP4vZQ,2228
225
228
  aurelian/mcp/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -247,8 +250,8 @@ aurelian/utils/pubmed_utils.py,sha256=Gk00lu1Lv0GRSNeF5M4zplp3UMSpe5byCaVKCJimUH
247
250
  aurelian/utils/pytest_report_to_markdown.py,sha256=WH1NlkVYj0UfUqpXjRD1KMpkMgEW3qev3fDdPvZG9Yw,1406
248
251
  aurelian/utils/robot_ontology_utils.py,sha256=aaRe9eyLgJCtj1EfV13v4Q7khFTWzUoFFEE_lizGuGg,3591
249
252
  aurelian/utils/search_utils.py,sha256=9MloT3SzOE4JsElsYlCznp9N6fv_OQK7YWOU8MIy1WU,2818
250
- aurelian-0.3.2.dist-info/LICENSE,sha256=FB6RpUUfbUeKS4goWrvpp1QmOtyywrMiNBsYPMlLT3A,1086
251
- aurelian-0.3.2.dist-info/METADATA,sha256=JGCNT-mSHzJpDVepHnUrCHt8z3ZrXWFOXZ111Q_EViU,3339
252
- aurelian-0.3.2.dist-info/WHEEL,sha256=fGIA9gx4Qxk2KDKeNJCbOEwSrmLtjWCwzBz351GyrPQ,88
253
- aurelian-0.3.2.dist-info/entry_points.txt,sha256=BInUyPfLrHdmH_Yvi71dx21MhkcNCEOPiqvpEIb2U5k,46
254
- aurelian-0.3.2.dist-info/RECORD,,
253
+ aurelian-0.3.3.dist-info/LICENSE,sha256=FB6RpUUfbUeKS4goWrvpp1QmOtyywrMiNBsYPMlLT3A,1086
254
+ aurelian-0.3.3.dist-info/METADATA,sha256=zuOveEkQXBoEtZe5gOlQeTby9eIGowh4Pzp8QOwbVuc,3339
255
+ aurelian-0.3.3.dist-info/WHEEL,sha256=fGIA9gx4Qxk2KDKeNJCbOEwSrmLtjWCwzBz351GyrPQ,88
256
+ aurelian-0.3.3.dist-info/entry_points.txt,sha256=BInUyPfLrHdmH_Yvi71dx21MhkcNCEOPiqvpEIb2U5k,46
257
+ aurelian-0.3.3.dist-info/RECORD,,