npcsh 1.0.31__py3-none-any.whl → 1.0.32__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (35)
  1. npcsh/alicanto.py +1001 -1015
  2. npcsh/corca.py +61 -21
  3. npcsh/routes.py +16 -15
  4. {npcsh-1.0.31.dist-info → npcsh-1.0.32.dist-info}/METADATA +1 -1
  5. {npcsh-1.0.31.dist-info → npcsh-1.0.32.dist-info}/RECORD +35 -35
  6. {npcsh-1.0.31.data → npcsh-1.0.32.data}/data/npcsh/npc_team/alicanto.npc +0 -0
  7. {npcsh-1.0.31.data → npcsh-1.0.32.data}/data/npcsh/npc_team/alicanto.png +0 -0
  8. {npcsh-1.0.31.data → npcsh-1.0.32.data}/data/npcsh/npc_team/bash_executer.jinx +0 -0
  9. {npcsh-1.0.31.data → npcsh-1.0.32.data}/data/npcsh/npc_team/corca.npc +0 -0
  10. {npcsh-1.0.31.data → npcsh-1.0.32.data}/data/npcsh/npc_team/corca.png +0 -0
  11. {npcsh-1.0.31.data → npcsh-1.0.32.data}/data/npcsh/npc_team/edit_file.jinx +0 -0
  12. {npcsh-1.0.31.data → npcsh-1.0.32.data}/data/npcsh/npc_team/foreman.npc +0 -0
  13. {npcsh-1.0.31.data → npcsh-1.0.32.data}/data/npcsh/npc_team/frederic.npc +0 -0
  14. {npcsh-1.0.31.data → npcsh-1.0.32.data}/data/npcsh/npc_team/frederic4.png +0 -0
  15. {npcsh-1.0.31.data → npcsh-1.0.32.data}/data/npcsh/npc_team/guac.png +0 -0
  16. {npcsh-1.0.31.data → npcsh-1.0.32.data}/data/npcsh/npc_team/image_generation.jinx +0 -0
  17. {npcsh-1.0.31.data → npcsh-1.0.32.data}/data/npcsh/npc_team/internet_search.jinx +0 -0
  18. {npcsh-1.0.31.data → npcsh-1.0.32.data}/data/npcsh/npc_team/kadiefa.npc +0 -0
  19. {npcsh-1.0.31.data → npcsh-1.0.32.data}/data/npcsh/npc_team/kadiefa.png +0 -0
  20. {npcsh-1.0.31.data → npcsh-1.0.32.data}/data/npcsh/npc_team/npcsh.ctx +0 -0
  21. {npcsh-1.0.31.data → npcsh-1.0.32.data}/data/npcsh/npc_team/npcsh_sibiji.png +0 -0
  22. {npcsh-1.0.31.data → npcsh-1.0.32.data}/data/npcsh/npc_team/plonk.npc +0 -0
  23. {npcsh-1.0.31.data → npcsh-1.0.32.data}/data/npcsh/npc_team/plonk.png +0 -0
  24. {npcsh-1.0.31.data → npcsh-1.0.32.data}/data/npcsh/npc_team/plonkjr.npc +0 -0
  25. {npcsh-1.0.31.data → npcsh-1.0.32.data}/data/npcsh/npc_team/plonkjr.png +0 -0
  26. {npcsh-1.0.31.data → npcsh-1.0.32.data}/data/npcsh/npc_team/python_executor.jinx +0 -0
  27. {npcsh-1.0.31.data → npcsh-1.0.32.data}/data/npcsh/npc_team/screen_cap.jinx +0 -0
  28. {npcsh-1.0.31.data → npcsh-1.0.32.data}/data/npcsh/npc_team/sibiji.npc +0 -0
  29. {npcsh-1.0.31.data → npcsh-1.0.32.data}/data/npcsh/npc_team/sibiji.png +0 -0
  30. {npcsh-1.0.31.data → npcsh-1.0.32.data}/data/npcsh/npc_team/spool.png +0 -0
  31. {npcsh-1.0.31.data → npcsh-1.0.32.data}/data/npcsh/npc_team/yap.png +0 -0
  32. {npcsh-1.0.31.dist-info → npcsh-1.0.32.dist-info}/WHEEL +0 -0
  33. {npcsh-1.0.31.dist-info → npcsh-1.0.32.dist-info}/entry_points.txt +0 -0
  34. {npcsh-1.0.31.dist-info → npcsh-1.0.32.dist-info}/licenses/LICENSE +0 -0
  35. {npcsh-1.0.31.dist-info → npcsh-1.0.32.dist-info}/top_level.txt +0 -0
npcsh/alicanto.py CHANGED
@@ -1,1089 +1,1075 @@
1
+ import json
2
+ import requests
3
+ import argparse
1
4
  import os
2
- import random
3
- from typing import List, Dict, Any, Optional, Union, Tuple
4
- import numpy as np
5
- from collections import defaultdict, Counter
6
- import itertools
7
- import matplotlib.pyplot as plt
8
- from matplotlib.figure import Figure
9
- from io import BytesIO
10
- import base64
11
- import datetime
12
- import tempfile
13
5
  import subprocess
14
- import networkx as nx
6
+ import tempfile
7
+ import random
8
+ import shutil
9
+ from typing import List, Dict, Any, Optional, Tuple
10
+ from datetime import datetime
11
+ from dataclasses import dataclass, asdict, field
12
+ from pathlib import Path
13
+ from concurrent.futures import ThreadPoolExecutor
15
14
 
16
- from npcpy.npc_compiler import NPC
17
- from npcpy.llm_funcs import get_llm_response, extract_facts, identify_groups, assign_groups_to_fact
15
+ from npcpy.tools import auto_tools
16
+ from npcpy.llm_funcs import get_llm_response
17
+ from npcpy.data.web import search_web
18
+ from npcpy.npc_compiler import NPC, Team
18
19
  from npcsh._state import NPCSH_CHAT_MODEL, NPCSH_CHAT_PROVIDER
19
- from npcpy.npc_sysenv import print_and_process_stream_with_markdown
20
-
21
-
22
-
23
- def generate_random_npcs(num_npcs: int,
24
- model: str,
25
- provider: str,
26
- request: str) -> List[NPC]:
27
- """
28
- Generate a diverse set of NPCs with different expertise and perspectives
29
- related to the research request.
30
- """
31
-
32
- if num_npcs == 1:
33
-
34
- name = f"Expert Researcher on {request}"
35
- expertise = "Interdisciplinary semantic theory researcher"
36
- background = "Extensive experience in linguistics, cognitive science, and NLP"
37
- perspective = "Combines formal logic with empirical linguistic evidence"
38
- quirk = "Uses mathematical metaphors to explain language phenomena"
39
- biases = "May favor formal approaches over descriptive linguistics"
40
-
41
- system_prompt = f"""
42
- You are {name}, {expertise}.
43
-
44
- Background: {background}
45
-
46
- Your perspective: {perspective}
47
-
48
- Your methodological quirk: {quirk}
49
-
50
- Note: Be aware that you may have these biases: {biases}
51
-
52
- Your task is to research the given topic thoroughly, focusing on your unique perspective.
53
- Challenge conventional thinking and identify unexpected connections.
54
- Your insights should be provocative and novel.
55
-
56
- IMPORTANT: You must be extremely concise. Limit responses to 50-75 words maximum.
57
- """
58
-
59
- npc = NPC(name=name, primary_directive=f"Research expert on {request}")
60
- npc.system_prompt = system_prompt
61
- return [npc]
62
-
63
-
64
- prompt = f"""
65
- For the research topic: "{request}"
66
-
67
- Generate {num_npcs} diverse expert personas who would have different valuable perspectives on this topic.
68
- I need truly diverse and unusual viewpoints that can lead to innovative insights.
69
-
70
- For each expert, provide:
71
- 1. A name
72
- 2. Their field of expertise (be creative - include unconventional and interdisciplinary fields)
73
- 3. Their background/experience (include unusual career paths and perspectives)
74
- 4. Their unique perspective or approach to the topic (emphasize contrarian, minority, or unexpected viewpoints)
75
- 5. A methodological quirk that makes their research approach unusual
76
- 6. Any potential biases they might have
77
- """
20
+
21
+ from litellm.exceptions import Timeout, ContextWindowExceededError
22
+ import pandas as pd
23
+ import numpy as np
24
+
25
+ from npcsh.wander import perform_single_wandering
26
+
27
+ @dataclass
28
+ class ResearchStep:
29
+ step: int
30
+ thought: str
31
+ action: str
32
+ outcome: str
33
+
34
+ @dataclass
35
+ class SubAgentTrace:
36
+ hypothesis: str
37
+ agent_name: str
38
+ agent_persona: str
39
+ steps: List[ResearchStep] = field(default_factory=list)
40
+ final_files: Dict[str, str] = field(default_factory=dict)
41
+ was_successful: bool = False
42
+
43
+ @dataclass
44
+ class Paper:
45
+ title: str = ""
46
+ abstract: str = ""
47
+ introduction: List[str] = field(default_factory=list)
48
+ methods: List[str] = field(default_factory=list)
49
+ results: List[str] = field(default_factory=list)
50
+ discussion: List[str] = field(default_factory=list)
51
+
52
+ def create_file(filename: str, content: str) -> str:
53
+ filepath = os.path.abspath(filename)
54
+ if os.path.exists(filepath):
55
+ return f"Error: File '{filename}' already exists. Use append_to_file or replace_in_file to modify."
56
+ os.makedirs(os.path.dirname(filepath), exist_ok=True)
57
+ with open(filepath, 'w') as f:
58
+ f.write(content)
59
+ return f"File '{filename}' created successfully."
60
+
61
+ def append_to_file(filename: str, content: str) -> str:
62
+ filepath = os.path.abspath(filename)
63
+ if not os.path.exists(filepath):
64
+ return f"Error: File '{filename}' not found. Use create_file first."
65
+ with open(filepath, 'a') as f:
66
+ f.write("\n" + content)
67
+ return f"Content appended to '{filename}'."
68
+
69
+ def replace_in_file(filename: str, old_content: str, new_content: str) -> str:
70
+ filepath = os.path.abspath(filename)
71
+ if not os.path.exists(filepath):
72
+ return f"Error: File '{filename}' not found."
73
+ with open(filepath, 'r') as f:
74
+ file_contents = f.read()
75
+ file_contents = file_contents.replace(old_content, new_content)
76
+ with open(filepath, 'w') as f:
77
+ f.write(file_contents)
78
+ return f"Content in '{filename}' replaced."
79
+
80
+ def read_file(filename: str) -> str:
81
+ filepath = os.path.abspath(filename)
82
+ if not os.path.exists(filepath):
83
+ return f"Error: File '{filename}' not found."
84
+ with open(filepath, 'r') as f:
85
+ return f.read()
86
+
87
+ def list_files(directory: str = ".") -> List[str]:
88
+ return os.listdir(directory)
89
+
90
+
91
+
92
+ from datasets import load_dataset
93
+ from sklearn.feature_extraction.text import TfidfVectorizer
94
+ from sklearn.metrics.pairwise import cosine_similarity
95
+
96
+ DATASET_CACHE = None
97
+ SEARCH_INDEX = None
98
+
99
+ def load_and_combine_datasets() -> pd.DataFrame:
100
+ all_papers = []
78
101
 
79
- response = get_llm_response(
80
- prompt=prompt,
81
- model=model,
82
- provider=provider,
83
- format="json"
84
- )
102
+ try:
103
+ research_papers = load_dataset("ta-datalab/research_papers", split="train")
104
+ for paper in research_papers:
105
+ all_papers.append({
106
+ 'title': paper.get('title', ''),
107
+ 'abstract': paper.get('abstract', ''),
108
+ 'authors': paper.get('authors', []),
109
+ 'year': paper.get('year', None),
110
+ 'venue': paper.get('venue', ''),
111
+ 'url': paper.get('url', ''),
112
+ 'paperId': paper.get('id', ''),
113
+ 'citationCount': 0,
114
+ 'source': 'research_papers'
115
+ })
116
+ except Exception as e:
117
+ print(f"Failed to load ta-datalab/research_papers: {e}")
85
118
 
86
-
87
- experts_data = response.get('response', [])
119
+ try:
120
+ ml_papers = load_dataset("CShorten/ML-ArXiv-Papers", split="train")
121
+ for paper in ml_papers:
122
+ all_papers.append({
123
+ 'title': paper.get('title', ''),
124
+ 'abstract': paper.get('abstract', ''),
125
+ 'authors': paper.get('authors', '').split(', ') if paper.get('authors') else [],
126
+ 'year': paper.get('year', None),
127
+ 'venue': 'arXiv',
128
+ 'url': paper.get('url', ''),
129
+ 'paperId': paper.get('id', ''),
130
+ 'citationCount': 0,
131
+ 'source': 'ml_arxiv'
132
+ })
133
+ except Exception as e:
134
+ print(f"Failed to load CShorten/ML-ArXiv-Papers: {e}")
88
135
 
89
-
90
- npcs = []
136
+ try:
137
+ astro_papers = load_dataset("ashishkgpian/astrorag_papers", split="train")
138
+ for paper in astro_papers:
139
+ all_papers.append({
140
+ 'title': paper.get('title', ''),
141
+ 'abstract': paper.get('abstract', ''),
142
+ 'authors': paper.get('authors', []),
143
+ 'year': paper.get('year', None),
144
+ 'venue': paper.get('venue', ''),
145
+ 'url': paper.get('url', ''),
146
+ 'paperId': paper.get('id', ''),
147
+ 'citationCount': 0,
148
+ 'source': 'astrorag'
149
+ })
150
+ except Exception as e:
151
+ print(f"Failed to load ashishkgpian/astrorag_papers: {e}")
91
152
 
92
-
93
- if isinstance(experts_data, list):
94
- experts_to_process = experts_data[:num_npcs]
95
- else:
96
-
97
- if isinstance(experts_data, dict):
98
- experts_to_process = [experts_data]
99
- else:
100
-
101
- experts_to_process = [{
102
- "name": f"Expert_1",
103
- "expertise": "Interdisciplinary researcher",
104
- "background": "Diverse academic and practical experience",
105
- "perspective": "Balanced analysis with focus on innovative connections",
106
- "methodological_quirk": "Uses unconventional conceptual frameworks",
107
- "biases": "Tends toward theoretical rather than practical solutions"
108
- }]
153
+ df = pd.DataFrame(all_papers)
154
+ df = df.dropna(subset=['title', 'abstract'])
155
+ df = df[df['abstract'].str.len() > 50]
156
+ return df
157
+
158
+ def create_search_index(df: pd.DataFrame):
159
+ search_texts = df['title'].fillna('') + ' ' + df['abstract'].fillna('')
160
+ vectorizer = TfidfVectorizer(max_features=10000, stop_words='english', ngram_range=(1, 2))
161
+ tfidf_matrix = vectorizer.fit_transform(search_texts)
162
+ return {'vectorizer': vectorizer, 'tfidf_matrix': tfidf_matrix, 'dataframe': df}
163
+
164
+ def initialize_dataset_search():
165
+ global DATASET_CACHE, SEARCH_INDEX
166
+ if DATASET_CACHE is None:
167
+ DATASET_CACHE = load_and_combine_datasets()
168
+ if SEARCH_INDEX is None:
169
+ SEARCH_INDEX = create_search_index(DATASET_CACHE)
170
+ return SEARCH_INDEX
171
+
172
+ import time
173
+
174
+ LAST_S2_REQUEST_TIME = 0
175
+ S2_RATE_LIMIT_DELAY = 1.0
176
+
177
+ def search_semantic_scholar(query: str, limit: int = 10) -> List[Dict[str, Any]]:
178
+ global LAST_S2_REQUEST_TIME
109
179
 
110
- for expert in experts_to_process:
111
- name = expert.get("name", f"Expert_{len(npcs)}")
112
-
113
-
114
- system_prompt = f"""
115
- You are {name}, {expert.get('expertise', 'an expert researcher')}.
116
-
117
- Background: {expert.get('background', 'You have extensive knowledge in your field.')}
118
-
119
- Your perspective: {expert.get('perspective', 'You provide detailed, balanced analysis.')}
120
-
121
- Your methodological quirk: {expert.get('methodological_quirk', 'You approach problems in unconventional ways.')}
122
-
123
- Note: Be aware that you may have these biases: {expert.get('biases', 'None specifically noted.')}
124
-
125
- Your task is to research the given topic thoroughly, focusing on your unique perspective and methodological approach.
126
- Challenge conventional thinking, explore neglected angles, and identify unexpected connections or contradictions.
127
- Your insights should be provocative and novel, not just rehashing mainstream views.
128
-
129
- IMPORTANT: You must be extremely concise. Limit responses to 50-75 words maximum. Focus on substance over verbosity.
130
- Prioritize precision, clarity, and insight density. Eliminate unnecessary words and focus on communicating
131
- the essence of your insights in the most efficient way possible.
132
- """
133
-
134
-
135
- npc = NPC(name=name, primary_directive=f"Research expert on {request}")
136
- npc.system_prompt = system_prompt
137
- npcs.append(npc)
180
+ api_key = os.environ.get('S2_API_KEY')
181
+ if not api_key:
182
+ return []
138
183
 
139
- return npcs
140
-
141
- def generate_research_chain(request: str,
142
- npc: NPC, depth: int,
143
- memory: int = 3,
144
- context: str = None,
145
- model: str = None,
146
- provider: str = None,
147
- exploration_factor: float = 0.3,
148
- creativity_factor: float = 0.5) -> List[str]:
149
- """
150
- Generate a chain of research thoughts from a single NPC, diving deeper with each step.
184
+ current_time = time.time()
185
+ time_since_last = current_time - LAST_S2_REQUEST_TIME
151
186
 
152
- Args:
153
- request: The research question/topic
154
- npc: The NPC generating the research
155
- depth: How many steps of research to perform
156
- memory: How many previous steps to include in context
157
- context: Additional context to include
158
- model: LLM model to use
159
- provider: LLM provider to use
160
- exploration_factor: Probability (0-1) of exploring a tangential direction
161
- creativity_factor: Probability (0-1) of pursuing highly creative or unusual ideas
187
+ if time_since_last < S2_RATE_LIMIT_DELAY:
188
+ sleep_time = S2_RATE_LIMIT_DELAY - time_since_last
189
+ print(f"Rate limiting: sleeping {sleep_time:.2f}s before S2 request")
190
+ time.sleep(sleep_time)
162
191
 
163
- Returns:
164
- List of research findings/thoughts from this chain
165
- """
166
- chain = []
192
+ LAST_S2_REQUEST_TIME = time.time()
167
193
 
168
-
169
- initial_prompt = f"""
170
- Research request: {request}
194
+ url = "https://api.semanticscholar.org/graph/v1/paper/search"
195
+ headers = {"x-api-key": api_key}
196
+ params = {
197
+ "query": query,
198
+ "limit": limit,
199
+ "fields": "title,abstract,authors,year,citationCount,url,tldr"
200
+ }
171
201
 
172
- {f"Additional context: {context}" if context else ""}
202
+ try:
203
+ response = requests.get(url, headers=headers, params=params,
204
+ timeout=30)
205
+ response.raise_for_status()
206
+ return response.json().get('data', [])
207
+ except requests.exceptions.RequestException as e:
208
+ print(f"Semantic Scholar API error: {e}")
209
+ return []
210
+
211
+ def search_papers(query: str, limit: int = 10) -> List[Dict]:
212
+ s2_results = search_semantic_scholar(query, limit)
213
+ if s2_results:
214
+ return s2_results
215
+
216
+ search_index = initialize_dataset_search()
217
+ query_vector = search_index['vectorizer'].transform([query])
218
+ similarities = cosine_similarity(query_vector, search_index['tfidf_matrix']).flatten()
219
+ top_indices = similarities.argsort()[-limit:][::-1]
220
+ results = [search_index['dataframe'].iloc[idx].to_dict() for idx in top_indices if similarities[idx] > 0.01]
221
+ return results
222
+
223
+ def execute_shell_command(command: str) -> Dict[str, Any]:
224
+ try:
225
+ result = subprocess.run(command, shell=True, capture_output=True, text=True, timeout=60)
226
+ return {
227
+ "success": result.returncode == 0,
228
+ "stdout": result.stdout,
229
+ "stderr": result.stderr
230
+ }
231
+ except Exception as e:
232
+ return {"success": False, "stderr": str(e)}
233
+
234
+ def update_paper(paper_state: Paper, section: str, content: str) -> Paper:
235
+ if not hasattr(paper_state, section):
236
+ return paper_state
237
+ target_section = getattr(paper_state, section)
238
+ if isinstance(target_section, list):
239
+ target_section.append(content)
240
+ else:
241
+ setattr(paper_state, section, content)
242
+ return paper_state
243
+
244
+ def get_creative_ideas_for_stuck_agent(
245
+ problem_description: str,
246
+ npc: NPC,
247
+ model: str,
248
+ provider: str
249
+ ) -> str:
250
+ print(f"\n--- SUB-AGENT {npc.name} IS STUCK, INITIATING WANDER ---")
251
+ _, _, raw_brainstorm, _, _ = perform_single_wandering(
252
+ problem=problem_description,
253
+ npc=npc,
254
+ model=model,
255
+ provider=provider
256
+ )
257
+ return raw_brainstorm
258
+
259
+
260
+ @dataclass
261
+ class FileProvenance:
262
+ filename: str
263
+ step_history: List[Tuple[int, str, str, str]] = field(default_factory=list)
264
+
265
+ def get_filesystem_state() -> Dict[str, str]:
266
+ import hashlib
267
+ files = {}
268
+ for f in os.listdir("."):
269
+ if os.path.isfile(f):
270
+ with open(f, 'rb') as file:
271
+ content = file.read()
272
+ files[f] = hashlib.md5(content).hexdigest()[:8]
273
+ return files
274
+
275
+ def summarize_step(thought: str,
276
+ action: str,
277
+ outcome: str,
278
+ fs_before: Dict[str, str],
279
+ fs_after: Dict[str, str],
280
+ file_provenance: Dict[str, FileProvenance],
281
+ step_num: int,
282
+ model: str,
283
+ provider: str,
284
+ npc: NPC) -> str:
285
+
286
+ import hashlib
287
+ import os
288
+
289
+
290
+ current_files = {}
291
+ for f in os.listdir("."):
292
+ if os.path.isfile(f):
293
+ with open(f, 'rb') as file:
294
+ content = file.read()
295
+ current_files[f] = {
296
+ 'size': len(content),
297
+ 'checksum': hashlib.md5(content).hexdigest()[:8]
298
+ }
299
+
300
+
301
+ for f in fs_after:
302
+ if f not in file_provenance:
303
+ file_provenance[f] = FileProvenance(filename=f)
304
+
305
+ change_summary = ""
306
+ if f not in fs_before:
307
+ change_summary = f"Created with {current_files[f]['size']} bytes"
308
+ file_provenance[f].step_history.append((step_num, "CREATED", fs_after[f], change_summary))
309
+ elif fs_before.get(f) != fs_after[f]:
310
+ change_summary = f"Modified to {current_files[f]['size']} bytes"
311
+ file_provenance[f].step_history.append((step_num, "MODIFIED", fs_after[f], change_summary))
312
+
313
+
314
+ provenance_summary = []
315
+ for filename, prov in file_provenance.items():
316
+ history = "; ".join([f"Step {step}: {action} ({checksum}) - {changes}" for step, action, checksum, changes in prov.step_history])
317
+ provenance_summary.append(f"{filename}: {history}")
318
+
319
+ prompt = f"""AGENT'S REASONING: {thought}
320
+
321
+ AGENT'S ACTION: {action}
322
+ AGENT'S CLAIMED OUTCOME: {outcome}
323
+
324
+ COMPLETE FILE PROVENANCE:
325
+ {chr(10).join(provenance_summary)}
326
+
327
+ CURRENT FILESYSTEM:
328
+ Files: {list(current_files.keys())}
329
+ Details: {current_files}
330
+
331
+ Explain plainly what happened and whether the actions produced any measurable effects. If the agent is only thinking rather than acting, it is likely time to direct it to
332
+ carry out a specific action.
333
+
334
+ Return JSON with "summary" and "next_step" keys.""" + """
335
+
336
+ {
337
+ "summary": " a summary of what they did and claimed and the extent to which it produced the intended outcome .",
338
+ "next_step": "The concrete next step for the agent to carry out in their research.
339
+
340
+ }
341
+ """
173
342
 
174
- As {npc.name}, begin your research process by:
175
- 1. Analyzing what you know about this topic
176
- 2. Identifying key questions that need to be explored
177
- 3. Providing initial insights based on your expertise and unique perspective
343
+ response = get_llm_response(prompt, model=model, provider=provider, npc=npc, format='json')
344
+ summary_data = response.get('response')
345
+
346
+ return summary_data
347
+
348
+
349
+
350
+ from sqlalchemy import create_engine, Column, Integer, String, Text, DateTime
351
+ from sqlalchemy.ext.declarative import declarative_base
352
+ from sqlalchemy.orm import sessionmaker
353
+ import csv
354
+ import os
355
+ from datetime import datetime
356
+
357
+ Base = declarative_base()
358
+
359
+ class AlicantoPersona(Base):
360
+ __tablename__ = 'alicanto_personas'
361
+
362
+ id = Column(Integer, primary_key=True, autoincrement=True)
363
+ name = Column(String(255))
364
+ birth_year = Column(Integer)
365
+ location = Column(Text)
366
+ leader = Column(Text)
367
+ interests = Column(Text)
368
+ worldview = Column(Text)
369
+ approach = Column(Text)
370
+ persona_text = Column(Text)
371
+ created_at = Column(DateTime, default=datetime.utcnow)
372
+
373
+ def save_persona_to_databases(persona_data: dict):
374
+ """Save persona to both SQLite and CSV for persistence"""
375
+
376
+
377
+ db_path = os.path.expanduser("~/npcsh_history.db")
378
+ engine = create_engine(f'sqlite:///{db_path}')
379
+ Base.metadata.create_all(engine)
380
+ Session = sessionmaker(bind=engine)
381
+ session = Session()
382
+
383
+
384
+ persona = AlicantoPersona(
385
+ name=persona_data.get('name'),
386
+ birth_year=persona_data.get('birth_year'),
387
+ location=persona_data.get('location'),
388
+ leader=persona_data.get('leader'),
389
+ interests=json.dumps(persona_data.get('interests', [])),
390
+ worldview=persona_data.get('worldview'),
391
+ approach=persona_data.get('approach'),
392
+ persona_text=persona_data.get('persona_text')
393
+ )
178
394
 
179
- BE EXTREMELY CONCISE. Focus on substance over wordiness. Provide clear, high-value insights in 50-75 words maximum.
180
- """
395
+ session.add(persona)
396
+ session.commit()
397
+ session.close()
181
398
 
182
- response = get_llm_response(prompt=initial_prompt, model=model, provider=provider, npc=npc, temperature=0.7)
183
- initial_findings = response.get('response', '')
184
- if isinstance(initial_findings, (list, dict)) or hasattr(initial_findings, '__iter__') and not isinstance(initial_findings, (str, bytes)):
185
- initial_findings = ''.join([str(chunk) for chunk in initial_findings])
186
399
 
187
- chain.append(initial_findings)
400
+ csv_dir = os.path.expanduser("~/.npcsh/npc_team")
401
+ os.makedirs(csv_dir, exist_ok=True)
402
+ csv_path = os.path.join(csv_dir, "alicanto_personas.csv")
188
403
 
189
-
190
- for i in range(1, depth):
191
-
192
- memory_context = "\n\n".join(chain[-memory:]) if len(chain) > 0 else ""
193
-
194
-
195
- next_prompt = f"""
196
- Research request: {request}
197
-
198
- Recent research findings:
199
- {memory_context}
200
-
201
- As {npc.name}, continue your research on this topic. Build on previous insights and explore new aspects.
404
+ file_exists = os.path.exists(csv_path)
405
+ with open(csv_path, 'a', newline='') as csvfile:
406
+ fieldnames = ['name', 'birth_year', 'location', 'leader', 'interests',
407
+ 'worldview', 'approach', 'persona_text', 'created_at']
408
+ writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
202
409
 
203
- BE EXTREMELY CONCISE. Keep your response to 50-75 words maximum.
204
- """
205
-
206
- response = get_llm_response(prompt=next_prompt, model=model, provider=provider, npc=npc, temperature=0.7)
207
- next_findings = response.get('response', '')
208
- if isinstance(next_findings, (list, dict)) or hasattr(next_findings, '__iter__') and not isinstance(next_findings, (str, bytes)):
209
- next_findings = ''.join([str(chunk) for chunk in next_findings])
210
-
211
- chain.append(next_findings)
212
-
213
- return chain
214
-
215
- def format_facts_list(facts: List[str]) -> str:
216
- """Format a list of facts for display in a report"""
217
- return "\n".join([f"• {fact}" for fact in facts])
218
-
219
- def simulate_experiments(research: Dict[str, Any],
220
- request: str,
221
- model: str = None,
222
- provider: str = None,
223
- max_experiments: int = None) -> Dict[str, Dict[str, Any]]:
224
- """
225
- Simulate thought experiments based on research findings
226
-
227
- Args:
228
- research: Consolidated research data
229
- request: Original research question
230
- model: LLM model to use
231
- provider: LLM provider to use
232
- max_experiments: Maximum number of experiments to generate
410
+ if not file_exists:
411
+ writer.writeheader()
412
+
413
+ writer.writerow({
414
+ **persona_data,
415
+ 'interests': json.dumps(persona_data.get('interests', [])),
416
+ 'created_at': datetime.now().isoformat()
417
+ })
418
+
419
+ def generate_sub_agent_personas(topic: str, num_agents: int, model: str, provider: str, npc: NPC) -> List[Dict[str, str]]:
420
+ personas = []
421
+ for i in range(num_agents):
422
+ birth_year = random.randint(-32665, 32665)
423
+ teen_year = birth_year + 16
424
+
425
+ json_template = """
426
+ {
427
+ "name": "culturally appropriate full name for someone born in """ + str(birth_year) + """",
428
+ "location": "specific city/region where they were born in """ + str(birth_year) + """",
429
+ "leader": "who ruled their region when they were 16 years old in """ + str(teen_year) + """",
430
+ "interests": ["3-5 specific interests/obsessions they had as a teenager in """ + str(teen_year) + """"],
431
+ "worldview": "one sentence describing their fundamental perspective shaped by growing up in that era",
432
+ "approach": "how their historical background influences their way of thinking"
433
+ }
434
+ """
233
435
 
234
- Returns:
235
- Dictionary mapping experiment titles to experiment data
236
- """
237
-
238
- facts_context = ""
239
-
240
-
241
- if "fact_groups" in research:
242
- for group, facts in list(research["fact_groups"].items())[:5]:
243
- facts_context += f"\n\nThematic Group: {group}\n"
244
- facts_context += format_facts_list(facts)
245
-
246
-
247
- if "combination_insights" in research:
248
- facts_context += "\n\nEmergent Insights:\n"
249
- for combo in research["combination_insights"][:3]:
250
- facts_context += f"• {combo.get('emergent_insight', '')}\n"
251
-
252
-
253
- prompt = f"""
254
- You are a creative research scientist exploring the topic: "{request}"
255
-
256
- Based on the following research findings:
257
-
258
- {facts_context}
259
-
260
- Design {max_experiments if max_experiments else "3-5"} thought experiments that could test, validate, or extend these insights.
261
-
262
- For each experiment:
263
- 1. Create a descriptive title that captures the experiment's focus
264
- 2. Describe the experimental design/methodology (be specific and detailed)
265
- 3. Predict the potential results and their implications
266
- 4. Explain how these results would advance our understanding of {request}
267
-
268
- Format your response as JSON with this structure:
269
- {{
270
- "experiment_title_1": {{
271
- "design": "detailed description of experimental design",
272
- "results": "predicted results and implications"
273
- }},
274
- "experiment_title_2": {{
275
- ...
276
- }}
277
- }}
278
-
279
- Be bold and imaginative in your experimental designs. Consider unconventional approaches,
280
- simulations, thought experiments, and interdisciplinary methods.
281
- """
282
-
283
- response = get_llm_response(prompt=prompt,
284
- model=model,
285
- provider=provider,
286
- temperature=0.8,
287
- format="json")
288
- experiments = response.get("response", {})
289
-
290
-
291
- if max_experiments and isinstance(experiments, dict) and len(experiments) > max_experiments:
292
-
293
- sorted_exps = sorted(experiments.items(), key=lambda x: len(x[0]), reverse=True)
294
- experiments = dict(sorted_exps[:max_experiments])
295
-
296
- return experiments
297
-
298
- def alicanto(request: str,
299
- num_npcs: int = 5,
300
- depth: int = 3, memory: int = 3,
301
- context: str = None,
302
- model: str = None,
303
- provider: str = None,
304
- exploration_factor: float = 0.3,
305
- creativity_factor: float = 0.5,
306
- output_format: str = "report",
307
- max_facts_per_chain: int = None,
308
- max_thematic_groups: int = None,
309
- max_criticisms_per_group: int = None,
310
- max_conceptual_combinations: int = None,
311
- max_experiments: int = None,
312
- generate_pdf: bool = True) -> Dict[str, Any]:
313
- """
314
- Alicanto: Generate diverse research insights by coordinating multiple NPCs with different expertise.
315
-
316
- Args:
317
- request: The research question/topic
318
- num_npcs: Number of NPCs to generate (with different expertise)
319
- depth: Depth of research for each NPC
320
- memory: How many previous steps to include in context
321
- context: Additional context to include
322
- model: LLM model to use
323
- provider: LLM provider to use
324
- exploration_factor: Probability (0-1) of exploring a tangential direction
325
- creativity_factor: Probability (0-1) of pursuing highly creative or unusual ideas
326
- output_format: Format of the output ("report", "json", "markdown")
327
- max_facts_per_chain: Maximum number of facts to extract per research chain
328
- max_thematic_groups: Maximum number of thematic groups to identify
329
- max_criticisms_per_group: Maximum number of criticisms per thematic group
330
- max_conceptual_combinations: Maximum number of conceptual combinations to generate
331
- max_experiments: Maximum number of experiments to generate
332
- generate_pdf: Whether to generate a PDF report
436
+ prompt = f"Generate a unique persona for someone born in {birth_year}. Return JSON:\n{json_template}\n\nMake this person feel real and historically grounded. Consider: technological context, cultural movements, economic conditions, wars, discoveries happening in {teen_year}."
333
437
 
334
- Returns:
335
- Dictionary with research results
336
- """
337
-
338
- if model is None:
339
- model = NPCSH_CHAT_MODEL
340
- if provider is None:
341
- provider = NPCSH_CHAT_PROVIDER
342
-
343
-
344
- print(f"Generating {num_npcs} diverse researcher NPCs...")
345
- researchers = generate_random_npcs(num_npcs, model, provider, request)
346
-
347
-
348
- print(f"Generating research chains (depth={depth})...")
349
- research_chains = {}
350
- facts_by_researcher = {}
351
-
352
- for npc in researchers:
353
- print(f" Research chain from {npc.name}...")
354
- chain = generate_research_chain(
355
- request=request,
356
- npc=npc,
357
- depth=depth,
358
- memory=memory,
359
- context=context,
438
+
439
+ response = get_llm_response(
440
+ prompt,
360
441
  model=model,
361
442
  provider=provider,
362
- exploration_factor=exploration_factor,
363
- creativity_factor=creativity_factor
443
+ npc=npc,
444
+ format='json'
364
445
  )
365
- research_chains[npc.name] = chain
366
446
 
367
-
368
- print(f" Extracting facts from {npc.name}'s research...")
369
- facts = extract_facts("\n\n".join(chain), model=model, provider=provider, npc=npc, context=request)
447
+ new_persona = response.get('response')
448
+ if isinstance(new_persona, str):
449
+ new_persona = json.loads(new_persona)
370
450
 
371
-
372
- if max_facts_per_chain is not None and len(facts) > max_facts_per_chain:
373
- facts = facts[:max_facts_per_chain]
374
-
375
- facts_by_researcher[npc.name] = facts
376
- print({"fact_list": facts})
377
-
378
-
379
- print("Identifying thematic groups across all research insights...")
380
- all_facts = []
381
- for researcher_facts in facts_by_researcher.values():
382
- all_facts.extend(researcher_facts)
383
-
384
- groups = identify_groups(all_facts, model=model, provider=provider)
385
-
386
-
387
- if max_thematic_groups is not None and len(groups) > max_thematic_groups:
388
- groups = groups[:max_thematic_groups]
389
-
390
-
391
- fact_groups = {group: [] for group in groups}
392
- for fact in all_facts:
393
- group_assignments = assign_groups_to_fact(fact, groups, model=model, provider=provider)
394
- assigned_groups = group_assignments.get("groups", [])
395
- for group in assigned_groups:
396
- if group in fact_groups:
397
- fact_groups[group].append(fact)
398
-
399
-
400
- print("Evaluating thematic groups for quality and risk...")
401
- group_evaluations = evaluate_thematic_groups(
402
- fact_groups,
403
- request,
404
- model=model,
405
- provider=provider,
406
- max_criticisms=max_criticisms_per_group
407
- )
408
-
409
-
410
- group_summaries = {}
411
- for group_name, facts in fact_groups.items():
412
- if not facts:
413
- continue
414
-
415
- prompt = f"""
416
- Summarize the key insights from this thematic group of research findings on the topic:
417
- "{request}"
451
+ persona_text = f"You are {new_persona.get('name')}, born {birth_year} in {new_persona.get('location')}, came of age under {new_persona.get('leader')}. Your interests were: {', '.join(new_persona.get('interests', []))}. {new_persona.get('worldview')} {new_persona.get('approach')}"
418
452
 
419
- Thematic Group: {group_name}
420
453
 
421
- Findings:
422
- {format_facts_list(facts)}
454
+ persona_data = {
455
+ 'name': new_persona.get('name'),
456
+ 'birth_year': birth_year,
457
+ 'location': new_persona.get('location'),
458
+ 'leader': new_persona.get('leader'),
459
+ 'interests': new_persona.get('interests', []),
460
+ 'worldview': new_persona.get('worldview'),
461
+ 'approach': new_persona.get('approach'),
462
+ 'persona_text': persona_text
463
+ }
423
464
 
424
- Provide a concise, coherent synthesis that captures the core ideas,
425
- emphasizes what's most novel or significant, and suggests potential implications.
426
- Keep your response to 200-300 words.
427
- """
428
465
 
429
- response = get_llm_response(prompt=prompt, model=model, provider=provider)
430
- summary = response.get('response', '')
431
- if isinstance(summary, (list, dict)) or hasattr(summary, '__iter__') and not isinstance(summary, (str, bytes)):
432
- summary = ''.join([str(chunk) for chunk in summary])
466
+ save_persona_to_databases(persona_data)
433
467
 
434
- group_summaries[group_name] = summary
435
-
436
-
437
- print("Generating conceptual combinations to spark novel insights...")
438
- fact_lists = list(facts_by_researcher.values())
439
- combinations = generate_conceptual_combinations(
440
- fact_lists,
441
- sample_size=min(3, len(all_facts)),
442
- num_combinations=max_conceptual_combinations if max_conceptual_combinations is not None else 5
443
- )
444
-
445
-
446
- print("Analyzing conceptual combinations for emergent insights...")
447
- combination_insights = analyze_conceptual_combinations(
448
- combinations,
449
- request,
450
- model=model,
451
- provider=provider
452
- )
453
-
454
-
455
- print("Identifying meta-patterns across research approaches...")
456
- meta_patterns = identify_patterns_across_chains(research_chains, model=model, provider=provider)
457
-
458
-
459
- print("Consolidating research into comprehensive synthesis...")
460
-
461
-
462
- integration_points = []
463
-
464
-
465
- for group, facts in fact_groups.items():
466
- if facts:
467
- integration_points.append(f"From thematic group '{group}':")
468
- for fact in facts[:3]:
469
- integration_points.append(f"- {fact}")
470
-
471
-
472
- for insight in combination_insights[:3]:
473
- integration_points.append(f"Emergent insight: {insight.get('emergent_insight', '')}")
474
-
475
-
476
- integration_points.append(f"Meta-analysis insight: {meta_patterns.get('meta_analysis', '')[:300]}...")
477
-
478
-
479
- integration_prompt = f"""
480
- Consolidate these diverse research findings into a comprehensive, integrative analysis of the topic:
481
- "{request}"
482
-
483
- Key points from the research:
484
- {format_facts_list(integration_points)}
485
-
486
- Your consolidation should:
487
- 1. Provide a coherent synthesis of the diverse perspectives
488
- 2. Identify the most significant findings and patterns
489
- 3. Note any tensions, contradictions, or complementary insights
490
- 4. Suggest an integrated framework for understanding the topic
491
- 5. Briefly outline implications and future directions
492
-
493
- Aim for a comprehensive, balanced, and insightful analysis (300-500 words).
494
- """
495
-
496
- integration_response = get_llm_response(integration_prompt, model=model, provider=provider)
497
- integration = integration_response.get('response', '')
498
- if isinstance(integration, (list, dict)) or hasattr(integration, '__iter__') and not isinstance(integration, (str, bytes)):
499
- integration = ''.join([str(chunk) for chunk in integration])
500
-
501
-
502
- summary_prompt = f"""
503
- Create a concise executive summary (150 words max) of this research on:
504
- "{request}"
505
-
506
- Integration:
507
- {integration}
508
-
509
- Focus on the most significant findings and implications. This should be suitable for someone who only has time to read a brief overview.
510
- """
511
-
512
- summary_response = get_llm_response(summary_prompt, model=model, provider=provider)
513
- ideas_summarized = summary_response.get('response', '')
514
- if isinstance(ideas_summarized, (list, dict)) or hasattr(ideas_summarized, '__iter__') and not isinstance(ideas_summarized, (str, bytes)):
515
- ideas_summarized = ''.join([str(chunk) for chunk in ideas_summarized])
468
+ personas.append({
469
+ "name": new_persona.get('name'),
470
+ "persona": persona_text
471
+ })
516
472
 
517
-
518
- print("Generating simulated experiments...")
519
- research_results = {
520
- "research_request": request,
521
- "research_chains": research_chains,
522
- "fact_groups": fact_groups,
523
- "group_evaluations": group_evaluations,
524
- "group_summaries": group_summaries,
525
- "combination_insights": combination_insights,
526
- "meta_patterns": meta_patterns,
527
- "integration": integration,
528
- "ideas_summarized": ideas_summarized
529
- }
473
+ return personas
530
474
 
531
- experiments = simulate_experiments(
532
- research_results,
533
- request,
475
+
476
+ def create_sub_agent(
477
+ model: str,
478
+ provider: str,
479
+ hypothesis: str,
480
+ name: str,
481
+ persona: str
482
+ ) -> NPC:
483
+
484
+ def wander_wrapper(problem_description: str) -> str:
485
+ return get_creative_ideas_for_stuck_agent(
486
+ problem_description,
487
+ agent,
488
+ model,
489
+ provider
490
+ )
491
+
492
+
493
+
494
+
495
+
496
+
497
+
498
+ tools = [
499
+ create_file,
500
+ append_to_file,
501
+ replace_in_file,
502
+ read_file,
503
+ list_files,
504
+ execute_shell_command,
505
+ search_papers,
506
+ wander_wrapper,
507
+ search_web
508
+ ]
509
+
510
+ agent = NPC(
511
+ name=name,
534
512
  model=model,
535
513
  provider=provider,
536
- max_experiments=max_experiments
514
+ primary_directive=persona,
515
+ tools=tools
537
516
  )
538
517
 
539
-
540
- pdf_path = None
541
- if generate_pdf:
542
- pdf_path = generate_pdf_report(request, model, provider, research_results, experiments)
543
-
544
-
545
- research_results["experiments"] = experiments
546
- research_results["pdf_path"] = pdf_path
547
-
548
- return research_results
518
+ return agent
549
519
 
550
- def evaluate_thematic_groups(fact_groups: Dict[str, List[str]], request: str, model: str = None, provider: str = None, max_criticisms: int = None) -> Dict[str, Dict[str, int]]:
551
- """
552
- Evaluate each thematic group for quality, potential risks, and biases.
553
-
554
- Args:
555
- fact_groups: Dictionary mapping group names to lists of facts
556
- request: The original research question
557
- model: LLM model to use
558
- provider: LLM provider to use
559
- max_criticisms: Maximum number of criticisms to generate per group
560
-
561
- Returns:
562
- Dictionary mapping group names to evaluation metrics
563
- """
564
- evaluations = {}
565
-
566
- for group_name, facts in fact_groups.items():
567
- facts_text = format_facts_list(facts)
568
-
569
- prompt = f"""
570
- Evaluate this thematic group of research insights on the topic:
571
- "{request}"
572
-
573
- Thematic Group: {group_name}
574
-
575
- Insights:
576
- {facts_text}
577
-
578
- Evaluate this group of insights on a scale of 1-10 (where 10 is highest) for:
579
- 1. Novelty: How original and non-obvious are these insights?
580
- 2. Depth: How deeply do they explore the underlying concepts?
581
- 3. Practicality: How useful are these insights for further research or application?
582
- 4. Evidence: How well-supported do these claims appear to be?
583
- 5. Risk: What is the chance that these insights lead to problematic directions or dead ends?
584
-
585
- Then identify potential weaknesses, biases, or limitations in these insights.
586
- {f"Provide exactly {max_criticisms} criticisms." if max_criticisms is not None else ""}
587
-
588
- Format your response as:
589
- Novelty: [score]
590
- Depth: [score]
591
- Practicality: [score]
592
- Evidence: [score]
593
- Risk: [score]
594
-
595
- Criticisms:
596
- 1. [First criticism]
597
- 2. [Second criticism]
598
- ...
599
- """
520
+
521
+
522
+ def sub_agent_trace(hypothesis: str,
523
+ persona: Dict[str, str],
524
+ user_query: str,
525
+ model: str,
526
+ provider: str,
527
+ max_steps: int = 50) -> SubAgentTrace:
528
+ agent_name = persona.get("name")
529
+ agent_persona = persona.get("persona")
530
+ agent = create_sub_agent(model, provider, hypothesis, agent_name, agent_persona)
531
+
532
+ trace = SubAgentTrace(hypothesis=hypothesis, agent_name=agent_name, agent_persona=agent_persona)
533
+ summarized_history = []
534
+ file_provenance = {}
535
+ created_files = set()
536
+ summary = {}
537
+
538
+ major_step = 0
539
+
540
+ while major_step < max_steps:
541
+ fs_before = get_filesystem_state()
542
+
543
+ provenance_summary = []
544
+ for filename, prov in file_provenance.items():
545
+ history = "; ".join([f"Step {step}: {action} ({checksum}) - {changes}" for step, action, checksum, changes in prov.step_history])
546
+ provenance_summary.append(f"{filename}: {history}")
547
+
548
+ history_str = "\n".join(summarized_history)
549
+ next_step_text = f"This is the next step suggested by your advisor. : BEGIN NEXT_STEP: {summary.get('next_step')} END NEXT STEP" if summary else ""
550
+
551
+ initial_prompt = f"""
552
+ Test the following hypothesis: '{hypothesis}' as related to the user query: '{user_query}'.
553
+ Focus only on your specific hypothesis; other agents are being tasked with other aspects of the problem.
554
+
555
+ Use bash commands to carry out research through the `execute_shell_command` tool.
556
+ Adjust files with `replace_in_file` and use `read_file` and `list_files` to verify file states and file creation.
557
+ Create files with create_file()
558
+
559
+ Test with execute_shell_command when needed
560
+ Get unstuck with wander_wrapper
561
+
562
+ When you have a definitive result, say RESEARCH_COMPLETE.
563
+
564
+ FILE PROVENANCE HISTORY:
565
+ {chr(10).join(provenance_summary)}
566
+
567
+ CURRENT FILES: {list(fs_before.keys())}
568
+
569
+ COMPLETE ACTION HISTORY:
570
+ BEGIN HISTORY
571
+ `
572
+ {history_str}
573
+ `
574
+ END HISTORY
575
+
576
+ What specific action will you take next to test your hypothesis?
577
+ AVAILABLE TOOLS: create_file, append_to_file, replace_in_file, read_file, list_files, execute_shell_command, search_papers, wander_wrapper, search_web.
578
+
579
+ Do not repeat actions. Do not spend steps only thinking unless you need to brainstorm or wander. Use `execute_shell_command` for anything complicated beyond a simple file read, replace, or create.
580
+ Use `search_web` with the provider {os.environ.get('NPCSH_SEARCH_PROVIDER')} to look up items if you are struggling to understand why errors are happening with code execution.
581
+ Do not waste time re-verifying the same package versions or libraries when you can explicitly look up usage patterns that are up to date. Do not assume that your generated code will be correct the first time or up to date,
582
+ and if you are finding irreconcilable errors that you cannot seem to figure out locally then you need to search. For example, if you assume a Python package you installed like 'sqlite-vector' is importable like
583
+ "from sqlite.vector" and keep running into import or module errors, it it probably because you need to look up the correct way to access the library. It may have been that you would need to import "sqlite_vector" or "sql_vector".
584
+ There is no way to know this information a priori, and instead of wasting time verifying pip installations, it's better to look for actual usage patterns, either by inspecting the source code of the pip package itself or simply by
585
+ searching the web.
586
+
587
+ This should guide your next steps:
588
+
589
+ `{next_step_text} `
590
+
591
+ Your goal is to research: set up experiments, create figures that can be included in a LaTeX report, and produce data outputs as CSVs for verification, reusability, and reproducibility.
592
+
593
+
594
+ Do not use seaborn. On matplotlib plots, do not use grids or titles.
595
+ """
600
596
 
601
- response = get_llm_response(prompt=prompt, model=model, provider=provider)
602
- eval_text = response.get('response', '')
603
- if isinstance(eval_text, (list, dict)) or hasattr(eval_text, '__iter__') and not isinstance(eval_text, (str, bytes)):
604
- eval_text = ''.join([str(chunk) for chunk in eval_text])
597
+ print(f"\n{'='*80}")
598
+ print(f"AUTONOMOUS LOOP {major_step + 1} FOR {agent_name}")
599
+ print(f"{'='*80}")
600
+ print(f"HYPOTHESIS: {hypothesis}")
601
+ print(f"FILES BEFORE: {list(fs_before.keys())}")
605
602
 
606
-
607
- scores = {}
608
- criticisms = []
609
- in_criticisms = False
603
+ messages = []
604
+ all_thoughts = []
605
+ all_actions = []
606
+ all_outcomes = []
610
607
 
611
- for line in eval_text.split('\n'):
612
- line = line.strip()
613
- if not line:
614
- continue
608
+ for micro_step in range(5):
609
+ print(f"\n--- Micro-step {micro_step + 1}/4 ---")
615
610
 
616
- if line.lower() == "criticisms:":
617
- in_criticisms = True
611
+ if micro_step == 0:
612
+ current_prompt = initial_prompt
613
+ print("SENDING INITIAL RESEARCH PROMPT")
614
+ else:
615
+ current_prompt = "Continue your work. What's your next action?"
616
+ print(f"SENDING CONTINUATION PROMPT: '{current_prompt}'")
617
+ try:
618
+ response = agent.get_llm_response(current_prompt,
619
+ messages=messages,
620
+ auto_process_tool_calls=True)
621
+ except Timeout:
618
622
  continue
623
+ except ContextWindowExceededError:
624
+ break
625
+ messages = response.get('messages', [])
619
626
 
620
- if in_criticisms:
621
-
622
- if line[0].isdigit() and line[1:].startswith('. '):
623
- criticism = line[line.find(' ')+1:].strip()
624
- criticisms.append(criticism)
627
+ thought = response.get('response')
628
+ if thought is None:
629
+ thought = ''
630
+ print("WARNING: No thought received from agent")
625
631
  else:
626
-
627
- if ':' in line:
628
- metric, score_str = line.split(':', 1)
629
- metric = metric.strip()
630
- try:
631
- score = int(score_str.strip())
632
- scores[metric] = score
633
- except ValueError:
634
- pass
635
-
636
-
637
- if max_criticisms is not None and len(criticisms) > max_criticisms:
638
- criticisms = criticisms[:max_criticisms]
639
-
640
- evaluations[group_name] = {
641
- **scores,
642
- "criticisms": criticisms
643
- }
644
-
645
- return evaluations
646
-
647
- def generate_conceptual_combinations(fact_lists: List[List[str]], sample_size: int = 3, num_combinations: int = 5) -> List[Dict]:
648
- """
649
- Generate interesting combinations of facts from different researchers to spark novel ideas.
632
+ print(f"AGENT THOUGHT: {thought[:200]}{'...' if len(thought) > 200 else ''}")
633
+ all_thoughts.append(thought)
634
+
635
+ if thought and "RESEARCH_COMPLETE" in thought.upper():
636
+ print(f"✓ RESEARCH COMPLETED at micro-step {micro_step + 1}")
637
+ break
638
+
639
+ if response.get('tool_results'):
640
+ tool_results = response['tool_results']
641
+ print(f"TOOLS USED: {len(tool_results)} tool(s)")
642
+
643
+ for i, res in enumerate(tool_results):
644
+ tool_name = res.get('tool_name')
645
+ args = res.get('arguments', {})
646
+ result = res.get('result')
647
+
648
+ print(f" Tool {i+1}: {tool_name}({args})")
649
+ for arg, item in args.items():
650
+ print(f" {arg}: {item}")
651
+ if isinstance(result, str) and len(result) > 150:
652
+ print(f" Result: {result[:150]}...")
653
+ else:
654
+ print(f" Result: {result}")
655
+
656
+ action_str = ", ".join([f"{res['tool_name']}({res.get('arguments', {})})" for res in tool_results])
657
+ outcomes = []
658
+
659
+ for res in tool_results:
660
+ if res['tool_name'] in ['create_file', 'append_to_file', 'replace_in_file']:
661
+ filename = res.get('arguments', {}).get('filename')
662
+ if filename:
663
+ created_files.add(filename)
664
+ if os.path.exists(filename):
665
+ trace.was_successful = True
666
+ print(f" ✓ File created: {filename}")
667
+
668
+ result_data = res.get('result')
669
+ outcomes.append(str(result_data))
670
+
671
+ outcome_str = " | ".join(outcomes)
672
+ all_actions.append(action_str)
673
+ all_outcomes.append(outcome_str)
674
+ else:
675
+ print("NO TOOLS USED - Agent only provided reasoning")
676
+
677
+ fs_after = get_filesystem_state()
678
+ print(f"\nFILES AFTER: {list(fs_after.keys())}")
679
+
680
+ new_files = set(fs_after.keys()) - set(fs_before.keys())
681
+ if new_files:
682
+ print(f"NEW FILES CREATED: {list(new_files)}")
683
+
684
+ combined_thought = " ".join(all_thoughts)
685
+ combined_action = " | ".join(filter(None, all_actions))
686
+ combined_outcome = " | ".join(filter(None, all_outcomes))
687
+
688
+ print(f"\nCOMPRESSING AUTONOMOUS SESSION...")
689
+ print(f"THOUGHTS: {len(all_thoughts)} messages")
690
+ print(f"ACTIONS: {len(all_actions)} tool uses")
691
+
692
+ summary = summarize_step(combined_thought,
693
+ combined_action,
694
+ combined_outcome,
695
+ fs_before,
696
+ fs_after,
697
+ file_provenance,
698
+ major_step + 1,
699
+ model,
700
+ provider,
701
+ agent)
702
+
703
+ print(f"SUMMARY: {summary.get('summary', 'No summary')}")
704
+ print(f"NEXT STEP: {summary.get('next_step', 'No next step')}")
705
+
706
+ summarized_history.append(f"Step {major_step + 1}: {summary.get('summary')} ")
707
+
708
+ trace.steps.append(ResearchStep(
709
+ step=major_step + 1,
710
+ thought=combined_thought,
711
+ action=combined_action,
712
+ outcome=combined_outcome
713
+ ))
714
+
715
+ if combined_thought and "RESEARCH_COMPLETE" in combined_thought.upper():
716
+ print(f"✓ RESEARCH COMPLETED FOR {agent_name}")
717
+ break
718
+
719
+ major_step += 1
650
720
 
651
- Args:
652
- fact_lists: List of fact lists from different NPCs
653
- sample_size: Number of facts to include in each combination
654
- num_combinations: Number of combinations to generate
655
-
656
- Returns:
657
- List of dictionaries containing the combinations and generated insights
658
- """
659
-
660
- all_facts_with_source = []
661
- for i, facts in enumerate(fact_lists):
662
- for fact in facts:
663
- all_facts_with_source.append((i, fact))
721
+ for filename in created_files:
722
+ if os.path.exists(filename):
723
+ trace.final_files[filename] = read_file(filename)
664
724
 
665
-
666
- combinations = []
667
- for _ in range(num_combinations):
668
- if len(all_facts_with_source) <= sample_size:
669
- sample = all_facts_with_source
670
- else:
671
- sample = random.sample(all_facts_with_source, sample_size)
672
-
673
- combinations.append({
674
- "facts": [fact for _, fact in sample],
675
- "sources": [source for source, _ in sample]
676
- })
725
+ print(f"\nFINAL RESULTS FOR {agent_name}:")
726
+ print(f"SUCCESS: {trace.was_successful}")
727
+ print(f"FILES CREATED: {list(trace.final_files.keys())}")
677
728
 
678
- return combinations
729
+ return trace
679
730
 
680
- def analyze_conceptual_combinations(combinations: List[Dict], request: str, model: str = None, provider: str = None) -> List[Dict]:
681
- """
682
- Analyze combinations of facts to identify emergent patterns and generate novel hypotheses.
683
-
684
- Args:
685
- combinations: List of fact combinations
686
- request: The original research question
687
- model: LLM model to use
688
- provider: LLM provider to use
689
-
690
- Returns:
691
- List of dictionaries with analysis results
692
- """
693
- results = []
694
-
695
- for i, combo in enumerate(combinations):
696
- facts_formatted = format_facts_list(combo["facts"])
697
-
698
- prompt = f"""
699
- Consider these seemingly unrelated insights from different researchers exploring the topic:
700
- "{request}"
701
-
702
- {facts_formatted}
703
-
704
- Your task is to identify a non-obvious connection, pattern, or insight that emerges when these ideas are juxtaposed.
705
- Focus on discovering something truly novel that none of the individual researchers may have recognized.
706
-
707
- 1. Identify a surprising emergent pattern or connection
708
- 2. Develop a novel hypothesis or research question based on this pattern
709
- 3. Explain how this insight challenges or extends conventional thinking on the topic
710
- 4. Suggest an unconventional methodology or approach to explore this new direction
711
-
712
- Be bold, imaginative, and interdisciplinary in your thinking.
713
- """
714
-
715
- response = get_llm_response(prompt=prompt, model=model, provider=provider, temperature=0.9)
716
- insight = response.get('response', '')
717
- if isinstance(insight, (list, dict)) or hasattr(insight, '__iter__') and not isinstance(insight, (str, bytes)):
718
- insight = ''.join([str(chunk) for chunk in insight])
731
+
732
+
733
+
734
+ def save_trace_for_training(
719
735
 
720
- results.append({
721
- "combination_id": i+1,
722
- "facts": combo["facts"],
723
- "sources": combo["sources"],
724
- "emergent_insight": insight
736
+
737
+ traces: List[SubAgentTrace],
738
+ output_dir: str = "./alicanto_traces"
739
+ ):
740
+ os.makedirs(output_dir, exist_ok=True)
741
+ timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
742
+ filename = f"trace_{timestamp}.csv"
743
+ filepath = os.path.join(output_dir, filename)
744
+
745
+ flattened_data = []
746
+ for trace in traces:
747
+ for step in trace.steps:
748
+ flattened_data.append({
749
+ "hypothesis": trace.hypothesis,
750
+ "agent_name": trace.agent_name,
751
+ "agent_persona": trace.agent_persona,
752
+ "was_successful": trace.was_successful,
753
+ "step": step.step,
754
+ "thought": step.thought,
755
+ "action": step.action,
756
+ "outcome": step.outcome,
757
+ "final_files": json.dumps(trace.final_files)
758
+ })
759
+
760
+ if not flattened_data:
761
+ return
762
+
763
+ df = pd.DataFrame(flattened_data)
764
+ df.to_csv(filepath, index=False)
765
+
766
+ print(f"Full research trace saved to {filepath}")
767
+ return filepath
768
+ def compress_traces_for_synthesis(traces: List[SubAgentTrace], model: str, provider: str, npc: NPC) -> str:
769
+ compressed_summaries = []
770
+
771
+ for trace in traces:
772
+ steps_summary = []
773
+ for step in trace.steps[-3:]: # Only last 3 steps
774
+ if step.thought:
775
+ thought_short = step.thought[:100] + "..." if len(step.thought) > 100 else step.thought
776
+ else:
777
+ thought_short = "No thought recorded"
778
+
779
+ if step.action:
780
+ action_short = step.action[:100] + "..." if len(step.action) > 100 else step.action
781
+ else:
782
+ action_short = "No action taken"
783
+
784
+ steps_summary.append(f"Step {step.step}: {thought_short} | {action_short}")
785
+
786
+ files_created = list(trace.final_files.keys()) if trace.final_files else []
787
+
788
+ compressed_summaries.append({
789
+ "agent": trace.agent_name,
790
+ "hypothesis": trace.hypothesis,
791
+ "success": trace.was_successful,
792
+ "key_steps": steps_summary,
793
+ "files_created": files_created,
794
+ "final_file_count": len(files_created)
725
795
  })
726
796
 
727
- return results
797
+ return json.dumps(compressed_summaries, indent=2)
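The string handed to the synthesis step is a JSON array with one entry per trace; only the last three steps survive, and thought/action strings are clipped to 100 characters. A sketch of the shape, with illustrative values and keys taken directly from the function above:

```python
import json

# Illustrative only: the per-trace summary shape produced by compress_traces_for_synthesis.
compressed = json.loads("""
[
  {
    "agent": "agent_1",
    "hypothesis": "Encoding residue interactions as a QUBO is feasible up to 50 amino acids.",
    "success": true,
    "key_steps": [
      "Step 8: Estimated qubit counts for chain lengths 10-60... | append_to_file('results.tex', ...)",
      "Step 9: Summarised the scaling trend | replace_in_file('notes.tex', ...)",
      "Step 10: Final check of notes | read_file('notes.tex')"
    ],
    "files_created": ["notes.tex", "results.tex"],
    "final_file_count": 2
  }
]
""")
print(compressed[0]["final_file_count"])  # 2
```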
798
+ def format_paper_as_latex(paper: Paper, authors: List[str]) -> str:
799
+ author_string = ", ".join(authors)
800
+ introduction_content = "\n\n".join(paper.introduction)
801
+ methods_content = "\n\n".join(paper.methods)
802
+ results_content = "\n\n".join(paper.results)
803
+ discussion_content = "\n\n".join(paper.discussion)
728
804
 
729
- def identify_patterns_across_chains(chains: Dict[str, List[str]], model: str = None, provider: str = None) -> Dict:
730
- """
731
- Identify meta-patterns across research chains, searching for higher-order insights.
732
-
733
- Args:
734
- chains: Dictionary mapping NPC names to their research chains
735
- model: LLM model to use
736
- provider: LLM provider to use
737
-
738
- Returns:
739
- Dictionary with meta-analysis results
740
- """
741
-
742
- chain_summaries = {}
743
- for name, chain in chains.items():
744
- full_text = "\n\n".join(chain)
745
-
746
- summary_prompt = f"""
747
- Summarize the key themes, methodologies, and unusual perspectives in this research chain:
748
-
749
- {full_text[:2000]}...
750
-
751
- Focus on what makes this researcher's approach unique or valuable. Identify their core assumptions,
752
- methodological innovations, and blindspots (150-200 words).
753
- """
754
-
755
- response = get_llm_response(prompt=summary_prompt, model=model, provider=provider)
756
- summary = response.get('response', '')
757
- if isinstance(summary, (list, dict)) or hasattr(summary, '__iter__') and not isinstance(summary, (str, bytes)):
758
- summary = ''.join([str(chunk) for chunk in summary])
759
-
760
- chain_summaries[name] = summary
761
-
762
-
763
- all_summaries = "\n\n".join([f"[{name}]\n{summary}" for name, summary in chain_summaries.items()])
764
-
765
- meta_analysis_prompt = f"""
766
- Analyze these research approaches on the topic:
767
-
768
- {all_summaries}
769
-
770
- Identify:
771
- 1. Surprising methodological patterns - how are researchers approaching this problem in innovative ways?
772
- 2. Conceptual blindspots - what aspects seem to be collectively overlooked?
773
- 3. Emerging paradigms - are there new frameworks or models taking shape across multiple perspectives?
774
- 4. Productive tensions - where do disagreements or contradictions suggest valuable new research directions?
775
- 5. The topology of the problem space - how might we map the conceptual territory in a novel way?
776
-
777
- Focus on identifying higher-order insights that emerge from comparing these different approaches.
778
- Your analysis should challenge conventions and suggest new ways of framing the entire research domain.
779
- """
805
+ return f"""
806
+ \\documentclass{{article}}
807
+ \\title{{{paper.title}}}
808
+ \\author{{{author_string}}}
809
+ \\date{{\\today}}
810
+ \\begin{{document}}
811
+ \\maketitle
812
+ \\begin{{abstract}}
813
+ {paper.abstract}
814
+ \\end{{abstract}}
815
+ \\section*{{Introduction}}
816
+ {introduction_content}
817
+ \\section*{{Methods}}
818
+ {methods_content}
819
+ \\section*{{Results}}
820
+ {results_content}
821
+ \\section*{{Discussion}}
822
+ {discussion_content}
823
+ \\end{{document}}
824
+ """
825
+
826
+
827
+
828
+ def alicanto(
829
+ query: str,
830
+ num_agents: int = 3,
831
+ max_steps: int = 10,
832
+ model: str = NPCSH_CHAT_MODEL,
833
+ provider: str = NPCSH_CHAT_PROVIDER,
834
+ **kwargs
835
+ ) -> None:
836
+
837
+ print("=== ALICANTO RESEARCH SYSTEM STARTING ===")
838
+ print(f"Query: {query}")
839
+ print(f"Agents: {num_agents}, Max steps per agent: {max_steps}")
840
+ print(f"Model: {model}, Provider: {provider}")
841
+
842
+ def wander_wrapper_coordinator(problem_description: str) -> str:
843
+ return get_creative_ideas_for_stuck_agent(
844
+ problem_description,
845
+ alicanto_coordinator,
846
+ model,
847
+ provider
848
+ )
780
849
 
781
- response = get_llm_response(prompt=meta_analysis_prompt, model=model, provider=provider, temperature=0.8)
782
- meta_analysis = response.get('response', '')
783
- if isinstance(meta_analysis, (list, dict)) or hasattr(meta_analysis, '__iter__') and not isinstance(meta_analysis, (str, bytes)):
784
- meta_analysis = ''.join([str(chunk) for chunk in meta_analysis])
850
+ alicanto_coordinator = NPC(
851
+ name="Alicanto",
852
+ model=model,
853
+ provider=provider,
854
+ primary_directive="You are Alicanto the mythical bird. You research topics iteratively by writing to LaTeX files and searching for more information.",
855
+ tools=[
856
+ create_file,
857
+ append_to_file,
858
+ replace_in_file,
859
+ read_file,
860
+ list_files,
861
+ execute_shell_command,
862
+ search_papers,
863
+ search_web,
864
+ wander_wrapper_coordinator
865
+ ]
866
+ )
867
+
868
+ print("\n--- Step 1: Generating hypotheses and personas ---")
869
+
870
+ one_shot_example_hypotheses = """
871
+ "example_input": "Investigate the impact of quantum annealing on protein folding.",
872
+ "example_output": {
873
+ "hypotheses": [
874
+ "Implementing a quantum annealer simulation for a small peptide chain will identify lower energy states faster than a classical simulated annealing approach.",
875
+ "The choice of qubit connectivity in the quantum annealer's topology significantly impacts the final folded state's accuracy for proteins with long-range interactions.",
876
+ "Encoding the protein's residue interactions as a QUBO problem is feasible for structures up to 50 amino acids before qubit requirements become prohibitive."
877
+ ]
878
+ }
879
+ """
880
+ hypotheses_prompt = f"""Based on the following research topic, generate a list of {num_agents} distinct, specific, and empirically testable hypotheses.
881
+
882
+ TOPIC: "{query}"
883
+
884
+ Return a JSON object with a single key "hypotheses" which is a list of strings.
885
+
886
+ Here is an example of the expected input and output format:
887
+ {one_shot_example_hypotheses}
888
+
889
+ Return ONLY the JSON object.
890
+ """
785
891
 
786
-
787
- directions_prompt = f"""
788
- Based on this meta-analysis of research approaches to the topic:
892
+ print("Generating hypotheses...")
893
+ response = get_llm_response(
894
+ hypotheses_prompt,
895
+ model=model,
896
+ provider=provider,
897
+ npc=alicanto_coordinator,
898
+ format='json'
899
+ )
789
900
 
790
- {meta_analysis}
901
+ if not response or not response.get('response'):
902
+ print("ERROR: Failed to get hypotheses response")
903
+ return
904
+
905
+ hypotheses = response.get('response').get('hypotheses')
906
+ if not hypotheses:
907
+ print("ERROR: No hypotheses generated")
908
+ return
909
+
910
+ print(f"Generated {len(hypotheses)} hypotheses:")
911
+ for i, h in enumerate(hypotheses):
912
+ print(f" {i+1}. {h}")
913
+
914
+ print("\nGenerating agent personas...")
915
+ personas = generate_sub_agent_personas(
916
+ query,
917
+ num_agents,
918
+ model,
919
+ provider,
920
+ alicanto_coordinator
921
+ )
791
922
 
792
- Propose 5 highly innovative research directions that could transform this field.
793
- For each direction:
794
- 1. Frame a provocative research question
795
- 2. Explain why it's both important and neglected
796
- 3. Suggest an unconventional methodology to explore it
797
- 4. Describe what a breakthrough in this direction might look like
923
+ if not personas:
924
+ print("ERROR: No personas generated")
925
+ return
798
926
 
799
- Your suggestions should be bold, interdisciplinary, and challenge fundamental assumptions.
800
- Aim for directions that most researchers haven't considered but that could lead to significant advances.
801
- """
927
+ print(f"Generated {len(personas)} personas:")
928
+ for i, p in enumerate(personas):
929
+ print(f" {i+1}. {p.get('name')}: {p.get('persona')}")
930
+
931
+ print("\n--- Step 2: Delegating hypotheses to Sub-Agents for serial execution ---")
932
+
933
+ all_traces = []
934
+ for i, hypo in enumerate(hypotheses):
935
+ persona = personas[i % len(personas)]
936
+ print(f"\nStarting sub-agent {i+1}/{len(hypotheses)}")
937
+ trace = sub_agent_trace(
938
+ hypo,
939
+ persona,
940
+ query,
941
+ model,
942
+ provider,
943
+ max_steps
944
+ )
945
+ all_traces.append(trace)
946
+ print(f"Sub-agent {i+1} completed. Success: {trace.was_successful}")
947
+
948
+ print(f"\nAll sub-agents completed. Saving traces...")
949
+ save_trace_for_training(all_traces)
950
+ compressed_research = compress_traces_for_synthesis(all_traces, model, provider, alicanto_coordinator)
951
+
952
+ print("\n--- Step 3: Creating initial paper structure ---")
802
953
 
803
- response = get_llm_response(prompt=directions_prompt, model=model, provider=provider, temperature=0.9)
804
- new_directions = response.get('response', '')
805
- if isinstance(new_directions, (list, dict)) or hasattr(new_directions, '__iter__') and not isinstance(new_directions, (str, bytes)):
806
- new_directions = ''.join([str(chunk) for chunk in new_directions])
954
+ author_list = [trace.agent_name for trace in all_traces]
955
+ author_string = ", ".join(author_list)
807
956
 
808
- return {
809
- "chain_summaries": chain_summaries,
810
- "meta_analysis": meta_analysis,
811
- "innovative_directions": new_directions
812
- }
957
+ initial_latex = f"""\\documentclass{{article}}
958
+ \\title{{% TODO: TITLE}}
959
+ \\author{{{author_string}}}
960
+ \\date{{\\today}}
961
+ \\begin{{document}}
962
+ \\maketitle
813
963
 
814
- def preprocess_content_for_pdf(content: str, model: str = None, provider: str = None, max_words: int = 2000, concise_mode: bool = False) -> str:
815
- """
816
- Quick and lightweight preprocessing for PDF generation.
964
+ \\begin{{abstract}}
965
+ % TODO: ABSTRACT
966
+ \\end{{abstract}}
967
+
968
+ \\section{{Introduction}}
969
+ % TODO: INTRODUCTION
970
+
971
+ \\section{{Methods}}
972
+ % TODO: METHODS
973
+
974
+ \\section{{Results}}
975
+ % TODO: RESULTS
976
+
977
+ \\section{{Discussion}}
978
+ % TODO: DISCUSSION
979
+
980
+ \\end{{document}}"""
981
+
982
+ create_file("paper.tex", initial_latex)
983
+
984
+ print("\n--- Step 4: Iterative paper writing ---")
817
985
 
818
- Args:
819
- content: Raw content to preprocess
820
- model: LLM model to use (optional)
821
- provider: LLM provider to use (optional)
822
- max_words: Maximum word count (default 2000)
823
- concise_mode: If True, creates a very short summary instead of full formatting
824
-
825
- Returns:
826
- Formatted content ready for PDF generation
827
- """
828
-
829
- if not isinstance(content, str):
830
- content = str(content)
986
+ todo_sections = ["TITLE", "ABSTRACT", "INTRODUCTION", "METHODS", "RESULTS", "DISCUSSION"]
831
987
 
832
-
833
- if concise_mode:
988
+ for section_round in range(len(todo_sections)):
989
+ print(f"\n--- Section Round {section_round + 1} ---")
834
990
 
835
- if model is None:
836
- model = NPCSH_CHAT_MODEL
837
- if provider is None:
838
- provider = NPCSH_CHAT_PROVIDER
839
-
840
- concise_prompt = f"""
841
- Summarize the following content into an extremely concise, no-bullshit format with maximum 500 words:
842
- {content}
991
+ current_paper = read_file("paper.tex")
992
+ sections_status = {section: "EMPTY" if f"% TODO: {section}" in current_paper else "COMPLETE"
993
+ for section in todo_sections}
843
994
 
844
- - Use clear section headings
845
- - Use bullet points for key ideas
846
- - Focus only on essential insights
847
- - No verbose academic language
848
- - No padding or fillers
849
- - Just the core ideas in simple language
850
- """
995
+ print(f"Section status: {sections_status}")
851
996
 
852
- response = get_llm_response(prompt=concise_prompt, model=model, provider=provider)
853
- content = response.get('response', '')
854
-
855
-
856
- for char, replacement in {
857
- '%': '',
858
- '#': '-',
859
- '_': '-',
860
- '~': '-',
861
- '^': '',
862
- '\\': '/',
863
- '{': '(',
864
- '}': ')'
865
- }.items():
866
- content = content.replace(char, replacement)
867
-
868
-
869
- words = content.split()
870
- if len(words) > max_words:
871
- content = ' '.join(words[:max_words]) + '... [truncated]'
872
-
873
- return content.strip()
874
-
875
- def generate_pdf_report(request: str,
876
- model,
877
- provider,
878
- research: Dict[str, Any],
879
- experiments: Dict[str, Dict[str, Any]],
880
- output_path: str = None,
881
- max_pages: int = 5) -> str:
882
- """
883
- Generate a professional PDF report using LaTeX for superior formatting, typesetting, and layout.
884
-
885
- Args:
886
- request: The original research question
887
- research: The consolidated research results
888
- experiments: The simulated experiments and their results
889
- output_path: Path to save the PDF report (default: current directory)
890
- fast_mode: If True, uses simpler formatting
891
- concise_mode: If True, drastically reduces content length
892
- max_pages: Maximum number of pages to generate (approximate)
997
+ # Find next section to work on
998
+ next_section = None
999
+ for section in todo_sections:
1000
+ if sections_status[section] == "EMPTY":
1001
+ next_section = section
1002
+ break
893
1003
 
894
- Returns:
895
- Path to the generated PDF file
896
- """
897
- if output_path is None:
898
- output_path = os.getcwd()
899
-
900
-
901
- sanitized_request = "".join(c for c in request if c.isalnum() or c.isspace()).strip()
902
- sanitized_request = sanitized_request.replace(" ", "_")[:50]
903
- timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
904
- filename = f"{sanitized_request}_{timestamp}"
905
-
906
-
907
- try:
908
- subprocess.run(["which", "pdflatex"], check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
909
- except subprocess.CalledProcessError:
910
- print("LaTeX not installed. Attempting to install...")
911
- try:
912
- subprocess.run(["apt-get", "update"], check=True)
913
- subprocess.run(["apt-get", "install", "-y", "texlive-latex-base", "texlive-fonts-recommended",
914
- "texlive-fonts-extra", "texlive-latex-extra"], check=True)
915
- except subprocess.CalledProcessError as e:
916
- print(f"Error installing LaTeX: {str(e)}")
917
- return None
918
-
919
- chart_path = None
920
- try:
921
- if "group_evaluations" in research and research["group_evaluations"]:
922
-
923
- figures_dir = os.path.join(output_path, "figures")
924
- os.makedirs(figures_dir, exist_ok=True)
925
-
926
- fig, ax = plt.subplots(figsize=(7.5, 4))
927
- plt.style.use('ggplot')
928
-
929
- groups = []
930
- scores = []
1004
+ if not next_section:
1005
+ print("All sections complete")
1006
+ break
1007
+
1008
+ print(f"Working on section: {next_section}")
1009
+
1010
+ # Autonomous loop for this section (like sub-agents)
1011
+ messages = []
1012
+
1013
+ initial_prompt = f"""You are writing a research paper about: "{query}"
1014
+
1015
+ Research data from sub-agents: {compressed_research}
1016
+
1017
+ Current paper content:
1018
+ {current_paper}
1019
+
1020
+ Your task: Complete the {next_section} section by replacing "% TODO: {next_section}" with actual content.
1021
+
1022
+ Use replace_in_file to update the paper. Use search_papers or search_web if you need more information.
1023
+
1024
+ Focus ONLY on the {next_section} section. Write 2-4 paragraphs of substantial academic content.
1025
+
1026
+ Available tools: replace_in_file, read_file, search_papers, search_web"""
1027
+
1028
+ for micro_step in range(5): # 5 turns per section like sub-agents
1029
+ print(f"\n--- Micro-step {micro_step + 1}/5 for {next_section} ---")
931
1030
 
932
- for group_name, eval_data in research["group_evaluations"].items():
933
- groups.append(group_name[:30])
934
- quality_score = (eval_data.get("Novelty", 5) + eval_data.get("Depth", 5) +
935
- eval_data.get("Practicality", 5) + eval_data.get("Evidence", 5)) / 4
936
- scores.append(quality_score)
1031
+ if micro_step == 0:
1032
+ current_prompt = initial_prompt
1033
+ else:
1034
+ current_prompt = f"Continue working on the {next_section} section. What's your next action?"
937
1035
 
938
-
939
- sorted_data = sorted(zip(groups, scores), key=lambda x: x[1], reverse=True)
940
- groups = [x[0] for x in sorted_data]
941
- scores = [x[1] for x in sorted_data]
1036
+ try:
1037
+ response = alicanto_coordinator.get_llm_response(
1038
+ current_prompt,
1039
+ messages=messages,
1040
+ auto_process_tool_calls=True
1041
+ )
1042
+ except (Timeout, ContextWindowExceededError):
1043
+ break
1044
+
1045
+ messages = response.get('messages', [])
942
1046
 
943
-
944
- y_pos = range(len(groups))
945
- ax.barh(y_pos, scores, color='steelblue')
946
- ax.set_yticks(y_pos)
947
- ax.set_yticklabels(groups)
948
- ax.set_xlabel('Quality Score (1-10)')
949
- ax.set_title('Thematic Groups by Quality Score')
950
- plt.tight_layout()
951
1047
 
952
-
953
- chart_path = os.path.join(figures_dir, f"thematic_groups.pdf")
954
- plt.savefig(chart_path, dpi=300, bbox_inches='tight', format='pdf')
955
- plt.close()
956
- except Exception as e:
957
- print(f"Warning: Could not generate chart: {str(e)}")
958
-
959
-
960
- latex_content = generate_latex_document(request, model, provider, research, experiments, chart_path, max_pages)
961
-
962
-
963
- tex_path = os.path.join(output_path, f"{filename}.tex")
964
- with open(tex_path, "w") as f:
965
- f.write(latex_content)
966
-
967
-
968
- try:
969
-
970
- result = subprocess.run(
971
- ["pdflatex", "-interaction=nonstopmode", "-output-directory", output_path, tex_path],
972
- stdout=subprocess.PIPE,
973
- stderr=subprocess.PIPE
974
- )
975
-
976
- if result.returncode != 0:
977
- print(f"Warning: First LaTeX run had issues (exit code {result.returncode})")
978
-
979
-
980
-
981
- result = subprocess.run(
982
- ["pdflatex", "-interaction=nonstopmode", "-output-directory", output_path, tex_path],
983
- stdout=subprocess.PIPE,
984
- stderr=subprocess.PIPE
985
- )
986
-
987
- if result.returncode != 0:
988
- print(f"Warning: Second LaTeX run had issues (exit code {result.returncode})")
989
-
990
- log_path = os.path.join(output_path, f"{filename}.log")
991
- if os.path.exists(log_path):
992
- print(f"Check LaTeX log for details: {log_path}")
993
- except Exception as e:
994
- print(f"Error during LaTeX compilation: {str(e)}")
995
- return None
996
-
997
-
998
- for ext in [".aux", ".out", ".toc"]:
999
- try:
1000
- os.remove(os.path.join(output_path, f"{filename}{ext}"))
1001
- except OSError:
1002
- pass
1003
-
1004
-
1005
- pdf_path = os.path.join(output_path, f"{filename}.pdf")
1006
- if os.path.exists(pdf_path):
1007
- print(f"PDF report successfully generated using LaTeX: {pdf_path}")
1008
- return pdf_path
1009
- else:
1010
- print(f"PDF generation failed. Check the LaTeX log for details.")
1011
- return None
1048
+ final_paper = read_file("paper.tex")
1049
+ print(f"\n{'='*60}")
1050
+ print("FINAL RESEARCH PAPER (LATEX)")
1051
+ print("="*60)
1052
+ print(final_paper)
1053
+ print(f"\nPaper saved as paper.tex")
1012
1054
 
1013
- def generate_latex_document(request: str, model, provider, research: Dict[str, Any], experiments: Dict[str, Dict[str, Any]],
1014
- chart_path: str = None, max_pages: int = 5) -> str:
1015
- """
1016
- Generate LaTeX document content.
1017
-
1018
- Args:
1019
- request: The research topic
1020
- research: Research results
1021
- experiments: Experiments data
1022
- chart_path: Path to the thematic groups chart
1023
- max_pages: Maximum number of pages (approximate)
1024
-
1025
- Returns:
1026
- LaTeX document content as a string
1027
- """
1028
-
1029
- figure_paths = {}
1030
- if chart_path:
1031
-
1032
- figure_paths["thematic_groups"] = os.path.basename(chart_path)
1033
-
1034
-
1035
-
1036
- if isinstance(experiments, dict):
1037
- for title in experiments.keys():
1038
- sanitized_title = title.replace(" ", "_")
1039
- potential_image = f"{sanitized_title}_experiment.png"
1040
- if os.path.exists(potential_image):
1041
- figure_paths[sanitized_title] = potential_image
1042
-
1043
-
1044
- figure_path_description_dict = {}
1045
- for name, path in figure_paths.items():
1046
- figure_path_description_dict[name] = path
1047
-
1048
-
1049
- prompt = f'''
1050
- Generate a LaTeX document for a research report on the topic: "{request}"
1051
- Here is the summary of the research: {research}
1052
-
1053
- Here is the summary of the experiments: {experiments}''' +"""
1054
- Write your response in a way that academically details the research, its motivation, and experiments
1055
- and ensure any place where a citation may be needed is indicated by including an empty '\\cite{citation_needed}'
1056
-
1057
- IMPORTANT INSTRUCTIONS FOR DOCUMENT PREPARATION:
1058
- 1. DO NOT include \\bibliography{references} or any bibliography commands, as we don't have a references file
1059
- 2. Instead, create a \\begin{thebibliography}{99} ... \\end{thebibliography} section with example references
1060
- 3. For figures, use relative paths like 'figures/thematic_groups.pdf' rather than absolute paths
1061
- 4. Make sure all LaTeX commands are properly formatted and do not use undefined packages
1062
- 5. Keep the document structure simple and robust to avoid compilation errors
1063
- """+f"""
1064
- The figures are located at the following paths: {figure_path_description_dict}
1065
- """
1066
-
1067
-
1068
- latex_response = get_llm_response(prompt=prompt, model=model, provider=provider )
1069
- latex_content = latex_response.get('response', '')
1070
-
1071
-
1072
- latex_content = latex_content.replace('\\bibliography{references}', '')
1073
- latex_content = latex_content.replace('\\bibliographystyle{plain}', '')
1074
-
1075
-
1076
- latex_content = latex_content.replace('/home/caug/npcww/npcsh/figures/', 'figures/')
1077
-
1078
-
1079
- if '\\begin{thebibliography}' not in latex_content and '\\end{document}' in latex_content:
1080
- bibliography = """
1081
- \\begin{thebibliography}{9}
1082
- \\bibitem{citation1} Author, A. (2023). Title of the work. Journal Name, 10(2), 123-456.
1083
- \\bibitem{citation2} Researcher, B. (2022). Another relevant publication. Conference Proceedings, 789-012.
1084
- \\end{thebibliography}
1085
- """
1086
- latex_content = latex_content.replace('\\end{document}', f'{bibliography}\n\\end{{document}}')
1087
-
1088
- return latex_content
1089
1055
 
1056
+ def main():
1057
+ parser = argparse.ArgumentParser(description="Alicanto Multi-Agent Research System")
1058
+ parser.add_argument("topic", help="Research topic to investigate")
1059
+ parser.add_argument("--num-agents", type=int, default=3, help="Number of sub-agents to run.")
1060
+ parser.add_argument("--max-steps", type=int, default=10, help="Maximum steps for each sub-agent.")
1061
+ parser.add_argument("--model", default=NPCSH_CHAT_MODEL, help="LLM model to use")
1062
+ parser.add_argument("--provider", default=NPCSH_CHAT_PROVIDER, help="LLM provider to use")
1063
+
1064
+ args = parser.parse_args()
1065
+
1066
+ alicanto(
1067
+ query=args.topic,
1068
+ num_agents=args.num_agents,
1069
+ max_steps=args.max_steps,
1070
+ model=args.model,
1071
+ provider=args.provider
1072
+ )
1073
+
1074
+ if __name__ == "__main__":
1075
+ main()
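For the command line, main() exposes the same options through argparse; a usage sketch, assuming the npcsh package is installed so the module can be run as a script:

```python
import subprocess
import sys

# Equivalent to invoking the argparse CLI defined in main() above.
subprocess.run(
    [
        sys.executable, "-m", "npcsh.alicanto",
        "Investigate the impact of quantum annealing on protein folding",
        "--num-agents", "3",
        "--max-steps", "10",
    ],
    check=True,
)
```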