npcsh 1.0.31__py3-none-any.whl → 1.0.32__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- npcsh/alicanto.py +1001 -1015
- npcsh/corca.py +61 -21
- npcsh/routes.py +16 -15
- {npcsh-1.0.31.dist-info → npcsh-1.0.32.dist-info}/METADATA +1 -1
- {npcsh-1.0.31.dist-info → npcsh-1.0.32.dist-info}/RECORD +35 -35
- {npcsh-1.0.31.data → npcsh-1.0.32.data}/data/npcsh/npc_team/alicanto.npc +0 -0
- {npcsh-1.0.31.data → npcsh-1.0.32.data}/data/npcsh/npc_team/alicanto.png +0 -0
- {npcsh-1.0.31.data → npcsh-1.0.32.data}/data/npcsh/npc_team/bash_executer.jinx +0 -0
- {npcsh-1.0.31.data → npcsh-1.0.32.data}/data/npcsh/npc_team/corca.npc +0 -0
- {npcsh-1.0.31.data → npcsh-1.0.32.data}/data/npcsh/npc_team/corca.png +0 -0
- {npcsh-1.0.31.data → npcsh-1.0.32.data}/data/npcsh/npc_team/edit_file.jinx +0 -0
- {npcsh-1.0.31.data → npcsh-1.0.32.data}/data/npcsh/npc_team/foreman.npc +0 -0
- {npcsh-1.0.31.data → npcsh-1.0.32.data}/data/npcsh/npc_team/frederic.npc +0 -0
- {npcsh-1.0.31.data → npcsh-1.0.32.data}/data/npcsh/npc_team/frederic4.png +0 -0
- {npcsh-1.0.31.data → npcsh-1.0.32.data}/data/npcsh/npc_team/guac.png +0 -0
- {npcsh-1.0.31.data → npcsh-1.0.32.data}/data/npcsh/npc_team/image_generation.jinx +0 -0
- {npcsh-1.0.31.data → npcsh-1.0.32.data}/data/npcsh/npc_team/internet_search.jinx +0 -0
- {npcsh-1.0.31.data → npcsh-1.0.32.data}/data/npcsh/npc_team/kadiefa.npc +0 -0
- {npcsh-1.0.31.data → npcsh-1.0.32.data}/data/npcsh/npc_team/kadiefa.png +0 -0
- {npcsh-1.0.31.data → npcsh-1.0.32.data}/data/npcsh/npc_team/npcsh.ctx +0 -0
- {npcsh-1.0.31.data → npcsh-1.0.32.data}/data/npcsh/npc_team/npcsh_sibiji.png +0 -0
- {npcsh-1.0.31.data → npcsh-1.0.32.data}/data/npcsh/npc_team/plonk.npc +0 -0
- {npcsh-1.0.31.data → npcsh-1.0.32.data}/data/npcsh/npc_team/plonk.png +0 -0
- {npcsh-1.0.31.data → npcsh-1.0.32.data}/data/npcsh/npc_team/plonkjr.npc +0 -0
- {npcsh-1.0.31.data → npcsh-1.0.32.data}/data/npcsh/npc_team/plonkjr.png +0 -0
- {npcsh-1.0.31.data → npcsh-1.0.32.data}/data/npcsh/npc_team/python_executor.jinx +0 -0
- {npcsh-1.0.31.data → npcsh-1.0.32.data}/data/npcsh/npc_team/screen_cap.jinx +0 -0
- {npcsh-1.0.31.data → npcsh-1.0.32.data}/data/npcsh/npc_team/sibiji.npc +0 -0
- {npcsh-1.0.31.data → npcsh-1.0.32.data}/data/npcsh/npc_team/sibiji.png +0 -0
- {npcsh-1.0.31.data → npcsh-1.0.32.data}/data/npcsh/npc_team/spool.png +0 -0
- {npcsh-1.0.31.data → npcsh-1.0.32.data}/data/npcsh/npc_team/yap.png +0 -0
- {npcsh-1.0.31.dist-info → npcsh-1.0.32.dist-info}/WHEEL +0 -0
- {npcsh-1.0.31.dist-info → npcsh-1.0.32.dist-info}/entry_points.txt +0 -0
- {npcsh-1.0.31.dist-info → npcsh-1.0.32.dist-info}/licenses/LICENSE +0 -0
- {npcsh-1.0.31.dist-info → npcsh-1.0.32.dist-info}/top_level.txt +0 -0
npcsh/alicanto.py
CHANGED
@@ -1,1089 +1,1075 @@
+import json
+import requests
+import argparse
 import os
-import random
-from typing import List, Dict, Any, Optional, Union, Tuple
-import numpy as np
-from collections import defaultdict, Counter
-import itertools
-import matplotlib.pyplot as plt
-from matplotlib.figure import Figure
-from io import BytesIO
-import base64
-import datetime
-import tempfile
 import subprocess
-import
+import tempfile
+import random
+import shutil
+from typing import List, Dict, Any, Optional, Tuple
+from datetime import datetime
+from dataclasses import dataclass, asdict, field
+from pathlib import Path
+from concurrent.futures import ThreadPoolExecutor
 
-from npcpy.
-from npcpy.llm_funcs import get_llm_response
+from npcpy.tools import auto_tools
+from npcpy.llm_funcs import get_llm_response
+from npcpy.data.web import search_web
+from npcpy.npc_compiler import NPC, Team
 from npcsh._state import NPCSH_CHAT_MODEL, NPCSH_CHAT_PROVIDER
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-""
+
+from litellm.exceptions import Timeout, ContextWindowExceededError
+import pandas as pd
+import numpy as np
+
+from npcsh.wander import perform_single_wandering
+
+@dataclass
+class ResearchStep:
+    step: int
+    thought: str
+    action: str
+    outcome: str
+
+@dataclass
+class SubAgentTrace:
+    hypothesis: str
+    agent_name: str
+    agent_persona: str
+    steps: List[ResearchStep] = field(default_factory=list)
+    final_files: Dict[str, str] = field(default_factory=dict)
+    was_successful: bool = False
+
+@dataclass
+class Paper:
+    title: str = ""
+    abstract: str = ""
+    introduction: List[str] = field(default_factory=list)
+    methods: List[str] = field(default_factory=list)
+    results: List[str] = field(default_factory=list)
+    discussion: List[str] = field(default_factory=list)
+
+def create_file(filename: str, content: str) -> str:
+    filepath = os.path.abspath(filename)
+    if os.path.exists(filepath):
+        return f"Error: File '{filename}' already exists. Use append_to_file or replace_in_file to modify."
+    os.makedirs(os.path.dirname(filepath), exist_ok=True)
+    with open(filepath, 'w') as f:
+        f.write(content)
+    return f"File '{filename}' created successfully."
+
+def append_to_file(filename: str, content: str) -> str:
+    filepath = os.path.abspath(filename)
+    if not os.path.exists(filepath):
+        return f"Error: File '{filename}' not found. Use create_file first."
+    with open(filepath, 'a') as f:
+        f.write("\n" + content)
+    return f"Content appended to '{filename}'."
+
+def replace_in_file(filename: str, old_content: str, new_content: str) -> str:
+    filepath = os.path.abspath(filename)
+    if not os.path.exists(filepath):
+        return f"Error: File '{filename}' not found."
+    with open(filepath, 'r') as f:
+        file_contents = f.read()
+    file_contents = file_contents.replace(old_content, new_content)
+    with open(filepath, 'w') as f:
+        f.write(file_contents)
+    return f"Content in '{filename}' replaced."
+
+def read_file(filename: str) -> str:
+    filepath = os.path.abspath(filename)
+    if not os.path.exists(filepath):
+        return f"Error: File '{filename}' not found."
+    with open(filepath, 'r') as f:
+        return f.read()
+
+def list_files(directory: str = ".") -> List[str]:
+    return os.listdir(directory)
+
+
+
+from datasets import load_dataset
+from sklearn.feature_extraction.text import TfidfVectorizer
+from sklearn.metrics.pairwise import cosine_similarity
+
+DATASET_CACHE = None
+SEARCH_INDEX = None
+
+def load_and_combine_datasets() -> pd.DataFrame:
+    all_papers = []
 
-
-
-
-
-
-
+    try:
+        research_papers = load_dataset("ta-datalab/research_papers", split="train")
+        for paper in research_papers:
+            all_papers.append({
+                'title': paper.get('title', ''),
+                'abstract': paper.get('abstract', ''),
+                'authors': paper.get('authors', []),
+                'year': paper.get('year', None),
+                'venue': paper.get('venue', ''),
+                'url': paper.get('url', ''),
+                'paperId': paper.get('id', ''),
+                'citationCount': 0,
+                'source': 'research_papers'
+            })
+    except Exception as e:
+        print(f"Failed to load ta-datalab/research_papers: {e}")
 
-
-
+    try:
+        ml_papers = load_dataset("CShorten/ML-ArXiv-Papers", split="train")
+        for paper in ml_papers:
+            all_papers.append({
+                'title': paper.get('title', ''),
+                'abstract': paper.get('abstract', ''),
+                'authors': paper.get('authors', '').split(', ') if paper.get('authors') else [],
+                'year': paper.get('year', None),
+                'venue': 'arXiv',
+                'url': paper.get('url', ''),
+                'paperId': paper.get('id', ''),
+                'citationCount': 0,
+                'source': 'ml_arxiv'
+            })
+    except Exception as e:
+        print(f"Failed to load CShorten/ML-ArXiv-Papers: {e}")
 
-
-
+    try:
+        astro_papers = load_dataset("ashishkgpian/astrorag_papers", split="train")
+        for paper in astro_papers:
+            all_papers.append({
+                'title': paper.get('title', ''),
+                'abstract': paper.get('abstract', ''),
+                'authors': paper.get('authors', []),
+                'year': paper.get('year', None),
+                'venue': paper.get('venue', ''),
+                'url': paper.get('url', ''),
+                'paperId': paper.get('id', ''),
+                'citationCount': 0,
+                'source': 'astrorag'
+            })
+    except Exception as e:
+        print(f"Failed to load ashishkgpian/astrorag_papers: {e}")
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+    df = pd.DataFrame(all_papers)
+    df = df.dropna(subset=['title', 'abstract'])
+    df = df[df['abstract'].str.len() > 50]
+    return df
+
+def create_search_index(df: pd.DataFrame):
+    search_texts = df['title'].fillna('') + ' ' + df['abstract'].fillna('')
+    vectorizer = TfidfVectorizer(max_features=10000, stop_words='english', ngram_range=(1, 2))
+    tfidf_matrix = vectorizer.fit_transform(search_texts)
+    return {'vectorizer': vectorizer, 'tfidf_matrix': tfidf_matrix, 'dataframe': df}
+
+def initialize_dataset_search():
+    global DATASET_CACHE, SEARCH_INDEX
+    if DATASET_CACHE is None:
+        DATASET_CACHE = load_and_combine_datasets()
+    if SEARCH_INDEX is None:
+        SEARCH_INDEX = create_search_index(DATASET_CACHE)
+    return SEARCH_INDEX
+
+import time
+
+LAST_S2_REQUEST_TIME = 0
+S2_RATE_LIMIT_DELAY = 1.0
+
+def search_semantic_scholar(query: str, limit: int = 10) -> List[Dict[str, Any]]:
+    global LAST_S2_REQUEST_TIME
 
-
-
-
-
-    system_prompt = f"""
-    You are {name}, {expert.get('expertise', 'an expert researcher')}.
-
-    Background: {expert.get('background', 'You have extensive knowledge in your field.')}
-
-    Your perspective: {expert.get('perspective', 'You provide detailed, balanced analysis.')}
-
-    Your methodological quirk: {expert.get('methodological_quirk', 'You approach problems in unconventional ways.')}
-
-    Note: Be aware that you may have these biases: {expert.get('biases', 'None specifically noted.')}
-
-    Your task is to research the given topic thoroughly, focusing on your unique perspective and methodological approach.
-    Challenge conventional thinking, explore neglected angles, and identify unexpected connections or contradictions.
-    Your insights should be provocative and novel, not just rehashing mainstream views.
-
-    IMPORTANT: You must be extremely concise. Limit responses to 50-75 words maximum. Focus on substance over verbosity.
-    Prioritize precision, clarity, and insight density. Eliminate unnecessary words and focus on communicating
-    the essence of your insights in the most efficient way possible.
-    """
-
-
-    npc = NPC(name=name, primary_directive=f"Research expert on {request}")
-    npc.system_prompt = system_prompt
-    npcs.append(npc)
+    api_key = os.environ.get('S2_API_KEY')
+    if not api_key:
+        return []
 
-
-
-def generate_research_chain(request: str,
-                            npc: NPC, depth: int,
-                            memory: int = 3,
-                            context: str = None,
-                            model: str = None,
-                            provider: str = None,
-                            exploration_factor: float = 0.3,
-                            creativity_factor: float = 0.5) -> List[str]:
-    """
-    Generate a chain of research thoughts from a single NPC, diving deeper with each step.
+    current_time = time.time()
+    time_since_last = current_time - LAST_S2_REQUEST_TIME
 
-
-
-
-
-        memory: How many previous steps to include in context
-        context: Additional context to include
-        model: LLM model to use
-        provider: LLM provider to use
-        exploration_factor: Probability (0-1) of exploring a tangential direction
-        creativity_factor: Probability (0-1) of pursuing highly creative or unusual ideas
+    if time_since_last < S2_RATE_LIMIT_DELAY:
+        sleep_time = S2_RATE_LIMIT_DELAY - time_since_last
+        print(f"Rate limiting: sleeping {sleep_time:.2f}s before S2 request")
+        time.sleep(sleep_time)
 
-
-        List of research findings/thoughts from this chain
-    """
-    chain = []
+    LAST_S2_REQUEST_TIME = time.time()
 
-
-
-
+    url = "https://api.semanticscholar.org/graph/v1/paper/search"
+    headers = {"x-api-key": api_key}
+    params = {
+        "query": query,
+        "limit": limit,
+        "fields": "title,abstract,authors,year,citationCount,url,tldr"
+    }
 
-
+    try:
+        response = requests.get(url, headers=headers, params=params,
+                                timeout=30)
+        response.raise_for_status()
+        return response.json().get('data', [])
+    except requests.exceptions.RequestException as e:
+        print(f"Semantic Scholar API error: {e}")
+        return []
+
+def search_papers(query: str, limit: int = 10) -> List[Dict]:
+    s2_results = search_semantic_scholar(query, limit)
+    if s2_results:
+        return s2_results
+
+    search_index = initialize_dataset_search()
+    query_vector = search_index['vectorizer'].transform([query])
+    similarities = cosine_similarity(query_vector, search_index['tfidf_matrix']).flatten()
+    top_indices = similarities.argsort()[-limit:][::-1]
+    results = [search_index['dataframe'].iloc[idx].to_dict() for idx in top_indices if similarities[idx] > 0.01]
+    return results
+
+def execute_shell_command(command: str) -> Dict[str, Any]:
+    try:
+        result = subprocess.run(command, shell=True, capture_output=True, text=True, timeout=60)
+        return {
+            "success": result.returncode == 0,
+            "stdout": result.stdout,
+            "stderr": result.stderr
+        }
+    except Exception as e:
+        return {"success": False, "stderr": str(e)}
+
+def update_paper(paper_state: Paper, section: str, content: str) -> Paper:
+    if not hasattr(paper_state, section):
+        return paper_state
+    target_section = getattr(paper_state, section)
+    if isinstance(target_section, list):
+        target_section.append(content)
+    else:
+        setattr(paper_state, section, content)
+    return paper_state
+
+def get_creative_ideas_for_stuck_agent(
+    problem_description: str,
+    npc: NPC,
+    model: str,
+    provider: str
+) -> str:
+    print(f"\n--- SUB-AGENT {npc.name} IS STUCK, INITIATING WANDER ---")
+    _, _, raw_brainstorm, _, _ = perform_single_wandering(
+        problem=problem_description,
+        npc=npc,
+        model=model,
+        provider=provider
+    )
+    return raw_brainstorm
+
+
+@dataclass
+class FileProvenance:
+    filename: str
+    step_history: List[Tuple[int, str, str, str]] = field(default_factory=list)
+
+def get_filesystem_state() -> Dict[str, str]:
+    import hashlib
+    files = {}
+    for f in os.listdir("."):
+        if os.path.isfile(f):
+            with open(f, 'rb') as file:
+                content = file.read()
+            files[f] = hashlib.md5(content).hexdigest()[:8]
+    return files
+
+def summarize_step(thought: str,
+                   action: str,
+                   outcome: str,
+                   fs_before: Dict[str, str],
+                   fs_after: Dict[str, str],
+                   file_provenance: Dict[str, FileProvenance],
+                   step_num: int,
+                   model: str,
+                   provider: str,
+                   npc: NPC) -> str:
+
+    import hashlib
+    import os
+
+
+    current_files = {}
+    for f in os.listdir("."):
+        if os.path.isfile(f):
+            with open(f, 'rb') as file:
+                content = file.read()
+            current_files[f] = {
+                'size': len(content),
+                'checksum': hashlib.md5(content).hexdigest()[:8]
+            }
+
+
+    for f in fs_after:
+        if f not in file_provenance:
+            file_provenance[f] = FileProvenance(filename=f)
+
+        change_summary = ""
+        if f not in fs_before:
+            change_summary = f"Created with {current_files[f]['size']} bytes"
+            file_provenance[f].step_history.append((step_num, "CREATED", fs_after[f], change_summary))
+        elif fs_before.get(f) != fs_after[f]:
+            change_summary = f"Modified to {current_files[f]['size']} bytes"
+            file_provenance[f].step_history.append((step_num, "MODIFIED", fs_after[f], change_summary))
+
+
+    provenance_summary = []
+    for filename, prov in file_provenance.items():
+        history = "; ".join([f"Step {step}: {action} ({checksum}) - {changes}" for step, action, checksum, changes in prov.step_history])
+        provenance_summary.append(f"{filename}: {history}")
+
prompt = f"""AGENT'S REASONING: {thought}
|
|
320
|
+
|
|
321
|
+
AGENT'S ACTION: {action}
|
|
322
|
+
AGENT'S CLAIMED OUTCOME: {outcome}
|
|
323
|
+
|
|
324
|
+
COMPLETE FILE PROVENANCE:
|
|
325
|
+
{chr(10).join(provenance_summary)}
|
|
326
|
+
|
|
327
|
+
CURRENT FILESYSTEM:
|
|
328
|
+
Files: {list(current_files.keys())}
|
|
329
|
+
Details: {current_files}
|
|
330
|
+
|
|
331
|
+
Explain plainly what happened and whether the actions produced any measurable effects. If the agent thinks then it is likely time to direct it to
|
|
332
|
+
carry out a specific action.
|
|
333
|
+
|
|
334
|
+
Return JSON with "summary" and "next_step" keys.""" + """
|
|
335
|
+
|
|
336
|
+
{
|
|
337
|
+
"summary": " a summary of what they did and claimed and the extent to which it produced the intended outcome .",
|
|
338
|
+
"next_step": "The concrete next step for the agent to carry out in their research.
|
|
339
|
+
|
|
340
|
+
}
|
|
341
|
+
"""
|
|
173
342
|
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
343
|
+
response = get_llm_response(prompt, model=model, provider=provider, npc=npc, format='json')
|
|
344
|
+
summary_data = response.get('response')
|
|
345
|
+
|
|
346
|
+
return summary_data
|
|
347
|
+
|
|
348
|
+
|
|
349
|
+
|
|
350
|
+
from sqlalchemy import create_engine, Column, Integer, String, Text, DateTime
|
|
351
|
+
from sqlalchemy.ext.declarative import declarative_base
|
|
352
|
+
from sqlalchemy.orm import sessionmaker
|
|
353
|
+
import csv
|
|
354
|
+
import os
|
|
355
|
+
from datetime import datetime
|
|
356
|
+
|
|
357
|
+
Base = declarative_base()
|
|
358
|
+
|
|
359
|
+
class AlicantoPersona(Base):
|
|
360
|
+
__tablename__ = 'alicanto_personas'
|
|
361
|
+
|
|
362
|
+
id = Column(Integer, primary_key=True, autoincrement=True)
|
|
363
|
+
name = Column(String(255))
|
|
364
|
+
birth_year = Column(Integer)
|
|
365
|
+
location = Column(Text)
|
|
366
|
+
leader = Column(Text)
|
|
367
|
+
interests = Column(Text)
|
|
368
|
+
worldview = Column(Text)
|
|
369
|
+
approach = Column(Text)
|
|
370
|
+
persona_text = Column(Text)
|
|
371
|
+
created_at = Column(DateTime, default=datetime.utcnow)
|
|
372
|
+
|
|
373
|
+
+def save_persona_to_databases(persona_data: dict):
+    """Save persona to both SQLite and CSV for persistence"""
+
+
+    db_path = os.path.expanduser("~/npcsh_history.db")
+    engine = create_engine(f'sqlite:///{db_path}')
+    Base.metadata.create_all(engine)
+    Session = sessionmaker(bind=engine)
+    session = Session()
+
+
+    persona = AlicantoPersona(
+        name=persona_data.get('name'),
+        birth_year=persona_data.get('birth_year'),
+        location=persona_data.get('location'),
+        leader=persona_data.get('leader'),
+        interests=json.dumps(persona_data.get('interests', [])),
+        worldview=persona_data.get('worldview'),
+        approach=persona_data.get('approach'),
+        persona_text=persona_data.get('persona_text')
+    )
 
-
-
+    session.add(persona)
+    session.commit()
+    session.close()
 
-    response = get_llm_response(prompt=initial_prompt, model=model, provider=provider, npc=npc, temperature=0.7)
-    initial_findings = response.get('response', '')
-    if isinstance(initial_findings, (list, dict)) or hasattr(initial_findings, '__iter__') and not isinstance(initial_findings, (str, bytes)):
-        initial_findings = ''.join([str(chunk) for chunk in initial_findings])
 
-
+    csv_dir = os.path.expanduser("~/.npcsh/npc_team")
+    os.makedirs(csv_dir, exist_ok=True)
+    csv_path = os.path.join(csv_dir, "alicanto_personas.csv")
 
-
-
-
-
-
-
-        next_prompt = f"""
-        Research request: {request}
-
-        Recent research findings:
-        {memory_context}
-
-        As {npc.name}, continue your research on this topic. Build on previous insights and explore new aspects.
+    file_exists = os.path.exists(csv_path)
+    with open(csv_path, 'a', newline='') as csvfile:
+        fieldnames = ['name', 'birth_year', 'location', 'leader', 'interests',
+                      'worldview', 'approach', 'persona_text', 'created_at']
+        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-        research: Consolidated research data
-        request: Original research question
-        model: LLM model to use
-        provider: LLM provider to use
-        max_experiments: Maximum number of experiments to generate
+        if not file_exists:
+            writer.writeheader()
+
+        writer.writerow({
+            **persona_data,
+            'interests': json.dumps(persona_data.get('interests', [])),
+            'created_at': datetime.now().isoformat()
+        })
+
+def generate_sub_agent_personas(topic: str, num_agents: int, model: str, provider: str, npc: NPC) -> List[Dict[str, str]]:
+    personas = []
+    for i in range(num_agents):
+        birth_year = random.randint(-32665, 32665)
+        teen_year = birth_year + 16
+
+        json_template = """
+        {
+            "name": "culturally appropriate full name for someone born in """ + str(birth_year) + """",
+            "location": "specific city/region where they were born in """ + str(birth_year) + """",
+            "leader": "who ruled their region when they were 16 years old in """ + str(teen_year) + """",
+            "interests": ["3-5 specific interests/obsessions they had as a teenager in """ + str(teen_year) + """"],
+            "worldview": "one sentence describing their fundamental perspective shaped by growing up in that era",
+            "approach": "how their historical background influences their way of thinking"
+        }
+        """
 
-
-        Dictionary mapping experiment titles to experiment data
-    """
-
-    facts_context = ""
-
-
-    if "fact_groups" in research:
-        for group, facts in list(research["fact_groups"].items())[:5]:
-            facts_context += f"\n\nThematic Group: {group}\n"
-            facts_context += format_facts_list(facts)
-
-
-    if "combination_insights" in research:
-        facts_context += "\n\nEmergent Insights:\n"
-        for combo in research["combination_insights"][:3]:
-            facts_context += f"• {combo.get('emergent_insight', '')}\n"
-
-
-    prompt = f"""
-    You are a creative research scientist exploring the topic: "{request}"
-
-    Based on the following research findings:
-
-    {facts_context}
-
-    Design {max_experiments if max_experiments else "3-5"} thought experiments that could test, validate, or extend these insights.
-
-    For each experiment:
-    1. Create a descriptive title that captures the experiment's focus
-    2. Describe the experimental design/methodology (be specific and detailed)
-    3. Predict the potential results and their implications
-    4. Explain how these results would advance our understanding of {request}
-
-    Format your response as JSON with this structure:
-    {{
-        "experiment_title_1": {{
-            "design": "detailed description of experimental design",
-            "results": "predicted results and implications"
-        }},
-        "experiment_title_2": {{
-            ...
-        }}
-    }}
-
-    Be bold and imaginative in your experimental designs. Consider unconventional approaches,
-    simulations, thought experiments, and interdisciplinary methods.
-    """
-
-    response = get_llm_response(prompt=prompt,
-                                model=model,
-                                provider=provider,
-                                temperature=0.8,
-                                format="json")
-    experiments = response.get("response", {})
-
-
-    if max_experiments and isinstance(experiments, dict) and len(experiments) > max_experiments:
-
-        sorted_exps = sorted(experiments.items(), key=lambda x: len(x[0]), reverse=True)
-        experiments = dict(sorted_exps[:max_experiments])
-
-    return experiments
-
-def alicanto(request: str,
-             num_npcs: int = 5,
-             depth: int = 3, memory: int = 3,
-             context: str = None,
-             model: str = None,
-             provider: str = None,
-             exploration_factor: float = 0.3,
-             creativity_factor: float = 0.5,
-             output_format: str = "report",
-             max_facts_per_chain: int = None,
-             max_thematic_groups: int = None,
-             max_criticisms_per_group: int = None,
-             max_conceptual_combinations: int = None,
-             max_experiments: int = None,
-             generate_pdf: bool = True) -> Dict[str, Any]:
-    """
-    Alicanto: Generate diverse research insights by coordinating multiple NPCs with different expertise.
-
-    Args:
-        request: The research question/topic
-        num_npcs: Number of NPCs to generate (with different expertise)
-        depth: Depth of research for each NPC
-        memory: How many previous steps to include in context
-        context: Additional context to include
-        model: LLM model to use
-        provider: LLM provider to use
-        exploration_factor: Probability (0-1) of exploring a tangential direction
-        creativity_factor: Probability (0-1) of pursuing highly creative or unusual ideas
-        output_format: Format of the output ("report", "json", "markdown")
-        max_facts_per_chain: Maximum number of facts to extract per research chain
-        max_thematic_groups: Maximum number of thematic groups to identify
-        max_criticisms_per_group: Maximum number of criticisms per thematic group
-        max_conceptual_combinations: Maximum number of conceptual combinations to generate
-        max_experiments: Maximum number of experiments to generate
-        generate_pdf: Whether to generate a PDF report
+        prompt = f"Generate a unique persona for someone born in {birth_year}. Return JSON:\n{json_template}\n\nMake this person feel real and historically grounded. Consider: technological context, cultural movements, economic conditions, wars, discoveries happening in {teen_year}."
 
-
-
-
-
-    if model is None:
-        model = NPCSH_CHAT_MODEL
-    if provider is None:
-        provider = NPCSH_CHAT_PROVIDER
-
-
-    print(f"Generating {num_npcs} diverse researcher NPCs...")
-    researchers = generate_random_npcs(num_npcs, model, provider, request)
-
-
-    print(f"Generating research chains (depth={depth})...")
-    research_chains = {}
-    facts_by_researcher = {}
-
-    for npc in researchers:
-        print(f"  Research chain from {npc.name}...")
-        chain = generate_research_chain(
-            request=request,
-            npc=npc,
-            depth=depth,
-            memory=memory,
-            context=context,
+
+        response = get_llm_response(
+            prompt,
             model=model,
             provider=provider,
-
-
+            npc=npc,
+            format='json'
         )
-        research_chains[npc.name] = chain
 
-
-
-
+        new_persona = response.get('response')
+        if isinstance(new_persona, str):
+            new_persona = json.loads(new_persona)
 
-
-        if max_facts_per_chain is not None and len(facts) > max_facts_per_chain:
-            facts = facts[:max_facts_per_chain]
-
-        facts_by_researcher[npc.name] = facts
-        print({"fact_list": facts})
-
-
-    print("Identifying thematic groups across all research insights...")
-    all_facts = []
-    for researcher_facts in facts_by_researcher.values():
-        all_facts.extend(researcher_facts)
-
-    groups = identify_groups(all_facts, model=model, provider=provider)
-
-
-    if max_thematic_groups is not None and len(groups) > max_thematic_groups:
-        groups = groups[:max_thematic_groups]
-
-
-    fact_groups = {group: [] for group in groups}
-    for fact in all_facts:
-        group_assignments = assign_groups_to_fact(fact, groups, model=model, provider=provider)
-        assigned_groups = group_assignments.get("groups", [])
-        for group in assigned_groups:
-            if group in fact_groups:
-                fact_groups[group].append(fact)
-
-
-    print("Evaluating thematic groups for quality and risk...")
-    group_evaluations = evaluate_thematic_groups(
-        fact_groups,
-        request,
-        model=model,
-        provider=provider,
-        max_criticisms=max_criticisms_per_group
-    )
-
-
-    group_summaries = {}
-    for group_name, facts in fact_groups.items():
-        if not facts:
-            continue
-
-        prompt = f"""
-        Summarize the key insights from this thematic group of research findings on the topic:
-        "{request}"
+        persona_text = f"You are {new_persona.get('name')}, born {birth_year} in {new_persona.get('location')}, came of age under {new_persona.get('leader')}. Your interests were: {', '.join(new_persona.get('interests', []))}. {new_persona.get('worldview')} {new_persona.get('approach')}"
 
-        Thematic Group: {group_name}
 
-
-
+        persona_data = {
+            'name': new_persona.get('name'),
+            'birth_year': birth_year,
+            'location': new_persona.get('location'),
+            'leader': new_persona.get('leader'),
+            'interests': new_persona.get('interests', []),
+            'worldview': new_persona.get('worldview'),
+            'approach': new_persona.get('approach'),
+            'persona_text': persona_text
+        }
 
-        Provide a concise, coherent synthesis that captures the core ideas,
-        emphasizes what's most novel or significant, and suggests potential implications.
-        Keep your response to 200-300 words.
-        """
 
-
-        summary = response.get('response', '')
-        if isinstance(summary, (list, dict)) or hasattr(summary, '__iter__') and not isinstance(summary, (str, bytes)):
-            summary = ''.join([str(chunk) for chunk in summary])
+        save_persona_to_databases(persona_data)
 
-
-
-
-
-    fact_lists = list(facts_by_researcher.values())
-    combinations = generate_conceptual_combinations(
-        fact_lists,
-        sample_size=min(3, len(all_facts)),
-        num_combinations=max_conceptual_combinations if max_conceptual_combinations is not None else 5
-    )
-
-
-    print("Analyzing conceptual combinations for emergent insights...")
-    combination_insights = analyze_conceptual_combinations(
-        combinations,
-        request,
-        model=model,
-        provider=provider
-    )
-
-
-    print("Identifying meta-patterns across research approaches...")
-    meta_patterns = identify_patterns_across_chains(research_chains, model=model, provider=provider)
-
-
-    print("Consolidating research into comprehensive synthesis...")
-
-
-    integration_points = []
-
-
-    for group, facts in fact_groups.items():
-        if facts:
-            integration_points.append(f"From thematic group '{group}':")
-            for fact in facts[:3]:
-                integration_points.append(f"- {fact}")
-
-
-    for insight in combination_insights[:3]:
-        integration_points.append(f"Emergent insight: {insight.get('emergent_insight', '')}")
-
-
-    integration_points.append(f"Meta-analysis insight: {meta_patterns.get('meta_analysis', '')[:300]}...")
-
-
-    integration_prompt = f"""
-    Consolidate these diverse research findings into a comprehensive, integrative analysis of the topic:
-    "{request}"
-
-    Key points from the research:
-    {format_facts_list(integration_points)}
-
-    Your consolidation should:
-    1. Provide a coherent synthesis of the diverse perspectives
-    2. Identify the most significant findings and patterns
-    3. Note any tensions, contradictions, or complementary insights
-    4. Suggest an integrated framework for understanding the topic
-    5. Briefly outline implications and future directions
-
-    Aim for a comprehensive, balanced, and insightful analysis (300-500 words).
-    """
-
-    integration_response = get_llm_response(integration_prompt, model=model, provider=provider)
-    integration = integration_response.get('response', '')
-    if isinstance(integration, (list, dict)) or hasattr(integration, '__iter__') and not isinstance(integration, (str, bytes)):
-        integration = ''.join([str(chunk) for chunk in integration])
-
-
-    summary_prompt = f"""
-    Create a concise executive summary (150 words max) of this research on:
-    "{request}"
-
-    Integration:
-    {integration}
-
-    Focus on the most significant findings and implications. This should be suitable for someone who only has time to read a brief overview.
-    """
-
-    summary_response = get_llm_response(summary_prompt, model=model, provider=provider)
-    ideas_summarized = summary_response.get('response', '')
-    if isinstance(ideas_summarized, (list, dict)) or hasattr(ideas_summarized, '__iter__') and not isinstance(ideas_summarized, (str, bytes)):
-        ideas_summarized = ''.join([str(chunk) for chunk in ideas_summarized])
+        personas.append({
+            "name": new_persona.get('name'),
+            "persona": persona_text
+        })
 
-
-    print("Generating simulated experiments...")
-    research_results = {
-        "research_request": request,
-        "research_chains": research_chains,
-        "fact_groups": fact_groups,
-        "group_evaluations": group_evaluations,
-        "group_summaries": group_summaries,
-        "combination_insights": combination_insights,
-        "meta_patterns": meta_patterns,
-        "integration": integration,
-        "ideas_summarized": ideas_summarized
-    }
+    return personas
 
-
-
-
+
+def create_sub_agent(
+    model: str,
+    provider: str,
+    hypothesis: str,
+    name: str,
+    persona: str
+) -> NPC:
+
+    def wander_wrapper(problem_description: str) -> str:
+        return get_creative_ideas_for_stuck_agent(
+            problem_description,
+            agent,
+            model,
+            provider
+        )
+
+
+
+
+
+
+
+    tools = [
+        create_file,
+        append_to_file,
+        replace_in_file,
+        read_file,
+        list_files,
+        execute_shell_command,
+        search_papers,
+        wander_wrapper,
+        search_web
+    ]
+
+    agent = NPC(
+        name=name,
         model=model,
         provider=provider,
-
+        primary_directive=persona,
+        tools=tools
     )
 
-
-    pdf_path = None
-    if generate_pdf:
-        pdf_path = generate_pdf_report(request, model, provider, research_results, experiments)
-
-
-    research_results["experiments"] = experiments
-    research_results["pdf_path"] = pdf_path
-
-    return research_results
+    return agent
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+
+def sub_agent_trace(hypothesis: str,
+                    persona: Dict[str, str],
+                    user_query: str,
+                    model: str,
+                    provider: str,
+                    max_steps: int = 50) -> SubAgentTrace:
+    agent_name = persona.get("name")
+    agent_persona = persona.get("persona")
+    agent = create_sub_agent(model, provider, hypothesis, agent_name, agent_persona)
+
+    trace = SubAgentTrace(hypothesis=hypothesis, agent_name=agent_name, agent_persona=agent_persona)
+    summarized_history = []
+    file_provenance = {}
+    created_files = set()
+    summary = {}
+
+    major_step = 0
+
+    while major_step < max_steps:
+        fs_before = get_filesystem_state()
+
+        provenance_summary = []
+        for filename, prov in file_provenance.items():
+            history = "; ".join([f"Step {step}: {action} ({checksum}) - {changes}" for step, action, checksum, changes in prov.step_history])
+            provenance_summary.append(f"{filename}: {history}")
+
+        history_str = "\n".join(summarized_history)
+        next_step_text = f"This is the next step suggested by your advisor. : BEGIN NEXT_STEP: {summary.get('next_step')} END NEXT STEP" if summary else ""
+
+        initial_prompt = f"""
+Test the following hypothesis: '{hypothesis}' as related to the user query: '{user_query}'.
+Only focus on your specific hypothesis, other agents are being tasked with other aspects of the problem.
+
+Use bash commands to carry out research through the execute_shell_command.
+Adjust files with `replace_in_file` and use `read_file` and `list_files` to verify file states and file creation.
+Create files with create_file()
+
+Test with execute_shell_command when needed
+Get unstuck with wander_wrapper
+
+When you have a definitive result, say RESEARCH_COMPLETE.
+
+FILE PROVENANCE HISTORY:
+{chr(10).join(provenance_summary)}
+
+CURRENT FILES: {list(fs_before.keys())}
+
+COMPLETE ACTION HISTORY:
+BEGIN HISTORY
+`
+{history_str}
+`
+END HISTORy
+
+What specific action will you take next to test your hypothesis?
+AVAILABLE TOOLS: create_file, append_to_file, replace_in_file, read_file, list_files, execute_shell_command, wander_wrapper, search_web .
+
+Do not repeat actions. Do not constantly think unless you need to brainstorm or wander. Use `execute_shell_command` for anything complicated beyond a simple file read, replace, create.
+Use `search_web` with provider of {os.environ.get('NPCSH_SEARCH_PROVIDER') } to look up items if you are struggling to understand why errors are happening with code execution.
+Do not waste time re-verifying the same package versins or libraries when you can explicitly look up usage patterns that are up to date. Do not assume that your generated code will be correct the first time or up to date
+amd if you are finding irreconcilable errors that you cannot seem to figure out locally then you need to search. For example, if you assume a python package you installed like `sqlite-vector' is importable like
+"from sqlite.vector" and keep running into import or module errors, it it probably because you need to look up the correct way to access the library. It may have been that you would need to import "sqlite_vector" or "sql_vector".
+There is no way to know this information a priori and instead of wasting time verifying pip installations, its better to look for actual usage patterns, either by inspecting the source code of the pip package itself or simply by
+searching the web.
+
+This should guide your next steps:
+
+`{next_step_text} `
+
+Your goal is to research. To set up experiments, create figures that can be included in a latex document report, and produce data outputs as well in csvs for verification and reusability and reproducibility.
+
+
+Do not use seaborn. On matplotlib plots, do not use grids or titles.
+"""
 
-
-
-
-
print(f"\n{'='*80}")
|
|
598
|
+
print(f"AUTONOMOUS LOOP {major_step + 1} FOR {agent_name}")
|
|
599
|
+
print(f"{'='*80}")
|
|
600
|
+
print(f"HYPOTHESIS: {hypothesis}")
|
|
601
|
+
print(f"FILES BEFORE: {list(fs_before.keys())}")
|
|
605
602
|
|
|
606
|
-
|
|
607
|
-
|
|
608
|
-
|
|
609
|
-
|
|
603
|
+
messages = []
|
|
604
|
+
all_thoughts = []
|
|
605
|
+
all_actions = []
|
|
606
|
+
all_outcomes = []
|
|
610
607
|
|
|
611
|
-
for
|
|
612
|
-
|
|
613
|
-
if not line:
|
|
614
|
-
continue
|
|
608
|
+
for micro_step in range(5):
|
|
609
|
+
print(f"\n--- Micro-step {micro_step + 1}/4 ---")
|
|
615
610
|
|
|
616
|
-
if
|
|
617
|
-
|
|
611
|
+
if micro_step == 0:
|
|
612
|
+
current_prompt = initial_prompt
|
|
613
|
+
print("SENDING INITIAL RESEARCH PROMPT")
|
|
614
|
+
else:
|
|
615
|
+
current_prompt = "Continue your work. What's your next action?"
|
|
616
|
+
print(f"SENDING CONTINUATION PROMPT: '{current_prompt}'")
|
|
617
|
+
try:
|
|
618
|
+
response = agent.get_llm_response(current_prompt,
|
|
619
|
+
messages=messages,
|
|
620
|
+
auto_process_tool_calls=True)
|
|
621
|
+
except Timeout:
|
|
618
622
|
continue
|
|
623
|
+
except ContextWindowExceededError:
|
|
624
|
+
break
|
|
625
|
+
messages = response.get('messages', [])
|
|
619
626
|
|
|
620
|
-
|
|
621
|
-
|
|
622
|
-
|
|
623
|
-
|
|
624
|
-
criticisms.append(criticism)
|
|
627
|
+
thought = response.get('response')
|
|
628
|
+
if thought is None:
|
|
629
|
+
thought = ''
|
|
630
|
+
print("WARNING: No thought received from agent")
|
|
625
631
|
else:
|
|
626
|
-
|
|
627
|
-
|
|
628
|
-
|
|
629
|
-
|
|
630
|
-
|
|
631
|
-
|
|
632
|
-
|
|
633
|
-
|
|
634
|
-
|
|
635
|
-
|
|
636
|
-
|
|
637
|
-
|
|
638
|
-
|
|
639
|
-
|
|
640
|
-
|
|
641
|
-
|
|
642
|
-
|
|
643
|
-
|
|
644
|
-
|
|
645
|
-
|
|
646
|
-
|
|
647
|
-
|
|
648
|
-
"
|
|
649
|
-
|
|
632
|
+
print(f"AGENT THOUGHT: {thought[:200]}{'...' if len(thought) > 200 else ''}")
|
|
633
|
+
all_thoughts.append(thought)
|
|
634
|
+
|
|
635
|
+
if thought and "RESEARCH_COMPLETE" in thought.upper():
|
|
636
|
+
print(f"✓ RESEARCH COMPLETED at micro-step {micro_step + 1}")
|
|
637
|
+
break
|
|
638
|
+
|
|
639
|
+
if response.get('tool_results'):
|
|
640
|
+
tool_results = response['tool_results']
|
|
641
|
+
print(f"TOOLS USED: {len(tool_results)} tool(s)")
|
|
642
|
+
|
|
643
|
+
for i, res in enumerate(tool_results):
|
|
644
|
+
tool_name = res.get('tool_name')
|
|
645
|
+
args = res.get('arguments', {})
|
|
646
|
+
result = res.get('result')
|
|
647
|
+
|
|
648
|
+
print(f" Tool {i+1}: {tool_name}({args})")
|
|
649
|
+
for arg, item in args.items():
|
|
650
|
+
print(f" {arg}: {item}")
|
|
651
|
+
if isinstance(result, str) and len(result) > 150:
|
|
652
|
+
print(f" Result: {result[:150]}...")
|
|
653
|
+
else:
|
|
654
|
+
print(f" Result: {result}")
|
|
655
|
+
|
|
656
|
+
action_str = ", ".join([f"{res['tool_name']}({res.get('arguments', {})})" for res in tool_results])
|
|
657
|
+
outcomes = []
|
|
658
|
+
|
|
659
|
+
for res in tool_results:
|
|
660
|
+
if res['tool_name'] in ['create_file', 'append_to_file', 'replace_in_file']:
|
|
661
|
+
filename = res.get('arguments', {}).get('filename')
|
|
662
|
+
if filename:
|
|
663
|
+
created_files.add(filename)
|
|
664
|
+
if os.path.exists(filename):
|
|
665
|
+
trace.was_successful = True
|
|
666
|
+
print(f" ✓ File created: {filename}")
|
|
667
|
+
|
|
668
|
+
result_data = res.get('result')
|
|
669
|
+
outcomes.append(str(result_data))
|
|
670
|
+
|
|
671
|
+
outcome_str = " | ".join(outcomes)
|
|
672
|
+
all_actions.append(action_str)
|
|
673
|
+
all_outcomes.append(outcome_str)
|
|
674
|
+
else:
|
|
675
|
+
print("NO TOOLS USED - Agent only provided reasoning")
|
|
676
|
+
|
|
677
|
+
fs_after = get_filesystem_state()
|
|
678
|
+
print(f"\nFILES AFTER: {list(fs_after.keys())}")
|
|
679
|
+
|
|
680
|
+
new_files = set(fs_after.keys()) - set(fs_before.keys())
|
|
681
|
+
if new_files:
|
|
682
|
+
print(f"NEW FILES CREATED: {list(new_files)}")
|
|
683
|
+
|
|
684
|
+
combined_thought = " ".join(all_thoughts)
|
|
685
|
+
combined_action = " | ".join(filter(None, all_actions))
|
|
686
|
+
combined_outcome = " | ".join(filter(None, all_outcomes))
|
|
687
|
+
|
|
688
|
+
print(f"\nCOMPRESSING AUTONOMOUS SESSION...")
|
|
689
|
+
print(f"THOUGHTS: {len(all_thoughts)} messages")
|
|
690
|
+
print(f"ACTIONS: {len(all_actions)} tool uses")
|
|
691
|
+
|
|
692
|
+
+        summary = summarize_step(combined_thought,
+                                 combined_action,
+                                 combined_outcome,
+                                 fs_before,
+                                 fs_after,
+                                 file_provenance,
+                                 major_step + 1,
+                                 model,
+                                 provider,
+                                 agent)
+
+        print(f"SUMMARY: {summary.get('summary', 'No summary')}")
+        print(f"NEXT STEP: {summary.get('next_step', 'No next step')}")
+
+        summarized_history.append(f"Step {major_step + 1}: {summary.get('summary')} ")
+
+        trace.steps.append(ResearchStep(
+            step=major_step + 1,
+            thought=combined_thought,
+            action=combined_action,
+            outcome=combined_outcome
+        ))
+
+        if combined_thought and "RESEARCH_COMPLETE" in combined_thought.upper():
+            print(f"✓ RESEARCH COMPLETED FOR {agent_name}")
+            break
+
+        major_step += 1
 
-
-
-
-        num_combinations: Number of combinations to generate
-
-    Returns:
-        List of dictionaries containing the combinations and generated insights
-    """
-
-    all_facts_with_source = []
-    for i, facts in enumerate(fact_lists):
-        for fact in facts:
-            all_facts_with_source.append((i, fact))
+    for filename in created_files:
+        if os.path.exists(filename):
+            trace.final_files[filename] = read_file(filename)
 
-
-
-
-    if len(all_facts_with_source) <= sample_size:
-        sample = all_facts_with_source
-    else:
-        sample = random.sample(all_facts_with_source, sample_size)
-
-        combinations.append({
-            "facts": [fact for _, fact in sample],
-            "sources": [source for source, _ in sample]
-        })
+    print(f"\nFINAL RESULTS FOR {agent_name}:")
+    print(f"SUCCESS: {trace.was_successful}")
+    print(f"FILES CREATED: {list(trace.final_files.keys())}")
 
-    return
+    return trace
 
-
-
-
-
-    Args:
-        combinations: List of fact combinations
-        request: The original research question
-        model: LLM model to use
-        provider: LLM provider to use
-
-    Returns:
-        List of dictionaries with analysis results
-    """
-    results = []
-
-    for i, combo in enumerate(combinations):
-        facts_formatted = format_facts_list(combo["facts"])
-
-        prompt = f"""
-        Consider these seemingly unrelated insights from different researchers exploring the topic:
-        "{request}"
-
-        {facts_formatted}
-
-        Your task is to identify a non-obvious connection, pattern, or insight that emerges when these ideas are juxtaposed.
-        Focus on discovering something truly novel that none of the individual researchers may have recognized.
-
-        1. Identify a surprising emergent pattern or connection
-        2. Develop a novel hypothesis or research question based on this pattern
-        3. Explain how this insight challenges or extends conventional thinking on the topic
-        4. Suggest an unconventional methodology or approach to explore this new direction
-
-        Be bold, imaginative, and interdisciplinary in your thinking.
-        """
-
-        response = get_llm_response(prompt=prompt, model=model, provider=provider, temperature=0.9)
-        insight = response.get('response', '')
-        if isinstance(insight, (list, dict)) or hasattr(insight, '__iter__') and not isinstance(insight, (str, bytes)):
-            insight = ''.join([str(chunk) for chunk in insight])
+
+
+
def save_trace_for_training(
|
|
719
735
|
|
|
720
|
-
|
|
721
|
-
|
|
722
|
-
|
|
723
|
-
|
|
724
|
-
|
|
736
|
+
|
|
737
|
+
traces: List[SubAgentTrace],
|
|
738
|
+
output_dir: str = "./alicanto_traces"
|
|
739
|
+
):
|
|
740
|
+
os.makedirs(output_dir, exist_ok=True)
|
|
741
|
+
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
|
|
742
|
+
filename = f"trace_{timestamp}.csv"
|
|
743
|
+
filepath = os.path.join(output_dir, filename)
|
|
744
|
+
|
|
745
|
+
flattened_data = []
|
|
746
|
+
for trace in traces:
|
|
747
|
+
for step in trace.steps:
|
|
748
|
+
flattened_data.append({
|
|
749
|
+
"hypothesis": trace.hypothesis,
|
|
750
|
+
"agent_name": trace.agent_name,
|
|
751
|
+
"agent_persona": trace.agent_persona,
|
|
752
|
+
"was_successful": trace.was_successful,
|
|
753
|
+
"step": step.step,
|
|
754
|
+
"thought": step.thought,
|
|
755
|
+
"action": step.action,
|
|
756
|
+
"outcome": step.outcome,
|
|
757
|
+
"final_files": json.dumps(trace.final_files)
|
|
758
|
+
})
|
|
759
|
+
|
|
760
|
+
if not flattened_data:
|
|
761
|
+
return
|
|
762
|
+
|
|
763
|
+
df = pd.DataFrame(flattened_data)
|
|
764
|
+
df.to_csv(filepath, index=False)
|
|
765
|
+
|
|
766
|
+
print(f"Full research trace saved to {filepath}")
|
|
767
|
+
return filepath
|
|
768
|
+
def compress_traces_for_synthesis(traces: List[SubAgentTrace], model: str, provider: str, npc: NPC) -> str:
|
|
769
|
+
compressed_summaries = []
|
|
770
|
+
|
|
771
|
+
for trace in traces:
|
|
772
|
+
steps_summary = []
|
|
773
|
+
for step in trace.steps[-3:]: # Only last 3 steps
|
|
774
|
+
if step.thought:
|
|
775
|
+
thought_short = step.thought[:100] + "..." if len(step.thought) > 100 else step.thought
|
|
776
|
+
else:
|
|
777
|
+
thought_short = "No thought recorded"
|
|
778
|
+
|
|
779
|
+
if step.action:
|
|
780
|
+
action_short = step.action[:100] + "..." if len(step.action) > 100 else step.action
|
|
781
|
+
else:
|
|
782
|
+
action_short = "No action taken"
|
|
783
|
+
|
|
784
|
+
steps_summary.append(f"Step {step.step}: {thought_short} | {action_short}")
|
|
785
|
+
|
|
786
|
+
files_created = list(trace.final_files.keys()) if trace.final_files else []
|
|
787
|
+
|
|
788
|
+
compressed_summaries.append({
|
|
789
|
+
"agent": trace.agent_name,
|
|
790
|
+
"hypothesis": trace.hypothesis,
|
|
791
|
+
"success": trace.was_successful,
|
|
792
|
+
"key_steps": steps_summary,
|
|
793
|
+
"files_created": files_created,
|
|
794
|
+
"final_file_count": len(files_created)
|
|
725
795
|
})
|
|
726
796
|
|
|
727
|
-
return
|
|
797
|
+
return json.dumps(compressed_summaries, indent=2)
|
|
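The new `save_trace_for_training` flattens every sub-agent step into one CSV row (hypothesis, agent name and persona, success flag, step index, thought, action, outcome, and a JSON-encoded `final_files` map). A minimal sketch of loading such a trace back for inspection, assuming a file under the default `./alicanto_traces` directory; the timestamped filename below is only illustrative, not a file shipped with the package:

```python
import json
import pandas as pd

# Illustrative path: save_trace_for_training names files trace_<timestamp>.csv
# under the default ./alicanto_traces output directory.
df = pd.read_csv("./alicanto_traces/trace_20240101_120000.csv")

# One row per sub-agent step; keep only steps from runs marked successful.
successful = df[df["was_successful"]]
for _, row in successful.iterrows():
    # final_files was serialized with json.dumps, so decode it back to a dict
    final_files = json.loads(row["final_files"]) if isinstance(row["final_files"], str) else {}
    print(row["agent_name"], row["step"], row["action"], list(final_files.keys()))
```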
+def format_paper_as_latex(paper: Paper, authors: List[str]) -> str:
+    author_string = ", ".join(authors)
+    introduction_content = "\n\n".join(paper.introduction)
+    methods_content = "\n\n".join(paper.methods)
+    results_content = "\n\n".join(paper.results)
+    discussion_content = "\n\n".join(paper.discussion)

-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-        3. Emerging paradigms - are there new frameworks or models taking shape across multiple perspectives?
-        4. Productive tensions - where do disagreements or contradictions suggest valuable new research directions?
-        5. The topology of the problem space - how might we map the conceptual territory in a novel way?
-
-        Focus on identifying higher-order insights that emerge from comparing these different approaches.
-        Your analysis should challenge conventions and suggest new ways of framing the entire research domain.
-        """
+    return f"""
+\\documentclass{{article}}
+\\title{{{paper.title}}}
+\\author{{{author_string}}}
+\\date{{\\today}}
+\\begin{{document}}
+\\maketitle
+\\begin{{abstract}}
+{paper.abstract}
+\\end{{abstract}}
+\\section*{{Introduction}}
+{introduction_content}
+\\section*{{Methods}}
+{methods_content}
+\\section*{{Results}}
+{results_content}
+\\section*{{Discussion}}
+{discussion_content}
+\\end{{document}}
+"""
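`format_paper_as_latex` only assembles the LaTeX source as a string, and the new flow stops at writing `paper.tex`; nothing in this version compiles a PDF the way the removed `generate_pdf_report` did. A minimal sketch of compiling that output yourself, reusing the same non-interactive `pdflatex` invocation the removed code used; it assumes `pdflatex` is installed and on PATH, and the short document string stands in for the real return value:

```python
import subprocess

# Stand-in for the string returned by format_paper_as_latex (or the paper.tex
# skeleton that alicanto() leaves behind).
latex_source = "\\documentclass{article}\n\\begin{document}\nDraft.\n\\end{document}\n"

with open("paper.tex", "w") as f:
    f.write(latex_source)

# Same non-interactive pdflatex invocation the removed generate_pdf_report used
result = subprocess.run(
    ["pdflatex", "-interaction=nonstopmode", "paper.tex"],
    stdout=subprocess.PIPE,
    stderr=subprocess.PIPE,
)
print("PDF written to paper.pdf" if result.returncode == 0 else "pdflatex reported issues; see paper.log")
```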
+
+
+
+def alicanto(
+    query: str,
+    num_agents: int = 3,
+    max_steps: int = 10,
+    model: str = NPCSH_CHAT_MODEL,
+    provider: str = NPCSH_CHAT_PROVIDER,
+    **kwargs
+) -> None:
+
+    print("=== ALICANTO RESEARCH SYSTEM STARTING ===")
+    print(f"Query: {query}")
+    print(f"Agents: {num_agents}, Max steps per agent: {max_steps}")
+    print(f"Model: {model}, Provider: {provider}")
+
+    def wander_wrapper_coordinator(problem_description: str) -> str:
+        return get_creative_ideas_for_stuck_agent(
+            problem_description,
+            alicanto_coordinator,
+            model,
+            provider
+        )

-
-
-
-
+    alicanto_coordinator = NPC(
+        name="Alicanto",
+        model=model,
+        provider=provider,
+        primary_directive="You are Alicanto the mythical bird. You research topics iteratively by writing to LaTeX files and searching for more information.",
+        tools=[
+            create_file,
+            append_to_file,
+            replace_in_file,
+            read_file,
+            list_files,
+            execute_shell_command,
+            search_papers,
+            search_web,
+            wander_wrapper_coordinator
+        ]
+    )
+
+    print("\n--- Step 1: Generating hypotheses and personas ---")
+
+    one_shot_example_hypotheses = """
+    "example_input": "Investigate the impact of quantum annealing on protein folding.",
+    "example_output": {
+        "hypotheses": [
+            "Implementing a quantum annealer simulation for a small peptide chain will identify lower energy states faster than a classical simulated annealing approach.",
+            "The choice of qubit connectivity in the quantum annealer's topology significantly impacts the final folded state's accuracy for proteins with long-range interactions.",
+            "Encoding the protein's residue interactions as a QUBO problem is feasible for structures up to 50 amino acids before qubit requirements become prohibitive."
+        ]
+    }
+    """
+    hypotheses_prompt = f"""Based on the following research topic, generate a list of {num_agents} distinct, specific, and empirically testable hypotheses.
+
+    TOPIC: "{query}"
+
+    Return a JSON object with a single key "hypotheses" which is a list of strings.
+
+    Here is an example of the expected input and output format:
+    {one_shot_example_hypotheses}
+
+    Return ONLY the JSON object.
+    """

-
-
-
+    print("Generating hypotheses...")
+    response = get_llm_response(
+        hypotheses_prompt,
+        model=model,
+        provider=provider,
+        npc=alicanto_coordinator,
+        format='json'
+    )

-
+    if not response or not response.get('response'):
+        print("ERROR: Failed to get hypotheses response")
+        return
+
+    hypotheses = response.get('response').get('hypotheses')
+    if not hypotheses:
+        print("ERROR: No hypotheses generated")
+        return
+
+    print(f"Generated {len(hypotheses)} hypotheses:")
+    for i, h in enumerate(hypotheses):
+        print(f" {i+1}. {h}")
+
+    print("\nGenerating agent personas...")
+    personas = generate_sub_agent_personas(
+        query,
+        num_agents,
+        model,
+        provider,
+        alicanto_coordinator
+    )

-
-
-
-        2. Explain why it's both important and neglected
-        3. Suggest an unconventional methodology to explore it
-        4. Describe what a breakthrough in this direction might look like
+    if not personas:
+        print("ERROR: No personas generated")
+        return

-
-
-
+    print(f"Generated {len(personas)} personas:")
+    for i, p in enumerate(personas):
+        print(f" {i+1}. {p.get('name')}: {p.get('persona')}")
+
+    print("\n--- Step 2: Delegating hypotheses to Sub-Agents for serial execution ---")
+
+    all_traces = []
+    for i, hypo in enumerate(hypotheses):
+        persona = personas[i % len(personas)]
+        print(f"\nStarting sub-agent {i+1}/{len(hypotheses)}")
+        trace = sub_agent_trace(
+            hypo,
+            persona,
+            query,
+            model,
+            provider,
+            max_steps
+        )
+        all_traces.append(trace)
+        print(f"Sub-agent {i+1} completed. Success: {trace.was_successful}")
+
+    print(f"\nAll sub-agents completed. Saving traces...")
+    save_trace_for_training(all_traces)
+    compressed_research = compress_traces_for_synthesis(all_traces, model, provider, alicanto_coordinator)
+
+    print("\n--- Step 3: Creating initial paper structure ---")

-
-
-    if isinstance(new_directions, (list, dict)) or hasattr(new_directions, '__iter__') and not isinstance(new_directions, (str, bytes)):
-        new_directions = ''.join([str(chunk) for chunk in new_directions])
+    author_list = [trace.agent_name for trace in all_traces]
+    author_string = ", ".join(author_list)

-
-
-
-
-
+    initial_latex = f"""\\documentclass{{article}}
+\\title{{% TODO: TITLE}}
+\\author{{{author_string}}}
+\\date{{\\today}}
+\\begin{{document}}
+\\maketitle

-
-
-
+\\begin{{abstract}}
+% TODO: ABSTRACT
+\\end{{abstract}}
+
+\\section{{Introduction}}
+% TODO: INTRODUCTION
+
+\\section{{Methods}}
+% TODO: METHODS
+
+\\section{{Results}}
+% TODO: RESULTS
+
+\\section{{Discussion}}
+% TODO: DISCUSSION
+
+\\end{{document}}"""
+
+    create_file("paper.tex", initial_latex)
+
+    print("\n--- Step 4: Iterative paper writing ---")

-
-        content: Raw content to preprocess
-        model: LLM model to use (optional)
-        provider: LLM provider to use (optional)
-        max_words: Maximum word count (default 2000)
-        concise_mode: If True, creates a very short summary instead of full formatting
-
-    Returns:
-        Formatted content ready for PDF generation
-    """
-
-    if not isinstance(content, str):
-        content = str(content)
+    todo_sections = ["TITLE", "ABSTRACT", "INTRODUCTION", "METHODS", "RESULTS", "DISCUSSION"]

-
-
+    for section_round in range(len(todo_sections)):
+        print(f"\n--- Section Round {section_round + 1} ---")

-
-
-
-    provider = NPCSH_CHAT_PROVIDER
-
-    concise_prompt = f"""
-    Summarize the following content into an extremely concise, no-bullshit format with maximum 500 words:
-    {content}
+        current_paper = read_file("paper.tex")
+        sections_status = {section: "EMPTY" if f"% TODO: {section}" in current_paper else "COMPLETE"
+                           for section in todo_sections}

-
-    - Use bullet points for key ideas
-    - Focus only on essential insights
-    - No verbose academic language
-    - No padding or fillers
-    - Just the core ideas in simple language
-    """
+        print(f"Section status: {sections_status}")

-
-
-
-
-
-
-        '#': '-',
-        '_': '-',
-        '~': '-',
-        '^': '',
-        '\\': '/',
-        '{': '(',
-        '}': ')'
-    }.items():
-        content = content.replace(char, replacement)
-
-
-    words = content.split()
-    if len(words) > max_words:
-        content = ' '.join(words[:max_words]) + '... [truncated]'
-
-    return content.strip()
-
-def generate_pdf_report(request: str,
-                        model,
-                        provider,
-                        research: Dict[str, Any],
-                        experiments: Dict[str, Dict[str, Any]],
-                        output_path: str = None,
-                        max_pages: int = 5) -> str:
-    """
-    Generate a professional PDF report using LaTeX for superior formatting, typesetting, and layout.
-
-    Args:
-        request: The original research question
-        research: The consolidated research results
-        experiments: The simulated experiments and their results
-        output_path: Path to save the PDF report (default: current directory)
-        fast_mode: If True, uses simpler formatting
-        concise_mode: If True, drastically reduces content length
-        max_pages: Maximum number of pages to generate (approximate)
+        # Find next section to work on
+        next_section = None
+        for section in todo_sections:
+            if sections_status[section] == "EMPTY":
+                next_section = section
+                break

-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-    try:
-        if "group_evaluations" in research and research["group_evaluations"]:
-
-            figures_dir = os.path.join(output_path, "figures")
-            os.makedirs(figures_dir, exist_ok=True)
-
-            fig, ax = plt.subplots(figsize=(7.5, 4))
-            plt.style.use('ggplot')
-
-            groups = []
-            scores = []
+        if not next_section:
+            print("All sections complete")
+            break
+
+        print(f"Working on section: {next_section}")
+
+        # Autonomous loop for this section (like sub-agents)
+        messages = []
+
+        initial_prompt = f"""You are writing a research paper about: "{query}"
+
+Research data from sub-agents: {compressed_research}
+
+Current paper content:
+{current_paper}
+
+Your task: Complete the {next_section} section by replacing "% TODO: {next_section}" with actual content.
+
+Use replace_in_file to update the paper. Use search_papers or search_web if you need more information.
+
+Focus ONLY on the {next_section} section. Write 2-4 paragraphs of substantial academic content.
+
+Available tools: replace_in_file, read_file, search_papers, search_web"""
+
+        for micro_step in range(5):  # 5 turns per section like sub-agents
+            print(f"\n--- Micro-step {micro_step + 1}/5 for {next_section} ---")

-
-
-
-
-            scores.append(quality_score)
+            if micro_step == 0:
+                current_prompt = initial_prompt
+            else:
+                current_prompt = f"Continue working on the {next_section} section. What's your next action?"

-
-
-
-
+            try:
+                response = alicanto_coordinator.get_llm_response(
+                    current_prompt,
+                    messages=messages,
+                    auto_process_tool_calls=True
+                )
+            except (Timeout, ContextWindowExceededError):
+                break
+
+            messages = response.get('messages', [])

-
-            y_pos = range(len(groups))
-            ax.barh(y_pos, scores, color='steelblue')
-            ax.set_yticks(y_pos)
-            ax.set_yticklabels(groups)
-            ax.set_xlabel('Quality Score (1-10)')
-            ax.set_title('Thematic Groups by Quality Score')
-            plt.tight_layout()

-
-
-
-
-
-
-
-
-    latex_content = generate_latex_document(request, model, provider, research, experiments, chart_path, max_pages)
-
-
-    tex_path = os.path.join(output_path, f"{filename}.tex")
-    with open(tex_path, "w") as f:
-        f.write(latex_content)
-
-
-    try:
-
-        result = subprocess.run(
-            ["pdflatex", "-interaction=nonstopmode", "-output-directory", output_path, tex_path],
-            stdout=subprocess.PIPE,
-            stderr=subprocess.PIPE
-        )
-
-        if result.returncode != 0:
-            print(f"Warning: First LaTeX run had issues (exit code {result.returncode})")
-
-
-
-        result = subprocess.run(
-            ["pdflatex", "-interaction=nonstopmode", "-output-directory", output_path, tex_path],
-            stdout=subprocess.PIPE,
-            stderr=subprocess.PIPE
-        )
-
-        if result.returncode != 0:
-            print(f"Warning: Second LaTeX run had issues (exit code {result.returncode})")
-
-            log_path = os.path.join(output_path, f"{filename}.log")
-            if os.path.exists(log_path):
-                print(f"Check LaTeX log for details: {log_path}")
-    except Exception as e:
-        print(f"Error during LaTeX compilation: {str(e)}")
-        return None
-
-
-    for ext in [".aux", ".out", ".toc"]:
-        try:
-            os.remove(os.path.join(output_path, f"{filename}{ext}"))
-        except OSError:
-            pass
-
-
-    pdf_path = os.path.join(output_path, f"{filename}.pdf")
-    if os.path.exists(pdf_path):
-        print(f"PDF report successfully generated using LaTeX: {pdf_path}")
-        return pdf_path
-    else:
-        print(f"PDF generation failed. Check the LaTeX log for details.")
-        return None
+    final_paper = read_file("paper.tex")
+    print(f"\n{'='*60}")
+    print("FINAL RESEARCH PAPER (LATEX)")
+    print("="*60)
+    print(final_paper)
+    print(f"\nPaper saved as paper.tex")

-def generate_latex_document(request: str, model, provider, research: Dict[str, Any], experiments: Dict[str, Dict[str, Any]],
-                            chart_path: str = None, max_pages: int = 5) -> str:
-    """
-    Generate LaTeX document content.
-
-    Args:
-        request: The research topic
-        research: Research results
-        experiments: Experiments data
-        chart_path: Path to the thematic groups chart
-        max_pages: Maximum number of pages (approximate)
-
-    Returns:
-        LaTeX document content as a string
-    """
-
-    figure_paths = {}
-    if chart_path:
-
-        figure_paths["thematic_groups"] = os.path.basename(chart_path)
-
-
-
-    if isinstance(experiments, dict):
-        for title in experiments.keys():
-            sanitized_title = title.replace(" ", "_")
-            potential_image = f"{sanitized_title}_experiment.png"
-            if os.path.exists(potential_image):
-                figure_paths[sanitized_title] = potential_image
-
-
-    figure_path_description_dict = {}
-    for name, path in figure_paths.items():
-        figure_path_description_dict[name] = path
-
-
-    prompt = f'''
-    Generate a LaTeX document for a research report on the topic: "{request}"
-    Here is the summary of the research: {research}
-
-    Here is the summary of the experiments: {experiments}''' +"""
-    Write your response in a way that academically details the research, its motivation, and experiments
-    and ensure any place where a citation may be needed is indicated by including an empty '\\cite{citation_needed}'
-
-    IMPORTANT INSTRUCTIONS FOR DOCUMENT PREPARATION:
-    1. DO NOT include \\bibliography{references} or any bibliography commands, as we don't have a references file
-    2. Instead, create a \\begin{thebibliography}{99} ... \\end{thebibliography} section with example references
-    3. For figures, use relative paths like 'figures/thematic_groups.pdf' rather than absolute paths
-    4. Make sure all LaTeX commands are properly formatted and do not use undefined packages
-    5. Keep the document structure simple and robust to avoid compilation errors
-    """+f"""
-    The figures are located at the following paths: {figure_path_description_dict}
-    """
-
-
-    latex_response = get_llm_response(prompt=prompt, model=model, provider=provider )
-    latex_content = latex_response.get('response', '')
-
-
-    latex_content = latex_content.replace('\\bibliography{references}', '')
-    latex_content = latex_content.replace('\\bibliographystyle{plain}', '')
-
-
-    latex_content = latex_content.replace('/home/caug/npcww/npcsh/figures/', 'figures/')
-
-
-    if '\\begin{thebibliography}' not in latex_content and '\\end{document}' in latex_content:
-        bibliography = """
-\\begin{thebibliography}{9}
-\\bibitem{citation1} Author, A. (2023). Title of the work. Journal Name, 10(2), 123-456.
-\\bibitem{citation2} Researcher, B. (2022). Another relevant publication. Conference Proceedings, 789-012.
-\\end{thebibliography}
-"""
-        latex_content = latex_content.replace('\\end{document}', f'{bibliography}\n\\end{{document}}')
-
-    return latex_content

+def main():
+    parser = argparse.ArgumentParser(description="Alicanto Multi-Agent Research System")
+    parser.add_argument("topic", help="Research topic to investigate")
+    parser.add_argument("--num-agents", type=int, default=3, help="Number of sub-agents to run.")
+    parser.add_argument("--max-steps", type=int, default=10, help="Maximum steps for each sub-agent.")
+    parser.add_argument("--model", default=NPCSH_CHAT_MODEL, help="LLM model to use")
+    parser.add_argument("--provider", default=NPCSH_CHAT_PROVIDER, help="LLM provider to use")
+
+    args = parser.parse_args()
+
+    alicanto(
+        query=args.topic,
+        num_agents=args.num_agents,
+        max_steps=args.max_steps,
+        model=args.model,
+        provider=args.provider
+    )
+
+if __name__ == "__main__":
+    main()
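The new `main()` exposes `alicanto()` through argparse, so the pipeline can be driven either from Python or from a shell. A minimal usage sketch, assuming the module is importable as `npcsh.alicanto` (it lives at npcsh/alicanto.py) and that the default `NPCSH_CHAT_MODEL`/`NPCSH_CHAT_PROVIDER` configuration is in place; the topic string is only an example:

```python
# Equivalent CLI call (wired up by the new main()):
#   python -m npcsh.alicanto "impact of quantum annealing on protein folding" --num-agents 3 --max-steps 10
from npcsh.alicanto import alicanto

alicanto(
    query="impact of quantum annealing on protein folding",
    num_agents=3,   # one hypothesis is delegated to each sub-agent persona
    max_steps=10,   # cap on autonomous steps per sub-agent
)
# Progress is printed per step; the synthesized draft is left in ./paper.tex
# and the flattened step traces in ./alicanto_traces/.
```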