npcsh 0.3.32__py3-none-any.whl → 1.0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- npcsh/_state.py +942 -0
- npcsh/alicanto.py +1074 -0
- npcsh/guac.py +785 -0
- npcsh/mcp_helpers.py +357 -0
- npcsh/mcp_npcsh.py +822 -0
- npcsh/mcp_server.py +184 -0
- npcsh/npc.py +218 -0
- npcsh/npcsh.py +1161 -0
- npcsh/plonk.py +387 -269
- npcsh/pti.py +234 -0
- npcsh/routes.py +958 -0
- npcsh/spool.py +315 -0
- npcsh/wander.py +550 -0
- npcsh/yap.py +573 -0
- npcsh-1.0.1.dist-info/METADATA +596 -0
- npcsh-1.0.1.dist-info/RECORD +21 -0
- {npcsh-0.3.32.dist-info → npcsh-1.0.1.dist-info}/WHEEL +1 -1
- npcsh-1.0.1.dist-info/entry_points.txt +9 -0
- {npcsh-0.3.32.dist-info → npcsh-1.0.1.dist-info}/licenses/LICENSE +1 -1
- npcsh/audio.py +0 -569
- npcsh/audio_gen.py +0 -1
- npcsh/cli.py +0 -543
- npcsh/command_history.py +0 -566
- npcsh/conversation.py +0 -54
- npcsh/data_models.py +0 -46
- npcsh/dataframes.py +0 -171
- npcsh/embeddings.py +0 -168
- npcsh/helpers.py +0 -646
- npcsh/image.py +0 -298
- npcsh/image_gen.py +0 -79
- npcsh/knowledge_graph.py +0 -1006
- npcsh/llm_funcs.py +0 -2195
- npcsh/load_data.py +0 -83
- npcsh/main.py +0 -5
- npcsh/model_runner.py +0 -189
- npcsh/npc_compiler.py +0 -2879
- npcsh/npc_sysenv.py +0 -388
- npcsh/npc_team/assembly_lines/test_pipeline.py +0 -181
- npcsh/npc_team/corca.npc +0 -13
- npcsh/npc_team/foreman.npc +0 -7
- npcsh/npc_team/npcsh.ctx +0 -11
- npcsh/npc_team/sibiji.npc +0 -4
- npcsh/npc_team/templates/analytics/celona.npc +0 -0
- npcsh/npc_team/templates/hr_support/raone.npc +0 -0
- npcsh/npc_team/templates/humanities/eriane.npc +0 -4
- npcsh/npc_team/templates/it_support/lineru.npc +0 -0
- npcsh/npc_team/templates/marketing/slean.npc +0 -4
- npcsh/npc_team/templates/philosophy/maurawa.npc +0 -0
- npcsh/npc_team/templates/sales/turnic.npc +0 -4
- npcsh/npc_team/templates/software/welxor.npc +0 -0
- npcsh/npc_team/tools/bash_executer.tool +0 -32
- npcsh/npc_team/tools/calculator.tool +0 -8
- npcsh/npc_team/tools/code_executor.tool +0 -16
- npcsh/npc_team/tools/generic_search.tool +0 -27
- npcsh/npc_team/tools/image_generation.tool +0 -25
- npcsh/npc_team/tools/local_search.tool +0 -149
- npcsh/npc_team/tools/npcsh_executor.tool +0 -9
- npcsh/npc_team/tools/screen_cap.tool +0 -27
- npcsh/npc_team/tools/sql_executor.tool +0 -26
- npcsh/response.py +0 -272
- npcsh/search.py +0 -252
- npcsh/serve.py +0 -1467
- npcsh/shell.py +0 -524
- npcsh/shell_helpers.py +0 -3919
- npcsh/stream.py +0 -233
- npcsh/video.py +0 -52
- npcsh/video_gen.py +0 -69
- npcsh-0.3.32.data/data/npcsh/npc_team/bash_executer.tool +0 -32
- npcsh-0.3.32.data/data/npcsh/npc_team/calculator.tool +0 -8
- npcsh-0.3.32.data/data/npcsh/npc_team/celona.npc +0 -0
- npcsh-0.3.32.data/data/npcsh/npc_team/code_executor.tool +0 -16
- npcsh-0.3.32.data/data/npcsh/npc_team/corca.npc +0 -13
- npcsh-0.3.32.data/data/npcsh/npc_team/eriane.npc +0 -4
- npcsh-0.3.32.data/data/npcsh/npc_team/foreman.npc +0 -7
- npcsh-0.3.32.data/data/npcsh/npc_team/generic_search.tool +0 -27
- npcsh-0.3.32.data/data/npcsh/npc_team/image_generation.tool +0 -25
- npcsh-0.3.32.data/data/npcsh/npc_team/lineru.npc +0 -0
- npcsh-0.3.32.data/data/npcsh/npc_team/local_search.tool +0 -149
- npcsh-0.3.32.data/data/npcsh/npc_team/maurawa.npc +0 -0
- npcsh-0.3.32.data/data/npcsh/npc_team/npcsh.ctx +0 -11
- npcsh-0.3.32.data/data/npcsh/npc_team/npcsh_executor.tool +0 -9
- npcsh-0.3.32.data/data/npcsh/npc_team/raone.npc +0 -0
- npcsh-0.3.32.data/data/npcsh/npc_team/screen_cap.tool +0 -27
- npcsh-0.3.32.data/data/npcsh/npc_team/sibiji.npc +0 -4
- npcsh-0.3.32.data/data/npcsh/npc_team/slean.npc +0 -4
- npcsh-0.3.32.data/data/npcsh/npc_team/sql_executor.tool +0 -26
- npcsh-0.3.32.data/data/npcsh/npc_team/test_pipeline.py +0 -181
- npcsh-0.3.32.data/data/npcsh/npc_team/turnic.npc +0 -4
- npcsh-0.3.32.data/data/npcsh/npc_team/welxor.npc +0 -0
- npcsh-0.3.32.dist-info/METADATA +0 -779
- npcsh-0.3.32.dist-info/RECORD +0 -78
- npcsh-0.3.32.dist-info/entry_points.txt +0 -3
- {npcsh-0.3.32.dist-info → npcsh-1.0.1.dist-info}/top_level.txt +0 -0
npcsh/alicanto.py
ADDED
@@ -0,0 +1,1074 @@
import os
import random
from typing import List, Dict, Any, Optional, Union, Tuple
import numpy as np
from collections import defaultdict, Counter
import itertools
import matplotlib.pyplot as plt
from matplotlib.figure import Figure
from io import BytesIO
import base64
import datetime
import tempfile
import subprocess
import networkx as nx

from npcpy.npc_compiler import NPC
from npcpy.llm_funcs import get_llm_response
from npcsh._state import NPCSH_CHAT_MODEL, NPCSH_CHAT_PROVIDER
from npcpy.npc_sysenv import print_and_process_stream_with_markdown
from npcpy.memory.deep_research import consolidate_research
from npcpy.memory.knowledge_graph import extract_facts, identify_groups, assign_groups_to_fact

def generate_random_npcs(num_npcs: int, model: str, provider: str, request: str) -> List[NPC]:
    """
    Generate a diverse set of NPCs with different expertise and perspectives
    related to the research request.
    """
    # For single NPC, use a simpler approach to avoid unnecessary LLM calls
    if num_npcs == 1:
        # Generate directly without complex JSON parsing
        name = f"Expert Researcher on {request}"
        expertise = "Interdisciplinary semantic theory researcher"
        background = "Extensive experience in linguistics, cognitive science, and NLP"
        perspective = "Combines formal logic with empirical linguistic evidence"
        quirk = "Uses mathematical metaphors to explain language phenomena"
        biases = "May favor formal approaches over descriptive linguistics"

        system_prompt = f"""
        You are {name}, {expertise}.

        Background: {background}

        Your perspective: {perspective}

        Your methodological quirk: {quirk}

        Note: Be aware that you may have these biases: {biases}

        Your task is to research the given topic thoroughly, focusing on your unique perspective.
        Challenge conventional thinking and identify unexpected connections.
        Your insights should be provocative and novel.

        IMPORTANT: You must be extremely concise. Limit responses to 50-75 words maximum.
        """

        npc = NPC(name=name, primary_directive=f"Research expert on {request}")
        npc.system_prompt = system_prompt
        return [npc]

    # Generate diverse expert personas based on the research topic
    prompt = f"""
    For the research topic: "{request}"

    Generate {num_npcs} diverse expert personas who would have different valuable perspectives on this topic.
    I need truly diverse and unusual viewpoints that can lead to innovative insights.

    For each expert, provide:
    1. A name
    2. Their field of expertise (be creative - include unconventional and interdisciplinary fields)
    3. Their background/experience (include unusual career paths and perspectives)
    4. Their unique perspective or approach to the topic (emphasize contrarian, minority, or unexpected viewpoints)
    5. A methodological quirk that makes their research approach unusual
    6. Any potential biases they might have
    """

    response = get_llm_response(
        prompt=prompt,
        model=model,
        provider=provider,
        format="json"  # Directly request JSON format
    )

    # Response will be properly structured JSON from get_llm_response
    experts_data = response.get('response', [])

    # Create NPC instances from expert data
    npcs = []

    # Handle experts_data safely whether it's a list or not
    if isinstance(experts_data, list):
        experts_to_process = experts_data[:num_npcs]
    else:
        # If not a list, try to convert or use as a single item
        if isinstance(experts_data, dict):
            experts_to_process = [experts_data]
        else:
            # Create a basic expert as fallback
            experts_to_process = [{
                "name": f"Expert_1",
                "expertise": "Interdisciplinary researcher",
                "background": "Diverse academic and practical experience",
                "perspective": "Balanced analysis with focus on innovative connections",
                "methodological_quirk": "Uses unconventional conceptual frameworks",
                "biases": "Tends toward theoretical rather than practical solutions"
            }]

    for expert in experts_to_process:
        name = expert.get("name", f"Expert_{len(npcs)}")

        # Create a system prompt that defines this NPC's expertise and perspective
        system_prompt = f"""
        You are {name}, {expert.get('expertise', 'an expert researcher')}.

        Background: {expert.get('background', 'You have extensive knowledge in your field.')}

        Your perspective: {expert.get('perspective', 'You provide detailed, balanced analysis.')}

        Your methodological quirk: {expert.get('methodological_quirk', 'You approach problems in unconventional ways.')}

        Note: Be aware that you may have these biases: {expert.get('biases', 'None specifically noted.')}

        Your task is to research the given topic thoroughly, focusing on your unique perspective and methodological approach.
        Challenge conventional thinking, explore neglected angles, and identify unexpected connections or contradictions.
        Your insights should be provocative and novel, not just rehashing mainstream views.

        IMPORTANT: You must be extremely concise. Limit responses to 50-75 words maximum. Focus on substance over verbosity.
        Prioritize precision, clarity, and insight density. Eliminate unnecessary words and focus on communicating
        the essence of your insights in the most efficient way possible.
        """

        # Create NPC with name and primary_directive (required parameters)
        npc = NPC(name=name, primary_directive=f"Research expert on {request}")
        npc.system_prompt = system_prompt
        npcs.append(npc)

    return npcs
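
Editor's note (illustrative, not part of the diff): `generate_random_npcs` expects the JSON payload from `get_llm_response` to be a list of persona dicts keyed by `name`, `expertise`, `background`, `perspective`, `methodological_quirk`, and `biases`, and falls back to a single generic persona otherwise. A minimal usage sketch, where the model and provider names are placeholders:

    # Illustrative sketch only -- not part of the packaged module.
    from npcsh.alicanto import generate_random_npcs

    npcs = generate_random_npcs(
        num_npcs=3,
        model="llama3.2",      # hypothetical model name
        provider="ollama",     # hypothetical provider
        request="formal semantics of programming languages",
    )
    for npc in npcs:
        print(npc.name)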

def generate_research_chain(request: str, npc: NPC, depth: int, memory: int = 3,
                            context: str = None, model: str = None, provider: str = None,
                            exploration_factor: float = 0.3,
                            creativity_factor: float = 0.5) -> List[str]:
    """
    Generate a chain of research thoughts from a single NPC, diving deeper with each step.

    Args:
        request: The research question/topic
        npc: The NPC generating the research
        depth: How many steps of research to perform
        memory: How many previous steps to include in context
        context: Additional context to include
        model: LLM model to use
        provider: LLM provider to use
        exploration_factor: Probability (0-1) of exploring a tangential direction
        creativity_factor: Probability (0-1) of pursuing highly creative or unusual ideas

    Returns:
        List of research findings/thoughts from this chain
    """
    chain = []

    # Initial research prompt
    initial_prompt = f"""
    Research request: {request}

    {f"Additional context: {context}" if context else ""}

    As {npc.name}, begin your research process by:
    1. Analyzing what you know about this topic
    2. Identifying key questions that need to be explored
    3. Providing initial insights based on your expertise and unique perspective

    BE EXTREMELY CONCISE. Focus on substance over wordiness. Provide clear, high-value insights in 50-75 words maximum.
    """

    response = get_llm_response(prompt=initial_prompt, model=model, provider=provider, npc=npc, temperature=0.7)
    initial_findings = response.get('response', '')
    if isinstance(initial_findings, (list, dict)) or hasattr(initial_findings, '__iter__') and not isinstance(initial_findings, (str, bytes)):
        initial_findings = ''.join([str(chunk) for chunk in initial_findings])

    chain.append(initial_findings)

    # For each level of depth, continue the research
    for i in range(1, depth):
        # Get recent memory to include as context
        memory_context = "\n\n".join(chain[-memory:]) if len(chain) > 0 else ""

        # Simple follow-up prompt without specific research modes
        next_prompt = f"""
        Research request: {request}

        Recent research findings:
        {memory_context}

        As {npc.name}, continue your research on this topic. Build on previous insights and explore new aspects.

        BE EXTREMELY CONCISE. Keep your response to 50-75 words maximum.
        """

        response = get_llm_response(prompt=next_prompt, model=model, provider=provider, npc=npc, temperature=0.7)
        next_findings = response.get('response', '')
        if isinstance(next_findings, (list, dict)) or hasattr(next_findings, '__iter__') and not isinstance(next_findings, (str, bytes)):
            next_findings = ''.join([str(chunk) for chunk in next_findings])

        chain.append(next_findings)

    return chain
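
Editor's note (illustrative, not part of the diff): a single chain can be driven directly once an NPC exists. As the body above shows, `exploration_factor` and `creativity_factor` are accepted but not yet used to vary the prompts. A rough sketch, with placeholder model/provider values:

    # Illustrative sketch only -- model/provider values are placeholders.
    from npcpy.npc_compiler import NPC
    from npcsh.alicanto import generate_research_chain

    npc = NPC(name="Test Researcher", primary_directive="Research expert on graph databases")
    chain = generate_research_chain(
        request="graph databases for supply-chain analytics",
        npc=npc,
        depth=3,
        memory=2,
        model="llama3.2",    # hypothetical
        provider="ollama",   # hypothetical
    )
    print(len(chain), "findings")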

def format_facts_list(facts: List[str]) -> str:
    """Format a list of facts for display in a report"""
    return "\n".join([f"• {fact}" for fact in facts])

def simulate_experiments(research: Dict[str, Any], request: str, model: str = None, provider: str = None, max_experiments: int = None) -> Dict[str, Dict[str, Any]]:
    """
    Simulate thought experiments based on research findings

    Args:
        research: Consolidated research data
        request: Original research question
        model: LLM model to use
        provider: LLM provider to use
        max_experiments: Maximum number of experiments to generate

    Returns:
        Dictionary mapping experiment titles to experiment data
    """
    # Prepare context with key facts
    facts_context = ""

    # Add facts from thematic groups
    if "fact_groups" in research:
        for group, facts in list(research["fact_groups"].items())[:5]:  # Use top 5 groups
            facts_context += f"\n\nThematic Group: {group}\n"
            facts_context += format_facts_list(facts)

    # Add insights from combinations
    if "combination_insights" in research:
        facts_context += "\n\nEmergent Insights:\n"
        for combo in research["combination_insights"][:3]:  # Use top 3 insights
            facts_context += f"• {combo.get('emergent_insight', '')}\n"

    # Create prompt to design experiments
    prompt = f"""
    You are a creative research scientist exploring the topic: "{request}"

    Based on the following research findings:

    {facts_context}

    Design {max_experiments if max_experiments else "3-5"} thought experiments that could test, validate, or extend these insights.

    For each experiment:
    1. Create a descriptive title that captures the experiment's focus
    2. Describe the experimental design/methodology (be specific and detailed)
    3. Predict the potential results and their implications
    4. Explain how these results would advance our understanding of {request}

    Format your response as JSON with this structure:
    {{
        "experiment_title_1": {{
            "design": "detailed description of experimental design",
            "results": "predicted results and implications"
        }},
        "experiment_title_2": {{
            ...
        }}
    }}

    Be bold and imaginative in your experimental designs. Consider unconventional approaches,
    simulations, thought experiments, and interdisciplinary methods.
    """

    response = get_llm_response(prompt=prompt, model=model, provider=provider, temperature=0.8, format="json")
    experiments = response.get("response", {})

    # Limit experiments if needed
    if max_experiments and isinstance(experiments, dict) and len(experiments) > max_experiments:
        # Sort by title length (approximating complexity/interestingness)
        sorted_exps = sorted(experiments.items(), key=lambda x: len(x[0]), reverse=True)
        experiments = dict(sorted_exps[:max_experiments])

    return experiments

def alicanto(request: str,
             num_npcs: int = 5,
             depth: int = 3, memory: int = 3,
             context: str = None,
             model: str = None,
             provider: str = None,
             exploration_factor: float = 0.3,
             creativity_factor: float = 0.5,
             output_format: str = "report",
             max_facts_per_chain: int = None,
             max_thematic_groups: int = None,
             max_criticisms_per_group: int = None,
             max_conceptual_combinations: int = None,
             max_experiments: int = None,
             generate_pdf: bool = True) -> Dict[str, Any]:
    """
    Alicanto: Generate diverse research insights by coordinating multiple NPCs with different expertise.

    Args:
        request: The research question/topic
        num_npcs: Number of NPCs to generate (with different expertise)
        depth: Depth of research for each NPC
        memory: How many previous steps to include in context
        context: Additional context to include
        model: LLM model to use
        provider: LLM provider to use
        exploration_factor: Probability (0-1) of exploring a tangential direction
        creativity_factor: Probability (0-1) of pursuing highly creative or unusual ideas
        output_format: Format of the output ("report", "json", "markdown")
        max_facts_per_chain: Maximum number of facts to extract per research chain
        max_thematic_groups: Maximum number of thematic groups to identify
        max_criticisms_per_group: Maximum number of criticisms per thematic group
        max_conceptual_combinations: Maximum number of conceptual combinations to generate
        max_experiments: Maximum number of experiments to generate
        generate_pdf: Whether to generate a PDF report

    Returns:
        Dictionary with research results
    """
    # Use default model/provider if not specified
    if model is None:
        model = NPCSH_CHAT_MODEL
    if provider is None:
        provider = NPCSH_CHAT_PROVIDER

    # Generate researcher NPCs with diverse expertise
    print(f"Generating {num_npcs} diverse researcher NPCs...")
    researchers = generate_random_npcs(num_npcs, model, provider, request)

    # Generate research chains for each NPC
    print(f"Generating research chains (depth={depth})...")
    research_chains = {}
    facts_by_researcher = {}

    for npc in researchers:
        print(f" Research chain from {npc.name}...")
        chain = generate_research_chain(
            request=request,
            npc=npc,
            depth=depth,
            memory=memory,
            context=context,
            model=model,
            provider=provider,
            exploration_factor=exploration_factor,
            creativity_factor=creativity_factor
        )
        research_chains[npc.name] = chain

        # Extract facts from chain
        print(f" Extracting facts from {npc.name}'s research...")
        facts = extract_facts("\n\n".join(chain), model=model, provider=provider, npc=npc, context=request)

        # Limit facts if specified
        if max_facts_per_chain is not None and len(facts) > max_facts_per_chain:
            facts = facts[:max_facts_per_chain]

        facts_by_researcher[npc.name] = facts
        print({"fact_list": facts})

    # Identify thematic groups across all research
    print("Identifying thematic groups across all research insights...")
    all_facts = []
    for researcher_facts in facts_by_researcher.values():
        all_facts.extend(researcher_facts)

    groups = identify_groups(all_facts, model=model, provider=provider)

    # Limit number of groups if specified
    if max_thematic_groups is not None and len(groups) > max_thematic_groups:
        groups = groups[:max_thematic_groups]

    # Assign facts to groups
    fact_groups = {group: [] for group in groups}
    for fact in all_facts:
        group_assignments = assign_groups_to_fact(fact, groups, model=model, provider=provider)
        assigned_groups = group_assignments.get("groups", [])
        for group in assigned_groups:
            if group in fact_groups:
                fact_groups[group].append(fact)

    # Evaluate thematic groups
    print("Evaluating thematic groups for quality and risk...")
    group_evaluations = evaluate_thematic_groups(
        fact_groups,
        request,
        model=model,
        provider=provider,
        max_criticisms=max_criticisms_per_group
    )

    # Generate group summaries
    group_summaries = {}
    for group_name, facts in fact_groups.items():
        if not facts:
            continue

        prompt = f"""
        Summarize the key insights from this thematic group of research findings on the topic:
        "{request}"

        Thematic Group: {group_name}

        Findings:
        {format_facts_list(facts)}

        Provide a concise, coherent synthesis that captures the core ideas,
        emphasizes what's most novel or significant, and suggests potential implications.
        Keep your response to 200-300 words.
        """

        response = get_llm_response(prompt=prompt, model=model, provider=provider)
        summary = response.get('response', '')
        if isinstance(summary, (list, dict)) or hasattr(summary, '__iter__') and not isinstance(summary, (str, bytes)):
            summary = ''.join([str(chunk) for chunk in summary])

        group_summaries[group_name] = summary

    # Generate conceptual combinations to spark novel ideas
    print("Generating conceptual combinations to spark novel insights...")
    fact_lists = list(facts_by_researcher.values())
    combinations = generate_conceptual_combinations(
        fact_lists,
        sample_size=min(3, len(all_facts)),
        num_combinations=max_conceptual_combinations if max_conceptual_combinations is not None else 5
    )

    # Analyze combinations for emergent insights
    print("Analyzing conceptual combinations for emergent insights...")
    combination_insights = analyze_conceptual_combinations(
        combinations,
        request,
        model=model,
        provider=provider
    )

    # Identify meta-patterns
    print("Identifying meta-patterns across research approaches...")
    meta_patterns = identify_patterns_across_chains(research_chains, model=model, provider=provider)

    # Generate consolidated research summary
    print("Consolidating research into comprehensive synthesis...")

    # Extract key points for integration
    integration_points = []

    # Add top facts from each thematic group
    for group, facts in fact_groups.items():
        if facts:
            integration_points.append(f"From thematic group '{group}':")
            for fact in facts[:3]:  # Top 3 facts per group
                integration_points.append(f"- {fact}")

    # Add insights from combinations
    for insight in combination_insights[:3]:  # Top 3 insights
        integration_points.append(f"Emergent insight: {insight.get('emergent_insight', '')}")

    # Add key points from meta-analysis
    integration_points.append(f"Meta-analysis insight: {meta_patterns.get('meta_analysis', '')[:300]}...")

    # Generate integration
    integration_prompt = f"""
    Consolidate these diverse research findings into a comprehensive, integrative analysis of the topic:
    "{request}"

    Key points from the research:
    {format_facts_list(integration_points)}

    Your consolidation should:
    1. Provide a coherent synthesis of the diverse perspectives
    2. Identify the most significant findings and patterns
    3. Note any tensions, contradictions, or complementary insights
    4. Suggest an integrated framework for understanding the topic
    5. Briefly outline implications and future directions

    Aim for a comprehensive, balanced, and insightful analysis (300-500 words).
    """

    integration_response = get_llm_response(integration_prompt, model=model, provider=provider)
    integration = integration_response.get('response', '')
    if isinstance(integration, (list, dict)) or hasattr(integration, '__iter__') and not isinstance(integration, (str, bytes)):
        integration = ''.join([str(chunk) for chunk in integration])

    # Create concise summary
    summary_prompt = f"""
    Create a concise executive summary (150 words max) of this research on:
    "{request}"

    Integration:
    {integration}

    Focus on the most significant findings and implications. This should be suitable for someone who only has time to read a brief overview.
    """

    summary_response = get_llm_response(summary_prompt, model=model, provider=provider)
    ideas_summarized = summary_response.get('response', '')
    if isinstance(ideas_summarized, (list, dict)) or hasattr(ideas_summarized, '__iter__') and not isinstance(ideas_summarized, (str, bytes)):
        ideas_summarized = ''.join([str(chunk) for chunk in ideas_summarized])

    # Simulate experiments
    print("Generating simulated experiments...")
    research_results = {
        "research_request": request,
        "research_chains": research_chains,
        "fact_groups": fact_groups,
        "group_evaluations": group_evaluations,
        "group_summaries": group_summaries,
        "combination_insights": combination_insights,
        "meta_patterns": meta_patterns,
        "integration": integration,
        "ideas_summarized": ideas_summarized
    }

    experiments = simulate_experiments(
        research_results,
        request,
        model=model,
        provider=provider,
        max_experiments=max_experiments
    )

    # Generate PDF report if requested
    pdf_path = None
    if generate_pdf:
        pdf_path = generate_pdf_report(request, model, provider, research_results, experiments)

    # Final research results
    research_results["experiments"] = experiments
    research_results["pdf_path"] = pdf_path

    return research_results
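
Editor's note (illustrative, not part of the diff): `alicanto` is the orchestration entry point. It fans out to `generate_random_npcs`, runs a `generate_research_chain` per NPC, extracts and groups facts via `npcpy.memory.knowledge_graph`, then layers group evaluations, conceptual combinations, meta-patterns, an integration pass, simulated experiments, and optionally a LaTeX/PDF report. A hedged end-to-end sketch, assuming the default model and provider resolved from `npcsh._state` are usable in your environment:

    # Illustrative sketch only -- not part of the packaged module.
    from npcsh.alicanto import alicanto

    results = alicanto(
        "How might supply-chain data be modeled as a knowledge graph?",
        num_npcs=3,
        depth=2,
        generate_pdf=False,  # skip the pdflatex dependency for a quick run
    )
    print(results["ideas_summarized"])
    print(list(results["fact_groups"].keys()))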

def evaluate_thematic_groups(fact_groups: Dict[str, List[str]], request: str, model: str = None, provider: str = None, max_criticisms: int = None) -> Dict[str, Dict[str, int]]:
    """
    Evaluate each thematic group for quality, potential risks, and biases.

    Args:
        fact_groups: Dictionary mapping group names to lists of facts
        request: The original research question
        model: LLM model to use
        provider: LLM provider to use
        max_criticisms: Maximum number of criticisms to generate per group

    Returns:
        Dictionary mapping group names to evaluation metrics
    """
    evaluations = {}

    for group_name, facts in fact_groups.items():
        facts_text = format_facts_list(facts)

        prompt = f"""
        Evaluate this thematic group of research insights on the topic:
        "{request}"

        Thematic Group: {group_name}

        Insights:
        {facts_text}

        Evaluate this group of insights on a scale of 1-10 (where 10 is highest) for:
        1. Novelty: How original and non-obvious are these insights?
        2. Depth: How deeply do they explore the underlying concepts?
        3. Practicality: How useful are these insights for further research or application?
        4. Evidence: How well-supported do these claims appear to be?
        5. Risk: What is the chance that these insights lead to problematic directions or dead ends?

        Then identify potential weaknesses, biases, or limitations in these insights.
        {f"Provide exactly {max_criticisms} criticisms." if max_criticisms is not None else ""}

        Format your response as:
        Novelty: [score]
        Depth: [score]
        Practicality: [score]
        Evidence: [score]
        Risk: [score]

        Criticisms:
        1. [First criticism]
        2. [Second criticism]
        ...
        """

        response = get_llm_response(prompt=prompt, model=model, provider=provider)
        eval_text = response.get('response', '')
        if isinstance(eval_text, (list, dict)) or hasattr(eval_text, '__iter__') and not isinstance(eval_text, (str, bytes)):
            eval_text = ''.join([str(chunk) for chunk in eval_text])

        # Parse scores
        scores = {}
        criticisms = []
        in_criticisms = False

        for line in eval_text.split('\n'):
            line = line.strip()
            if not line:
                continue

            if line.lower() == "criticisms:":
                in_criticisms = True
                continue

            if in_criticisms:
                # Parse criticisms
                if line[0].isdigit() and line[1:].startswith('. '):
                    criticism = line[line.find(' ')+1:].strip()
                    criticisms.append(criticism)
            else:
                # Parse scores
                if ':' in line:
                    metric, score_str = line.split(':', 1)
                    metric = metric.strip()
                    try:
                        score = int(score_str.strip())
                        scores[metric] = score
                    except ValueError:
                        pass

        # Apply criticism limit if specified
        if max_criticisms is not None and len(criticisms) > max_criticisms:
            criticisms = criticisms[:max_criticisms]

        evaluations[group_name] = {
            **scores,
            "criticisms": criticisms
        }

    return evaluations

def generate_conceptual_combinations(fact_lists: List[List[str]], sample_size: int = 3, num_combinations: int = 5) -> List[Dict]:
    """
    Generate interesting combinations of facts from different researchers to spark novel ideas.

    Args:
        fact_lists: List of fact lists from different NPCs
        sample_size: Number of facts to include in each combination
        num_combinations: Number of combinations to generate

    Returns:
        List of dictionaries containing the combinations and generated insights
    """
    # Flatten facts with researcher ID
    all_facts_with_source = []
    for i, facts in enumerate(fact_lists):
        for fact in facts:
            all_facts_with_source.append((i, fact))

    # Generate random combinations
    combinations = []
    for _ in range(num_combinations):
        if len(all_facts_with_source) <= sample_size:
            sample = all_facts_with_source
        else:
            sample = random.sample(all_facts_with_source, sample_size)

        combinations.append({
            "facts": [fact for _, fact in sample],
            "sources": [source for source, _ in sample]
        })

    return combinations

def analyze_conceptual_combinations(combinations: List[Dict], request: str, model: str = None, provider: str = None) -> List[Dict]:
    """
    Analyze combinations of facts to identify emergent patterns and generate novel hypotheses.

    Args:
        combinations: List of fact combinations
        request: The original research question
        model: LLM model to use
        provider: LLM provider to use

    Returns:
        List of dictionaries with analysis results
    """
    results = []

    for i, combo in enumerate(combinations):
        facts_formatted = format_facts_list(combo["facts"])

        prompt = f"""
        Consider these seemingly unrelated insights from different researchers exploring the topic:
        "{request}"

        {facts_formatted}

        Your task is to identify a non-obvious connection, pattern, or insight that emerges when these ideas are juxtaposed.
        Focus on discovering something truly novel that none of the individual researchers may have recognized.

        1. Identify a surprising emergent pattern or connection
        2. Develop a novel hypothesis or research question based on this pattern
        3. Explain how this insight challenges or extends conventional thinking on the topic
        4. Suggest an unconventional methodology or approach to explore this new direction

        Be bold, imaginative, and interdisciplinary in your thinking.
        """

        response = get_llm_response(prompt=prompt, model=model, provider=provider, temperature=0.9)
        insight = response.get('response', '')
        if isinstance(insight, (list, dict)) or hasattr(insight, '__iter__') and not isinstance(insight, (str, bytes)):
            insight = ''.join([str(chunk) for chunk in insight])

        results.append({
            "combination_id": i+1,
            "facts": combo["facts"],
            "sources": combo["sources"],
            "emergent_insight": insight
        })

    return results

def identify_patterns_across_chains(chains: Dict[str, List[str]], model: str = None, provider: str = None) -> Dict:
    """
    Identify meta-patterns across research chains, searching for higher-order insights.

    Args:
        chains: Dictionary mapping NPC names to their research chains
        model: LLM model to use
        provider: LLM provider to use

    Returns:
        Dictionary with meta-analysis results
    """
    # Prepare a summary of each research chain
    chain_summaries = {}
    for name, chain in chains.items():
        full_text = "\n\n".join(chain)

        summary_prompt = f"""
        Summarize the key themes, methodologies, and unusual perspectives in this research chain:

        {full_text[:2000]}...

        Focus on what makes this researcher's approach unique or valuable. Identify their core assumptions,
        methodological innovations, and blindspots (150-200 words).
        """

        response = get_llm_response(prompt=summary_prompt, model=model, provider=provider)
        summary = response.get('response', '')
        if isinstance(summary, (list, dict)) or hasattr(summary, '__iter__') and not isinstance(summary, (str, bytes)):
            summary = ''.join([str(chunk) for chunk in summary])

        chain_summaries[name] = summary

    # Generate meta-analysis across all chains
    all_summaries = "\n\n".join([f"[{name}]\n{summary}" for name, summary in chain_summaries.items()])

    meta_analysis_prompt = f"""
    Analyze these research approaches on the topic:

    {all_summaries}

    Identify:
    1. Surprising methodological patterns - how are researchers approaching this problem in innovative ways?
    2. Conceptual blindspots - what aspects seem to be collectively overlooked?
    3. Emerging paradigms - are there new frameworks or models taking shape across multiple perspectives?
    4. Productive tensions - where do disagreements or contradictions suggest valuable new research directions?
    5. The topology of the problem space - how might we map the conceptual territory in a novel way?

    Focus on identifying higher-order insights that emerge from comparing these different approaches.
    Your analysis should challenge conventions and suggest new ways of framing the entire research domain.
    """

    response = get_llm_response(prompt=meta_analysis_prompt, model=model, provider=provider, temperature=0.8)
    meta_analysis = response.get('response', '')
    if isinstance(meta_analysis, (list, dict)) or hasattr(meta_analysis, '__iter__') and not isinstance(meta_analysis, (str, bytes)):
        meta_analysis = ''.join([str(chunk) for chunk in meta_analysis])

    # Generate innovative research directions
    directions_prompt = f"""
    Based on this meta-analysis of research approaches to the topic:

    {meta_analysis}

    Propose 5 highly innovative research directions that could transform this field.
    For each direction:
    1. Frame a provocative research question
    2. Explain why it's both important and neglected
    3. Suggest an unconventional methodology to explore it
    4. Describe what a breakthrough in this direction might look like

    Your suggestions should be bold, interdisciplinary, and challenge fundamental assumptions.
    Aim for directions that most researchers haven't considered but that could lead to significant advances.
    """

    response = get_llm_response(prompt=directions_prompt, model=model, provider=provider, temperature=0.9)
    new_directions = response.get('response', '')
    if isinstance(new_directions, (list, dict)) or hasattr(new_directions, '__iter__') and not isinstance(new_directions, (str, bytes)):
        new_directions = ''.join([str(chunk) for chunk in new_directions])

    return {
        "chain_summaries": chain_summaries,
        "meta_analysis": meta_analysis,
        "innovative_directions": new_directions
    }

def preprocess_content_for_pdf(content: str, model: str = None, provider: str = None, max_words: int = 2000, concise_mode: bool = False) -> str:
    """
    Quick and lightweight preprocessing for PDF generation.

    Args:
        content: Raw content to preprocess
        model: LLM model to use (optional)
        provider: LLM provider to use (optional)
        max_words: Maximum word count (default 2000)
        concise_mode: If True, creates a very short summary instead of full formatting

    Returns:
        Formatted content ready for PDF generation
    """
    # Handle non-string content
    if not isinstance(content, str):
        content = str(content)

    # If in concise mode, create a drastically shortened version
    if concise_mode:

        if model is None:
            model = NPCSH_CHAT_MODEL
        if provider is None:
            provider = NPCSH_CHAT_PROVIDER

        concise_prompt = f"""
        Summarize the following content into an extremely concise, no-bullshit format with maximum 500 words:
        {content}

        - Use clear section headings
        - Use bullet points for key ideas
        - Focus only on essential insights
        - No verbose academic language
        - No padding or fillers
        - Just the core ideas in simple language
        """

        response = get_llm_response(prompt=concise_prompt, model=model, provider=provider)
        content = response.get('response', '')

    # Basic cleanup for any problematic characters that cause PDF issues
    for char, replacement in {
        '%': '',
        '#': '-',
        '_': '-',
        '~': '-',
        '^': '',
        '\\': '/',
        '{': '(',
        '}': ')'
    }.items():
        content = content.replace(char, replacement)

    # Apply word count limit if the content is too long
    words = content.split()
    if len(words) > max_words:
        content = ' '.join(words[:max_words]) + '... [truncated]'

    return content.strip()
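
Editor's note (illustrative, not part of the diff): with `concise_mode=False`, `preprocess_content_for_pdf` makes no LLM call and the character cleanup is deterministic, so its effect is easy to verify:

    # Illustrative only: the character cleanup is deterministic when concise_mode=False.
    from npcsh.alicanto import preprocess_content_for_pdf

    print(preprocess_content_for_pdf("100% of #1 items use snake_case {sometimes}"))
    # -> "100 of -1 items use snake-case (sometimes)"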

def generate_pdf_report(request: str,
                        model,
                        provider,
                        research: Dict[str, Any],
                        experiments: Dict[str, Dict[str, Any]],
                        output_path: str = None,
                        max_pages: int = 5) -> str:
    """
    Generate a professional PDF report using LaTeX for superior formatting, typesetting, and layout.

    Args:
        request: The original research question
        model: LLM model to use
        provider: LLM provider to use
        research: The consolidated research results
        experiments: The simulated experiments and their results
        output_path: Path to save the PDF report (default: current directory)
        max_pages: Maximum number of pages to generate (approximate)

    Returns:
        Path to the generated PDF file, or None if generation fails
    """
    if output_path is None:
        output_path = os.getcwd()

    # Create filename
    sanitized_request = "".join(c for c in request if c.isalnum() or c.isspace()).strip()
    sanitized_request = sanitized_request.replace(" ", "_")[:50]
    timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
    filename = f"{sanitized_request}_{timestamp}"

    # Check for LaTeX installation
    try:
        subprocess.run(["which", "pdflatex"], check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    except subprocess.CalledProcessError:
        print("LaTeX not installed. Attempting to install...")
        try:
            subprocess.run(["apt-get", "update"], check=True)
            subprocess.run(["apt-get", "install", "-y", "texlive-latex-base", "texlive-fonts-recommended",
                            "texlive-fonts-extra", "texlive-latex-extra"], check=True)
        except subprocess.CalledProcessError as e:
            print(f"Error installing LaTeX: {str(e)}")
            return None
    # Create chart for thematic groups using matplotlib
    chart_path = None
    try:
        if "group_evaluations" in research and research["group_evaluations"]:
            # Create basic folder for figures
            figures_dir = os.path.join(output_path, "figures")
            os.makedirs(figures_dir, exist_ok=True)

            fig, ax = plt.subplots(figsize=(7.5, 4))
            plt.style.use('ggplot')  # Clean style without seaborn

            groups = []
            scores = []

            for group_name, eval_data in research["group_evaluations"].items():
                groups.append(group_name[:30])  # Truncate long names
                quality_score = (eval_data.get("Novelty", 5) + eval_data.get("Depth", 5) +
                                 eval_data.get("Practicality", 5) + eval_data.get("Evidence", 5)) / 4
                scores.append(quality_score)

            # Sort by score
            sorted_data = sorted(zip(groups, scores), key=lambda x: x[1], reverse=True)
            groups = [x[0] for x in sorted_data]
            scores = [x[1] for x in sorted_data]

            # Create horizontal bar chart
            y_pos = range(len(groups))
            ax.barh(y_pos, scores, color='steelblue')
            ax.set_yticks(y_pos)
            ax.set_yticklabels(groups)
            ax.set_xlabel('Quality Score (1-10)')
            ax.set_title('Thematic Groups by Quality Score')
            plt.tight_layout()

            # Save chart
            chart_path = os.path.join(figures_dir, "thematic_groups.pdf")
            plt.savefig(chart_path, dpi=300, bbox_inches='tight', format='pdf')
            plt.close()
    except Exception as e:
        print(f"Warning: Could not generate chart: {str(e)}")

    # Create LaTeX document
    latex_content = generate_latex_document(request, model, provider, research, experiments, chart_path, max_pages)

    # Write LaTeX to file
    tex_path = os.path.join(output_path, f"{filename}.tex")
    with open(tex_path, "w") as f:
        f.write(latex_content)

    # Use subprocess to run pdflatex without check=True to prevent exceptions
    try:
        # First run
        result = subprocess.run(
            ["pdflatex", "-interaction=nonstopmode", "-output-directory", output_path, tex_path],
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE
        )

        if result.returncode != 0:
            print(f"Warning: First LaTeX run had issues (exit code {result.returncode})")
            # Still continue - sometimes the second run fixes things

        # Second run for references
        result = subprocess.run(
            ["pdflatex", "-interaction=nonstopmode", "-output-directory", output_path, tex_path],
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE
        )

        if result.returncode != 0:
            print(f"Warning: Second LaTeX run had issues (exit code {result.returncode})")
            # Write LaTeX log for debugging
            log_path = os.path.join(output_path, f"{filename}.log")
            if os.path.exists(log_path):
                print(f"Check LaTeX log for details: {log_path}")
    except Exception as e:
        print(f"Error during LaTeX compilation: {str(e)}")
        return None

    # Clean up temporary files
    for ext in [".aux", ".out", ".toc"]:
        try:
            os.remove(os.path.join(output_path, f"{filename}{ext}"))
        except OSError:
            pass

    # Check if PDF was generated successfully
    pdf_path = os.path.join(output_path, f"{filename}.pdf")
    if os.path.exists(pdf_path):
        print(f"PDF report successfully generated using LaTeX: {pdf_path}")
        return pdf_path
    else:
        print("PDF generation failed. Check the LaTeX log for details.")
        return None

def generate_latex_document(request: str, model, provider, research: Dict[str, Any], experiments: Dict[str, Dict[str, Any]],
                            chart_path: str = None, max_pages: int = 5) -> str:
    """
    Generate LaTeX document content.

    Args:
        request: The research topic
        research: Research results
        experiments: Experiments data
        chart_path: Path to the thematic groups chart
        max_pages: Maximum number of pages (approximate)

    Returns:
        LaTeX document content as a string
    """
    # Collect experiment images that might be available
    figure_paths = {}
    if chart_path:
        # Use relative path instead of absolute path for figure
        figure_paths["thematic_groups"] = os.path.basename(chart_path)

    # Check for experiment images in the current directory
    # Ensure experiments is a dictionary before trying to get keys
    if isinstance(experiments, dict):
        for title in experiments.keys():
            sanitized_title = title.replace(" ", "_")
            potential_image = f"{sanitized_title}_experiment.png"
            if os.path.exists(potential_image):
                figure_paths[sanitized_title] = potential_image

    # Describe available figures to the LLM
    figure_path_description_dict = {}
    for name, path in figure_paths.items():
        figure_path_description_dict[name] = path

    # Create the prompt for generating LaTeX content
    prompt = f'''
    Generate a LaTeX document for a research report on the topic: "{request}"
    Here is the summary of the research: {research}

    Here is the summary of the experiments: {experiments}''' + """
    Write your response in a way that academically details the research, its motivation, and experiments
    and ensure any place where a citation may be needed is indicated by including an empty '\\cite{citation_needed}'

    IMPORTANT INSTRUCTIONS FOR DOCUMENT PREPARATION:
    1. DO NOT include \\bibliography{references} or any bibliography commands, as we don't have a references file
    2. Instead, create a \\begin{thebibliography}{99} ... \\end{thebibliography} section with example references
    3. For figures, use relative paths like 'figures/thematic_groups.pdf' rather than absolute paths
    4. Make sure all LaTeX commands are properly formatted and do not use undefined packages
    5. Keep the document structure simple and robust to avoid compilation errors
    """ + f"""
    The figures are located at the following paths: {figure_path_description_dict}
    """

    latex_response = get_llm_response(prompt=prompt, model=model, provider=provider)
    latex_content = latex_response.get('response', '')

    # Post-process the LaTeX content to fix common issues
    latex_content = latex_content.replace('\\bibliography{references}', '')
    latex_content = latex_content.replace('\\bibliographystyle{plain}', '')

    # Replace absolute figure paths with relative paths
    latex_content = latex_content.replace('/home/caug/npcww/npcsh/figures/', 'figures/')

    # Add a simple bibliography if none exists
    if '\\begin{thebibliography}' not in latex_content and '\\end{document}' in latex_content:
        bibliography = """
        \\begin{thebibliography}{9}
        \\bibitem{citation1} Author, A. (2023). Title of the work. Journal Name, 10(2), 123-456.
        \\bibitem{citation2} Researcher, B. (2022). Another relevant publication. Conference Proceedings, 789-012.
        \\end{thebibliography}
        """
        latex_content = latex_content.replace('\\end{document}', f'{bibliography}\n\\end{{document}}')

    return latex_content
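
Editor's note (illustrative, not part of the diff): `generate_pdf_report` shells out to `pdflatex` and, if it is missing, attempts an `apt-get` install, so the PDF step only succeeds where a TeX Live installation (or root access on a Debian-like system) is available. A quick pre-flight check along these lines may be useful before enabling `generate_pdf`:

    # Illustrative pre-flight check, not part of the diff.
    import shutil

    if shutil.which("pdflatex") is None:
        print("pdflatex not found; run alicanto(..., generate_pdf=False) or install TeX Live.")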