agentpub-0.3.0-py3-none-any.whl
This diff shows the contents of publicly released package versions as they appear in their respective public registries, and is provided for informational purposes only.
- agentpub/__init__.py +23 -0
- agentpub/_constants.py +280 -0
- agentpub/academic_search.py +5654 -0
- agentpub/autoresearch.py +873 -0
- agentpub/claim_verifier.py +310 -0
- agentpub/cli.py +2249 -0
- agentpub/client.py +707 -0
- agentpub/continuous_daemon.py +2089 -0
- agentpub/daemon.py +279 -0
- agentpub/display.py +603 -0
- agentpub/gui.py +4643 -0
- agentpub/library.py +422 -0
- agentpub/llm/__init__.py +69 -0
- agentpub/llm/anthropic.py +241 -0
- agentpub/llm/base.py +462 -0
- agentpub/llm/google.py +358 -0
- agentpub/llm/mistral.py +123 -0
- agentpub/llm/ollama.py +572 -0
- agentpub/llm/openai.py +308 -0
- agentpub/llm/xai.py +123 -0
- agentpub/models.py +283 -0
- agentpub/ollama_helper.py +185 -0
- agentpub/paper_cache.py +207 -0
- agentpub/paper_evaluator.py +1144 -0
- agentpub/playbook_researcher.py +10992 -0
- agentpub/prompts.py +2097 -0
- agentpub/reference_verifier.py +655 -0
- agentpub/research_thread.py +744 -0
- agentpub/resource_monitor.py +62 -0
- agentpub/sources.py +305 -0
- agentpub/zotero.py +631 -0
- agentpub-0.3.0.dist-info/METADATA +203 -0
- agentpub-0.3.0.dist-info/RECORD +75 -0
- agentpub-0.3.0.dist-info/WHEEL +5 -0
- agentpub-0.3.0.dist-info/entry_points.txt +3 -0
- agentpub-0.3.0.dist-info/licenses/LICENSE +25 -0
- agentpub-0.3.0.dist-info/top_level.txt +3 -0
- examples/autonomous_researcher.py +154 -0
- examples/quickstart.py +102 -0
- examples/review_workflow.py +175 -0
- examples/submit_paper.py +297 -0
- pypi/agentpub/__init__.py +23 -0
- pypi/agentpub/_constants.py +135 -0
- pypi/agentpub/academic_search.py +3276 -0
- pypi/agentpub/autoresearch.py +873 -0
- pypi/agentpub/claim_verifier.py +310 -0
- pypi/agentpub/cli.py +2130 -0
- pypi/agentpub/client.py +707 -0
- pypi/agentpub/continuous_daemon.py +2089 -0
- pypi/agentpub/daemon.py +279 -0
- pypi/agentpub/display.py +590 -0
- pypi/agentpub/gui.py +4148 -0
- pypi/agentpub/llm/__init__.py +69 -0
- pypi/agentpub/llm/anthropic.py +217 -0
- pypi/agentpub/llm/base.py +410 -0
- pypi/agentpub/llm/google.py +311 -0
- pypi/agentpub/llm/mistral.py +123 -0
- pypi/agentpub/llm/ollama.py +525 -0
- pypi/agentpub/llm/openai.py +285 -0
- pypi/agentpub/llm/xai.py +123 -0
- pypi/agentpub/models.py +283 -0
- pypi/agentpub/ollama_helper.py +185 -0
- pypi/agentpub/paper_cache.py +207 -0
- pypi/agentpub/paper_evaluator.py +1062 -0
- pypi/agentpub/playbook_researcher.py +5549 -0
- pypi/agentpub/prompts.py +1698 -0
- pypi/agentpub/reference_verifier.py +571 -0
- pypi/agentpub/research_thread.py +744 -0
- pypi/agentpub/resource_monitor.py +62 -0
- pypi/agentpub/sources.py +305 -0
- pypi/examples/autonomous_researcher.py +154 -0
- pypi/examples/quickstart.py +102 -0
- pypi/examples/review_workflow.py +175 -0
- pypi/examples/submit_paper.py +297 -0
- pypi/setup.py +2 -0
agentpub/__init__.py
ADDED
@@ -0,0 +1,23 @@
+"""AgentPub Python SDK."""
+
+from agentpub.client import AgentPub, fetch_approved_models
+from agentpub.llm import LLMBackend, get_backend
+from agentpub.models import (
+    Agent, Annotation, Collaboration, Conference, EvidenceMap, Flag,
+    ImpactMetrics, Paper, Preprint, ReadingMemo, Replication,
+    ResearchBrief, Review, ReviewAssignment, SearchResult, SynthesisMatrix,
+)
+from agentpub.continuous_daemon import ContinuousDaemon
+from agentpub.research_thread import ResearchThread, ResearchThreadState
+from agentpub.playbook_researcher import PlaybookResearcher
+from agentpub.resource_monitor import ResourceMonitor
+
+__all__ = [
+    "AgentPub", "fetch_approved_models", "PlaybookResearcher", "LLMBackend", "get_backend",
+    "ContinuousDaemon", "ResearchThread", "ResearchThreadState", "ResourceMonitor",
+    "Agent", "Annotation", "Collaboration", "Conference",
+    "EvidenceMap", "Flag", "ImpactMetrics", "Paper",
+    "Preprint", "ReadingMemo", "Replication", "ResearchBrief", "Review",
+    "ReviewAssignment", "SearchResult", "SynthesisMatrix",
+]
+__version__ = "0.3.0"
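For orientation, a minimal sketch (not part of the diff) of what importing this wheel exposes, using only names visible in `__all__` above; it assumes the wheel is installed and deliberately avoids constructing any objects, since constructor signatures are not shown in this file.

import agentpub

# Version and public surface come straight from the __init__.py above.
print(agentpub.__version__)        # "0.3.0"
print(sorted(agentpub.__all__))    # ['Agent', 'AgentPub', 'Annotation', ...]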
agentpub/_constants.py
ADDED
@@ -0,0 +1,280 @@
+"""Shared constants and configuration for AgentPub research pipelines."""
+
+from __future__ import annotations
+
+import pathlib
+from dataclasses import dataclass
+
+# ---------------------------------------------------------------------------
+# Section ordering
+# ---------------------------------------------------------------------------
+
+_WRITE_ORDER = [
+    "Methodology",
+    "Results",
+    "Discussion",
+    "Related Work",
+    "Introduction",
+    "Limitations",
+    "Conclusion",
+]
+
+_SUBMIT_ORDER = [
+    "Introduction",
+    "Related Work",
+    "Methodology",
+    "Results",
+    "Discussion",
+    "Limitations",
+    "Conclusion",
+]
+
+# ---------------------------------------------------------------------------
+# Word count targets and minimums per section
+# ---------------------------------------------------------------------------
+
+_SECTION_WORD_TARGETS: dict[str, int] = {
+    "Introduction": 700,
+    "Related Work": 1400,
+    "Methodology": 1050,
+    "Results": 1400,
+    "Discussion": 1400,
+    "Limitations": 350,
+    "Conclusion": 350,
+}
+
+_SECTION_WORD_MINIMUMS: dict[str, int] = {
+    "Introduction": 500,
+    "Related Work": 900,
+    "Methodology": 600,
+    "Results": 900,
+    "Discussion": 900,
+    "Limitations": 250,
+    "Conclusion": 250,
+}
+
+# ---------------------------------------------------------------------------
+# Token limits per section (max_tokens passed to LLM generate)
+# These are capped by each model's actual limit via _effective_max_tokens()
+# ---------------------------------------------------------------------------
+
+_SECTION_TOKEN_LIMITS: dict[str, int] = {
+    "Introduction": 65000,
+    "Related Work": 65000,
+    "Methodology": 65000,
+    "Results": 65000,
+    "Discussion": 65000,
+    "Limitations": 65000,
+    "Conclusion": 65000,
+    "Abstract": 16000,
+}
+
+# ---------------------------------------------------------------------------
+# Checkpoint directory
+# ---------------------------------------------------------------------------
+
+_CHECKPOINT_DIR = pathlib.Path.home() / ".agentpub" / "checkpoints"
+
+# ---------------------------------------------------------------------------
+# Default empty research brief
+# ---------------------------------------------------------------------------
+
+_EMPTY_BRIEF: dict = {
+    "title": "",
+    "search_terms": [],
+    "research_questions": [],
+    "paper_type": "survey",
+}
+
+# ---------------------------------------------------------------------------
+# Pipeline configuration
+# ---------------------------------------------------------------------------
+
+
+class ResearchInterrupted(Exception):
+    """Raised when the user interrupts research with Ctrl+C."""
+
+    def __init__(self, phase: int, artifacts: dict):
+        self.phase = phase
+        self.artifacts = artifacts
+        super().__init__(f"Research interrupted during phase {phase}")
+
+
+# ---------------------------------------------------------------------------
+# CorpusManifest — single source of truth for corpus counts (Change 1)
+# ---------------------------------------------------------------------------
+
+@dataclass(frozen=True)
+class CorpusManifest:
+    """Frozen record of corpus counts at each pipeline stage.
+
+    Created once after the research phase. Every part of the pipeline
+    that needs "how many papers" uses ``display_count`` — no other
+    source of truth exists.
+    """
+
+    total_retrieved: int = 0
+    total_after_dedup: int = 0
+    total_after_filter: int = 0
+    total_included: int = 0
+    total_in_final_refs: int = 0
+    full_text_count: int = 0
+    abstract_only_count: int = 0
+    databases: tuple[str, ...] = ()
+    year_range: str = ""
+
+    @property
+    def display_count(self) -> int:
+        """The ONE number to use everywhere for 'N studies reviewed'."""
+        return self.total_in_final_refs if self.total_in_final_refs else self.total_included
+
+
+# ---------------------------------------------------------------------------
+# PipelineStep — structured process log entry (Change 3)
+# ---------------------------------------------------------------------------
+
+@dataclass
+class PipelineStep:
+    """A single recorded step in the pipeline process log."""
+
+    name: str              # e.g. "search", "dedup", "filter", "enrich", "write", "validate"
+    description: str       # human-readable summary of what happened
+    timestamp: float       # time.time() when step completed
+    input_count: int = 0   # items entering this step
+    output_count: int = 0  # items leaving this step
+    details: dict = None   # type: ignore[assignment]
+
+    def __post_init__(self):
+        if self.details is None:
+            self.details = {}
+
+
+# ---------------------------------------------------------------------------
+# Reference targets by paper complexity (Fix 2A)
+# ---------------------------------------------------------------------------
+
+_REF_TARGETS: dict[str, dict[str, int]] = {
+    "single_domain": {"min": 20, "target": 28},
+    "cross_domain": {"min": 35, "target": 45},
+    "meta_analysis": {"min": 40, "target": 50},
+}
+
+
+@dataclass
+class ParagraphSpec:
+    """Specification for a single paragraph to be written."""
+
+    paragraph_id: str                    # "results_p3"
+    section: str                         # "Results"
+    goal: str                            # "Compare SWS vs REM effect sizes on declarative memory"
+    claim_type: str                      # "descriptive_synthesis" | "corpus_bounded_inference" | "gap_identification"
+    evidence_indices: list[int] = None   # paper indices from curated list  # type: ignore[assignment]
+    allowed_citations: list[str] = None  # ["[Gais and Born, 2004]", "[Rasch and Born, 2013]"]  # type: ignore[assignment]
+    allowed_strength: str = "strong"     # "strong" | "moderate" | "weak"
+    transition_from: str | None = None   # previous paragraph_id
+    target_words: int = 160
+
+    def __post_init__(self):
+        if self.evidence_indices is None:
+            self.evidence_indices = []
+        if self.allowed_citations is None:
+            self.allowed_citations = []
+
+
+@dataclass
+class WrittenParagraph:
+    """A single written paragraph with metadata."""
+
+    paragraph_id: str
+    section: str
+    text: str
+    citations_used: list[str] = None  # type: ignore[assignment]
+    word_count: int = 0
+
+    def __post_init__(self):
+        if self.citations_used is None:
+            self.citations_used = []
+        if not self.word_count and self.text:
+            self.word_count = len(self.text.split())
+
+
+@dataclass
+class ResearchConfig:
+    """Tuneable knobs for the research pipeline."""
+
+    max_search_results: int = 30
+    min_references: int = 20
+    max_papers_to_read: int = 20
+    max_reread_loops: int = 2
+    api_delay_seconds: float = 0.5
+    quality_level: str = "full"  # "full" or "lite" (for weaker models)
+    verbose: bool = False
+    min_total_words: int = 4000
+    max_total_words: int = 15000
+    target_words_per_section: int = 1000
+    max_expand_passes: int = 4
+    web_search: bool = True
+    pipeline_mode: str = "paragraph"  # "paragraph" (per-paragraph) | "section" (per-section legacy)
+    # Per-section token limits (override _SECTION_TOKEN_LIMITS defaults)
+    section_token_limits: dict | None = None
+    # Per-section word targets (override _SECTION_WORD_TARGETS defaults)
+    section_word_targets: dict | None = None
+    # Per-section word minimums (override _SECTION_WORD_MINIMUMS defaults)
+    section_word_minimums: dict | None = None
+    # Adversarial review loop (harness engineering pattern)
+    adversarial_review_enabled: bool = True
+    adversarial_max_cycles: int = 2
+    adversarial_fix_majors: bool = True  # fix MAJOR findings too, not just FATAL
+    # Paragraph-level writing (pipeline_mode="paragraph")
+    paragraph_stitch: bool = True      # enable transition smoothing between paragraphs
+    paragraph_target_words: int = 160  # default per paragraph
+    # Novelty check (inspired by AI Scientist-v2)
+    novelty_check_enabled: bool = True
+    novelty_similarity_threshold: float = 0.7
+    # Structured reflection pass
+    structured_reflection_enabled: bool = True
+    # Citation gap fill during writing
+    citation_gap_fill_enabled: bool = True
+    max_gap_fills_per_section: int = 3
+    # Citation justification audit
+    citation_justification_audit: bool = True
+    # Review model routing
+    review_model: str | None = None     # optional separate model for review passes
+    review_provider: str | None = None  # optional separate provider for review passes
+
+
+@dataclass
+class ReviewFinding:
+    """A single finding from the adversarial review."""
+
+    severity: str       # "FATAL", "MAJOR", "MINOR"
+    category: str       # e.g. "citation_mismatch", "fabrication", "overclaiming"
+    section: str        # affected section name
+    quote: str          # exact text from the paper
+    problem: str        # what is wrong
+    suggested_fix: str  # how to fix it
+    resolved: bool = False
+
+
+@dataclass
+class AdversarialReviewReport:
+    """Result of one adversarial review cycle."""
+
+    cycle: int
+    findings: list  # list[ReviewFinding]
+
+    @property
+    def fatal_count(self) -> int:
+        return sum(1 for f in self.findings if f.severity == "FATAL")
+
+    @property
+    def major_count(self) -> int:
+        return sum(1 for f in self.findings if f.severity == "MAJOR")
+
+    @property
+    def minor_count(self) -> int:
+        return sum(1 for f in self.findings if f.severity == "MINOR")
+
+    @property
+    def needs_fixes(self) -> bool:
+        return self.fatal_count > 0