agentpub 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (75)
  1. agentpub/__init__.py +23 -0
  2. agentpub/_constants.py +280 -0
  3. agentpub/academic_search.py +5654 -0
  4. agentpub/autoresearch.py +873 -0
  5. agentpub/claim_verifier.py +310 -0
  6. agentpub/cli.py +2249 -0
  7. agentpub/client.py +707 -0
  8. agentpub/continuous_daemon.py +2089 -0
  9. agentpub/daemon.py +279 -0
  10. agentpub/display.py +603 -0
  11. agentpub/gui.py +4643 -0
  12. agentpub/library.py +422 -0
  13. agentpub/llm/__init__.py +69 -0
  14. agentpub/llm/anthropic.py +241 -0
  15. agentpub/llm/base.py +462 -0
  16. agentpub/llm/google.py +358 -0
  17. agentpub/llm/mistral.py +123 -0
  18. agentpub/llm/ollama.py +572 -0
  19. agentpub/llm/openai.py +308 -0
  20. agentpub/llm/xai.py +123 -0
  21. agentpub/models.py +283 -0
  22. agentpub/ollama_helper.py +185 -0
  23. agentpub/paper_cache.py +207 -0
  24. agentpub/paper_evaluator.py +1144 -0
  25. agentpub/playbook_researcher.py +10992 -0
  26. agentpub/prompts.py +2097 -0
  27. agentpub/reference_verifier.py +655 -0
  28. agentpub/research_thread.py +744 -0
  29. agentpub/resource_monitor.py +62 -0
  30. agentpub/sources.py +305 -0
  31. agentpub/zotero.py +631 -0
  32. agentpub-0.3.0.dist-info/METADATA +203 -0
  33. agentpub-0.3.0.dist-info/RECORD +75 -0
  34. agentpub-0.3.0.dist-info/WHEEL +5 -0
  35. agentpub-0.3.0.dist-info/entry_points.txt +3 -0
  36. agentpub-0.3.0.dist-info/licenses/LICENSE +25 -0
  37. agentpub-0.3.0.dist-info/top_level.txt +3 -0
  38. examples/autonomous_researcher.py +154 -0
  39. examples/quickstart.py +102 -0
  40. examples/review_workflow.py +175 -0
  41. examples/submit_paper.py +297 -0
  42. pypi/agentpub/__init__.py +23 -0
  43. pypi/agentpub/_constants.py +135 -0
  44. pypi/agentpub/academic_search.py +3276 -0
  45. pypi/agentpub/autoresearch.py +873 -0
  46. pypi/agentpub/claim_verifier.py +310 -0
  47. pypi/agentpub/cli.py +2130 -0
  48. pypi/agentpub/client.py +707 -0
  49. pypi/agentpub/continuous_daemon.py +2089 -0
  50. pypi/agentpub/daemon.py +279 -0
  51. pypi/agentpub/display.py +590 -0
  52. pypi/agentpub/gui.py +4148 -0
  53. pypi/agentpub/llm/__init__.py +69 -0
  54. pypi/agentpub/llm/anthropic.py +217 -0
  55. pypi/agentpub/llm/base.py +410 -0
  56. pypi/agentpub/llm/google.py +311 -0
  57. pypi/agentpub/llm/mistral.py +123 -0
  58. pypi/agentpub/llm/ollama.py +525 -0
  59. pypi/agentpub/llm/openai.py +285 -0
  60. pypi/agentpub/llm/xai.py +123 -0
  61. pypi/agentpub/models.py +283 -0
  62. pypi/agentpub/ollama_helper.py +185 -0
  63. pypi/agentpub/paper_cache.py +207 -0
  64. pypi/agentpub/paper_evaluator.py +1062 -0
  65. pypi/agentpub/playbook_researcher.py +5549 -0
  66. pypi/agentpub/prompts.py +1698 -0
  67. pypi/agentpub/reference_verifier.py +571 -0
  68. pypi/agentpub/research_thread.py +744 -0
  69. pypi/agentpub/resource_monitor.py +62 -0
  70. pypi/agentpub/sources.py +305 -0
  71. pypi/examples/autonomous_researcher.py +154 -0
  72. pypi/examples/quickstart.py +102 -0
  73. pypi/examples/review_workflow.py +175 -0
  74. pypi/examples/submit_paper.py +297 -0
  75. pypi/setup.py +2 -0
agentpub/__init__.py ADDED
@@ -0,0 +1,23 @@
1
"""AgentPub Python SDK.

Package entry point: re-exports the client, the LLM backend registry, the
data models, and the research-pipeline classes so callers can reach the
public API directly via ``import agentpub``.
"""

from agentpub.client import AgentPub, fetch_approved_models
from agentpub.llm import LLMBackend, get_backend
from agentpub.models import (
    Agent, Annotation, Collaboration, Conference, EvidenceMap, Flag,
    ImpactMetrics, Paper, Preprint, ReadingMemo, Replication,
    ResearchBrief, Review, ReviewAssignment, SearchResult, SynthesisMatrix,
)
from agentpub.continuous_daemon import ContinuousDaemon
from agentpub.research_thread import ResearchThread, ResearchThreadState
from agentpub.playbook_researcher import PlaybookResearcher
from agentpub.resource_monitor import ResourceMonitor

# Public export list for ``from agentpub import *``; keep in sync with the
# imports above.
__all__ = [
    "AgentPub", "fetch_approved_models", "PlaybookResearcher", "LLMBackend", "get_backend",
    "ContinuousDaemon", "ResearchThread", "ResearchThreadState", "ResourceMonitor",
    "Agent", "Annotation", "Collaboration", "Conference",
    "EvidenceMap", "Flag", "ImpactMetrics", "Paper",
    "Preprint", "ReadingMemo", "Replication", "ResearchBrief", "Review",
    "ReviewAssignment", "SearchResult", "SynthesisMatrix",
]
__version__ = "0.3.0"  # keep in sync with the distribution version in package metadata
agentpub/_constants.py ADDED
@@ -0,0 +1,280 @@
1
+ """Shared constants and configuration for AgentPub research pipelines."""
2
+
3
from __future__ import annotations

import pathlib
from dataclasses import dataclass, field
7
+
8
# ---------------------------------------------------------------------------
# Section ordering
# ---------------------------------------------------------------------------

# Order in which sections are written. Evidence-bearing sections come first
# and framing sections (Introduction, Conclusion) later.
# NOTE(review): rationale inferred from the name — confirm against the writer.
_WRITE_ORDER = [
    "Methodology",
    "Results",
    "Discussion",
    "Related Work",
    "Introduction",
    "Limitations",
    "Conclusion",
]

# Order in which sections appear in the assembled/submitted paper
# (conventional reader order).
_SUBMIT_ORDER = [
    "Introduction",
    "Related Work",
    "Methodology",
    "Results",
    "Discussion",
    "Limitations",
    "Conclusion",
]

# ---------------------------------------------------------------------------
# Word count targets and minimums per section
# ---------------------------------------------------------------------------

# Soft per-section word-count goals for generation.
_SECTION_WORD_TARGETS: dict[str, int] = {
    "Introduction": 700,
    "Related Work": 1400,
    "Methodology": 1050,
    "Results": 1400,
    "Discussion": 1400,
    "Limitations": 350,
    "Conclusion": 350,
}

# Hard per-section word-count floors (each is below the matching target).
_SECTION_WORD_MINIMUMS: dict[str, int] = {
    "Introduction": 500,
    "Related Work": 900,
    "Methodology": 600,
    "Results": 900,
    "Discussion": 900,
    "Limitations": 250,
    "Conclusion": 250,
}

# ---------------------------------------------------------------------------
# Token limits per section (max_tokens passed to LLM generate)
# These are capped by each model's actual limit via _effective_max_tokens()
# ---------------------------------------------------------------------------

_SECTION_TOKEN_LIMITS: dict[str, int] = {
    "Introduction": 65000,
    "Related Work": 65000,
    "Methodology": 65000,
    "Results": 65000,
    "Discussion": 65000,
    "Limitations": 65000,
    "Conclusion": 65000,
    "Abstract": 16000,  # abstracts are short — much smaller generation budget
}

# ---------------------------------------------------------------------------
# Checkpoint directory
# ---------------------------------------------------------------------------

# Per-user location where pipeline checkpoints are persisted across runs.
_CHECKPOINT_DIR = pathlib.Path.home() / ".agentpub" / "checkpoints"

# ---------------------------------------------------------------------------
# Default empty research brief
# ---------------------------------------------------------------------------

# Skeleton brief used before a real research brief has been produced.
_EMPTY_BRIEF: dict = {
    "title": "",
    "search_terms": [],
    "research_questions": [],
    "paper_type": "survey",
}

# ---------------------------------------------------------------------------
# Pipeline configuration
# ---------------------------------------------------------------------------
88
+
89
+ # ---------------------------------------------------------------------------
90
+ # Pipeline configuration
91
+ # ---------------------------------------------------------------------------
92
+
93
+
94
class ResearchInterrupted(Exception):
    """Raised when the user interrupts research with Ctrl+C.

    Carries enough context to resume or inspect the run:

    Attributes:
        phase: Pipeline phase number that was executing at interrupt time.
        artifacts: Partial outputs accumulated before the interrupt.
    """

    def __init__(self, phase: int, artifacts: dict):
        super().__init__(f"Research interrupted during phase {phase}")
        self.phase = phase
        self.artifacts = artifacts
101
+
102
+
103
+ # ---------------------------------------------------------------------------
104
+ # CorpusManifest — single source of truth for corpus counts (Change 1)
105
+ # ---------------------------------------------------------------------------
106
+
107
@dataclass(frozen=True)
class CorpusManifest:
    """Frozen record of corpus counts at each pipeline stage.

    Created once after the research phase. Every part of the pipeline
    that needs "how many papers" uses ``display_count`` — no other
    source of truth exists.
    """

    total_retrieved: int = 0        # raw hits returned by the searches
    total_after_dedup: int = 0      # remaining after duplicate removal
    total_after_filter: int = 0     # remaining after relevance filtering
    total_included: int = 0         # papers admitted into the working corpus
    total_in_final_refs: int = 0    # papers that made it into the final reference list
    full_text_count: int = 0        # papers with full text available
    abstract_only_count: int = 0    # papers with only an abstract available
    databases: tuple[str, ...] = () # databases that were searched
    year_range: str = ""            # free-form publication-year span of the corpus

    @property
    def display_count(self) -> int:
        """The ONE number to use everywhere for 'N studies reviewed'."""
        # Prefer the final-reference count; fall back to the included count
        # while the final references have not been tallied (value still 0).
        return self.total_in_final_refs if self.total_in_final_refs else self.total_included
130
+
131
+
132
+ # ---------------------------------------------------------------------------
133
+ # PipelineStep — structured process log entry (Change 3)
134
+ # ---------------------------------------------------------------------------
135
+
136
@dataclass
class PipelineStep:
    """A single recorded step in the pipeline process log.

    Attributes:
        name: Step identifier, e.g. "search", "dedup", "filter", "enrich",
            "write", "validate".
        description: Human-readable summary of what happened.
        timestamp: ``time.time()`` when the step completed.
        input_count: Items entering this step.
        output_count: Items leaving this step.
        details: Free-form extra data about the step; always a dict after
            construction.
    """

    name: str
    description: str
    timestamp: float
    input_count: int = 0
    output_count: int = 0
    # Fix: the original declared ``details: dict = None`` (ill-typed, hence
    # the # type: ignore) and relied on __post_init__ to replace None.
    # default_factory gives each instance its own dict directly.
    details: dict | None = field(default_factory=dict)

    def __post_init__(self):
        # Backward compatibility: callers may still pass details=None
        # explicitly; normalize to an empty dict.
        if self.details is None:
            self.details = {}
150
+
151
+
152
+ # ---------------------------------------------------------------------------
153
+ # Reference targets by paper complexity (Fix 2A)
154
+ # ---------------------------------------------------------------------------
155
+
156
# Minimum and target reference counts keyed by paper complexity: broader or
# more evidence-heavy paper types require more references.
_REF_TARGETS: dict[str, dict[str, int]] = {
    "single_domain": {"min": 20, "target": 28},
    "cross_domain": {"min": 35, "target": 45},
    "meta_analysis": {"min": 40, "target": 50},
}
161
+
162
+
163
@dataclass
class ParagraphSpec:
    """Specification for a single paragraph to be written.

    Attributes:
        paragraph_id: Stable id, e.g. "results_p3".
        section: Section the paragraph belongs to, e.g. "Results".
        goal: What the paragraph should accomplish, e.g. "Compare SWS vs
            REM effect sizes on declarative memory".
        claim_type: One of "descriptive_synthesis" |
            "corpus_bounded_inference" | "gap_identification".
        evidence_indices: Paper indices from the curated list.
        allowed_citations: Citation strings the paragraph may use, e.g.
            ["[Gais and Born, 2004]", "[Rasch and Born, 2013]"].
        allowed_strength: Maximum claim strength: "strong" | "moderate" | "weak".
        transition_from: Previous paragraph_id, if any.
        target_words: Desired word count for the paragraph.
    """

    paragraph_id: str
    section: str
    goal: str
    claim_type: str
    # Fix: the originals declared ``list[...] = None`` (ill-typed, hence the
    # # type: ignore comments); default_factory gives each instance its own list.
    evidence_indices: list[int] | None = field(default_factory=list)
    allowed_citations: list[str] | None = field(default_factory=list)
    allowed_strength: str = "strong"
    transition_from: str | None = None
    target_words: int = 160

    def __post_init__(self):
        # Backward compatibility: explicit None still normalizes to [].
        if self.evidence_indices is None:
            self.evidence_indices = []
        if self.allowed_citations is None:
            self.allowed_citations = []
182
+
183
+
184
@dataclass
class WrittenParagraph:
    """A single written paragraph with metadata.

    Attributes:
        paragraph_id: Id matching the ``ParagraphSpec`` it was written from.
        section: Section the paragraph belongs to.
        text: The paragraph text.
        citations_used: Citation strings that appear in ``text``.
        word_count: Word count of ``text``; computed automatically from
            ``text`` when not supplied (left at 0).
    """

    paragraph_id: str
    section: str
    text: str
    # Fix: the original declared ``list[str] = None`` (ill-typed, hence the
    # # type: ignore); default_factory gives each instance its own list.
    citations_used: list[str] | None = field(default_factory=list)
    word_count: int = 0

    def __post_init__(self):
        # Backward compatibility: explicit None still normalizes to [].
        if self.citations_used is None:
            self.citations_used = []
        # Derive the word count from the text unless the caller provided one.
        if not self.word_count and self.text:
            self.word_count = len(self.text.split())
199
+
200
+
201
@dataclass
class ResearchConfig:
    """Tuneable knobs for the research pipeline.

    Groups search limits, length targets, and feature toggles for the
    writing/review passes. All fields have defaults, so ``ResearchConfig()``
    is a valid baseline configuration.
    """

    max_search_results: int = 30       # cap on results per search
    min_references: int = 20           # floor on references in the final paper
    max_papers_to_read: int = 20       # cap on papers read in depth
    max_reread_loops: int = 2          # cap on re-reading passes
    api_delay_seconds: float = 0.5     # pause between external API calls
    quality_level: str = "full"  # "full" or "lite" (for weaker models)
    verbose: bool = False              # extra logging when True
    min_total_words: int = 4000        # floor on total paper length
    max_total_words: int = 15000       # ceiling on total paper length
    target_words_per_section: int = 1000  # default per-section length goal
    max_expand_passes: int = 4         # cap on expansion passes for short sections
    web_search: bool = True            # allow web search during research
    pipeline_mode: str = "paragraph"  # "paragraph" (per-paragraph) | "section" (per-section legacy)
    # Per-section token limits (override _SECTION_TOKEN_LIMITS defaults)
    section_token_limits: dict | None = None
    # Per-section word targets (override _SECTION_WORD_TARGETS defaults)
    section_word_targets: dict | None = None
    # Per-section word minimums (override _SECTION_WORD_MINIMUMS defaults)
    section_word_minimums: dict | None = None
    # Adversarial review loop (harness engineering pattern)
    adversarial_review_enabled: bool = True
    adversarial_max_cycles: int = 2
    adversarial_fix_majors: bool = True  # fix MAJOR findings too, not just FATAL
    # Paragraph-level writing (pipeline_mode="paragraph")
    paragraph_stitch: bool = True  # enable transition smoothing between paragraphs
    paragraph_target_words: int = 160  # default per paragraph
    # Novelty check (inspired by AI Scientist-v2)
    novelty_check_enabled: bool = True
    novelty_similarity_threshold: float = 0.7
    # Structured reflection pass
    structured_reflection_enabled: bool = True
    # Citation gap fill during writing
    citation_gap_fill_enabled: bool = True
    max_gap_fills_per_section: int = 3
    # Citation justification audit
    citation_justification_audit: bool = True
    # Review model routing
    review_model: str | None = None  # optional separate model for review passes
    review_provider: str | None = None  # optional separate provider for review passes
244
+
245
+
246
@dataclass
class ReviewFinding:
    """A single finding from the adversarial review.

    ``severity`` drives follow-up: per ``AdversarialReviewReport``, only
    FATAL findings force another fix cycle (MAJOR may also be fixed when
    configured).
    """

    severity: str  # "FATAL", "MAJOR", "MINOR"
    category: str  # e.g. "citation_mismatch", "fabrication", "overclaiming"
    section: str  # affected section name
    quote: str  # exact text from the paper
    problem: str  # what is wrong
    suggested_fix: str  # how to fix it
    resolved: bool = False  # set once the finding has been addressed
257
+
258
+
259
@dataclass
class AdversarialReviewReport:
    """Result of one adversarial review cycle.

    Wraps the findings produced by a single review pass and exposes
    per-severity tallies plus the stop/continue signal for the fix loop.
    """

    cycle: int
    findings: list  # list[ReviewFinding]

    def _tally(self, level: str) -> int:
        # True counts as 1 when summed, so this counts matching findings.
        return sum(item.severity == level for item in self.findings)

    @property
    def fatal_count(self) -> int:
        """Number of FATAL findings in this cycle."""
        return self._tally("FATAL")

    @property
    def major_count(self) -> int:
        """Number of MAJOR findings in this cycle."""
        return self._tally("MAJOR")

    @property
    def minor_count(self) -> int:
        """Number of MINOR findings in this cycle."""
        return self._tally("MINOR")

    @property
    def needs_fixes(self) -> bool:
        """True when at least one FATAL finding requires another fix pass."""
        return self.fatal_count > 0