@fbraza/pi-cite 0.1.0 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,401 @@
1
+ """
2
+ Preclinical Experiment Extraction Module
3
+
4
+ Parse abstracts to extract structured in vitro and in vivo experiment details.
5
+ Uses keyword-based extraction to identify cell lines, assays, animal models,
6
+ endpoints, and key findings from each paper.
7
+ """
8
+
9
+ import re
10
+ import os
11
+ from typing import List, Dict, Tuple
12
+ import pandas as pd
13
+
14
+
15
+ # ---------------------------------------------------------------------------
16
+ # Keyword dictionaries
17
+ # ---------------------------------------------------------------------------
18
+
19
# In vitro indicators.
# These are matched as whole words (see _keyword_match), so a bare stem such
# as "transfect" never matches "transfected" or "transfection". Every
# inflected form that should count therefore has to be listed explicitly.
IN_VITRO_KEYWORDS = [
    "cell line", "cell lines", "cell culture", "in vitro", "cultured cells",
    "transfect", "transfected", "transfecting", "transfection",
    "transduct", "transduced", "transducing", "transduction",
    "knockdown",
    "overexpress", "overexpressed", "overexpressing", "overexpression",
    "siRNA", "shRNA", "CRISPR", "sgRNA",
    "co-culture", "monolayer",
    "spheroid", "spheroids", "organoid", "organoids",
]
26
+
27
# Common cell line names.
# Unlike the keyword lists, these are matched case-sensitively: the callers in
# this module (_classify_experiment_type, _extract_in_vitro) build a
# r'\b' + re.escape(name) + r'\b' pattern and search the original-case text.
# Spelling variants (e.g. "MCF-7" / "MCF7") are separate entries and are
# reported separately.
CELL_LINE_NAMES = [
    "MCF-7", "MCF7", "MDA-MB-231", "MDA-MB-468", "T47D", "BT-474", "BT474",
    "BT-549", "BT549", "MDA-MB-453", "CAL-51", "HCC1937", "HCC1806",
    "SK-BR-3", "SKBR3", "ZR-75", "4T1", "EMT6",
    "HeLa", "HEK293", "HEK-293", "293T", "HEK293T",
    "A549", "H1299", "H460", "H1975", "PC9", "HCC827",
    "HCT116", "HT29", "SW480", "SW620", "LoVo", "Caco-2",
    "U87", "U251", "T98G", "LN229",
    "PC3", "PC-3", "LNCaP", "DU145", "22Rv1", "VCaP",
    "K562", "HL60", "HL-60", "Jurkat", "THP-1", "U937",
    "HepG2", "Hep3B", "Huh7", "SMMC-7721",
    "PANC-1", "MiaPaCa-2", "BxPC-3", "AsPC-1",
    "A375", "SK-MEL-28", "B16", "B16F10",
    "OVCAR3", "SKOV3", "A2780",
    "CHO", "NIH3T3", "3T3", "COS-7",
    "Raji", "Ramos", "Daudi",
    "SH-SY5Y", "Neuro-2a", "N2a",
    "RAW264.7", "RAW 264.7", "J774",
]
47
+
48
# Assay keyword categories.
# Maps an assay category label to the keywords that indicate it. Keywords are
# matched case-insensitively as whole words (see _keyword_match), so a bare
# stem such as "transcriptom" cannot match on its own; the real word forms are
# listed next to it.
ASSAY_KEYWORDS = {
    "viability": [
        "viability", "MTT", "CCK-8", "CCK8", "WST", "cell counting",
        "CellTiter", "MTS", "XTT", "alamarBlue", "resazurin",
        "cytotoxicity", "IC50", "EC50", "dose-response",
    ],
    "proliferation": [
        "proliferation", "colony formation", "clonogenic", "BrdU", "EdU",
        "Ki-67", "Ki67", "cell growth", "growth curve", "doubling time",
    ],
    "apoptosis": [
        "apoptosis", "annexin", "caspase", "TUNEL", "cell death",
        "sub-G1", "programmed cell death", "Bcl-2", "BAX",
        "cleaved PARP", "cytochrome c release",
    ],
    "migration_invasion": [
        "migration", "invasion", "wound healing", "transwell", "Boyden",
        "scratch assay", "chemotaxis", "Matrigel",
    ],
    "gene_expression": [
        "qPCR", "RT-PCR", "real-time PCR", "qRT-PCR",
        "mRNA expression", "RNA-seq", "RNAseq",
        "transcriptom", "transcriptome", "transcriptomic", "transcriptomics",
        "gene expression", "Northern blot",
    ],
    "protein_analysis": [
        "Western blot", "immunoblot", "ELISA", "immunoprecipitation",
        "phosphorylation", "Co-IP", "pull-down", "mass spectrometry",
        "proteomics", "immunofluorescence",
    ],
    "flow_cytometry": [
        "flow cytometry", "FACS", "cell cycle", "cell sorting",
        "intracellular staining", "surface marker",
    ],
    "reporter": [
        "luciferase", "reporter assay", "GFP", "fluorescent reporter",
        "dual-luciferase", "beta-galactosidase",
    ],
    "cell_signaling": [
        "signaling assay", "signaling pathway analysis",
        "phospho-", "kinase activity", "kinase assay",
        "pathway activation assay", "phosphoproteomics",
    ],
}
92
+
93
# In vivo indicators.
# Used by _classify_experiment_type to decide whether a paper has an in vivo
# component, and by _extract_in_vivo as the context filter for finding
# sentences. Matching is case-insensitive and whole-word (see _keyword_match).
IN_VIVO_KEYWORDS = [
    "in vivo", "mouse", "mice", "murine", "rat", "rats",
    "xenograft", "allograft", "PDX", "patient-derived xenograft",
    "orthotopic", "subcutaneous", "tumor-bearing",
    "nude mice", "BALB/c", "C57BL/6", "SCID", "NSG", "NOD",
    "transgenic", "knockout mice", "knock-in",
    "animal model", "animal experiment", "preclinical model",
]

# Animal model categories.
# Maps a model category label to the keywords that indicate it; a category is
# reported by _extract_in_vivo when any of its keywords matches.
# Note: "4T1", "B16" and "EMT6" also appear in CELL_LINE_NAMES.
ANIMAL_MODEL_KEYWORDS = {
    "xenograft": [
        "xenograft", "subcutaneous tumor", "subcutaneous implant",
        "orthotopic implant", "orthotopic model",
        "human tumor", "nude mice xenograft",
    ],
    "pdx": [
        "PDX", "patient-derived xenograft", "patient-derived model",
    ],
    "syngeneic": [
        "syngeneic", "allograft", "immunocompetent",
        "4T1", "CT26", "B16", "MC38", "LLC", "EMT6",
    ],
    "transgenic": [
        "transgenic", "knockout", "knock-in", "conditional knockout",
        "Cre-lox", "GEMM", "genetically engineered",
    ],
    "metastasis": [
        "metastasis model", "tail vein injection", "intracardiac",
        "metastatic", "lung metastasis", "liver metastasis",
        "spontaneous metastasis",
    ],
}

# In vivo endpoint categories.
# Maps an endpoint category label to its indicator keywords. Because matching
# is case-insensitive, short acronyms such as "PET", "PK" or "AUC" also match
# their lower-case spellings when they occur as whole words.
ENDPOINT_KEYWORDS = {
    "tumor_growth": [
        "tumor volume", "tumor growth", "tumor weight", "tumor size",
        "tumor regression", "tumor inhibition", "anti-tumor",
        "tumor growth inhibition", "TGI",
    ],
    "survival": [
        "survival", "overall survival", "Kaplan-Meier",
        "median survival", "survival rate", "lifespan",
    ],
    "biomarker": [
        "biomarker", "serum level", "plasma level", "circulating",
        "pharmacodynamic", "PD marker",
    ],
    "imaging": [
        "bioluminescence", "in vivo imaging", "PET", "MRI", "CT scan",
        "IVIS", "fluorescence imaging", "ultrasound",
    ],
    "histology": [
        "histology", "immunohistochemistry", "IHC", "H&E",
        "histopathology", "tissue staining", "TUNEL staining",
    ],
    "pharmacokinetics": [
        "pharmacokinetic", "PK", "half-life", "bioavailability",
        "AUC", "Cmax", "clearance", "distribution",
    ],
    "toxicity": [
        "toxicity", "body weight", "adverse", "tolerability",
        "maximum tolerated dose", "MTD", "safety", "organ toxicity",
    ],
}

# Finding / result keywords.
# A sentence is treated as a "finding" by _extract_findings and
# _extract_finding_sentences when it contains at least one of these words.
FINDING_KEYWORDS = [
    "significantly", "inhibited", "reduced", "suppressed", "attenuated",
    "enhanced", "increased", "promoted", "induced", "abolished",
    "demonstrated", "showed", "revealed", "observed",
    "decreased", "elevated", "impaired", "restored", "abrogated",
    "potentiated", "synergistic", "additive", "antagonistic",
]
169
+
170
+
171
+ # ---------------------------------------------------------------------------
172
+ # Public API
173
+ # ---------------------------------------------------------------------------
174
+
175
def extract_all_experiments(
    results: List[Dict],
    output_dir: str = "preclinical_results"
) -> List[Dict]:
    """
    Extract in vitro and in vivo experiment details from all paper abstracts.

    For every paper the title and abstract are concatenated, classified as
    in vitro / in vivo / both / unclassified, and scanned for cell lines,
    assay categories, animal model categories, endpoint categories and
    finding sentences. The per-paper records are written to
    ``<output_dir>/experiment_extraction.csv`` and also returned.

    Parameters
    ----------
    results : List[Dict]
        Paper records. The keys read here are "title", "abstract", "pmid",
        "doi", "authors" and "publication_date"; missing keys default to "".
    output_dir : str
        Output directory for experiment_extraction.csv (created if missing).

    Returns
    -------
    List[Dict]
        List of experiment extraction dicts, one per paper, in input order.

    Verification
    ------------
    Prints "✓ Experiment extraction completed successfully!"
    """
    # Fixed column order so the CSV always has a header row, even when
    # `results` is empty (a column-less DataFrame would write an empty file).
    columns = [
        "pmid", "doi", "title", "authors", "publication_date",
        "experiment_type",
        "cell_lines", "assays", "in_vitro_findings",
        "animal_models", "endpoints", "in_vivo_findings",
        "key_findings",
    ]

    print("\n" + "=" * 70)
    print("EXTRACTING EXPERIMENTS FROM ABSTRACTS")
    print("=" * 70)

    experiments = []
    type_counts = {"in_vitro": 0, "in_vivo": 0, "both": 0}

    for paper in results:
        # `or ""` guards against keys that are present but None; otherwise the
        # f-string would inject the literal word "None" into the scanned text.
        title = paper.get("title") or ""
        abstract = paper.get("abstract") or ""
        text = f"{title} {abstract}"

        # Classify experiment type
        exp_type = _classify_experiment_type(text)

        # Extract details
        in_vitro = _extract_in_vitro(text)
        in_vivo = _extract_in_vivo(text)
        findings = _extract_findings(text)

        # str.join on an empty list already yields "", so no extra guard needed.
        experiments.append({
            "pmid": paper.get("pmid", ""),
            "doi": paper.get("doi", ""),
            "title": paper.get("title", ""),
            "authors": paper.get("authors", ""),
            "publication_date": paper.get("publication_date", ""),
            "experiment_type": exp_type,
            # In vitro details
            "cell_lines": "; ".join(in_vitro["cell_lines"]),
            "assays": "; ".join(in_vitro["assays"]),
            "in_vitro_findings": " | ".join(in_vitro["findings"][:3]),
            # In vivo details
            "animal_models": "; ".join(in_vivo["animal_models"]),
            "endpoints": "; ".join(in_vivo["endpoints"]),
            "in_vivo_findings": " | ".join(in_vivo["findings"][:3]),
            # General findings
            "key_findings": " | ".join(findings[:3]),
        })

        if exp_type in type_counts:
            type_counts[exp_type] += 1

    # Summary
    unclassified = len(experiments) - sum(type_counts.values())
    print(f"\n Processed {len(experiments)} papers:")
    print(f" In vitro only: {type_counts['in_vitro']}")
    print(f" In vivo only: {type_counts['in_vivo']}")
    print(f" Both: {type_counts['both']}")
    print(f" Unclassified: {unclassified}")

    # Save CSV
    os.makedirs(output_dir, exist_ok=True)
    output_file = os.path.join(output_dir, "experiment_extraction.csv")
    df = pd.DataFrame(experiments, columns=columns)
    df.to_csv(output_file, index=False, encoding="utf-8")
    print(f"\n Saved extraction results to {output_file}")

    print("\n✓ Experiment extraction completed successfully!")
    return experiments
265
+
266
+
267
+ # ---------------------------------------------------------------------------
268
+ # Internal helpers
269
+ # ---------------------------------------------------------------------------
270
+
271
def _keyword_match(text: str, keywords: list, case_sensitive: bool = False) -> bool:
    """Check if any keyword matches as a whole word in text.

    Each keyword is regex-escaped and wrapped in ``\\b`` word boundaries, so
    "rat" matches "rat" but not "rate". Unless ``case_sensitive`` is True,
    both the text and the keywords are lower-cased before comparison.

    Returns False for empty or None text. Empty keywords are skipped, because
    an empty pattern between two ``\\b`` anchors would match almost any text.
    """
    if not text:
        return False

    # Normalise the haystack once instead of once per keyword.
    haystack = text if case_sensitive else text.lower()

    for kw in keywords:
        if not kw:
            continue
        needle = kw if case_sensitive else kw.lower()
        if re.search(r'\b' + re.escape(needle) + r'\b', haystack):
            return True
    return False
283
+
284
+
285
def _classify_experiment_type(text: str) -> str:
    """Label text as "in_vitro", "in_vivo", "both" or "unclassified".

    In vitro evidence is either an IN_VITRO_KEYWORDS hit or, failing that, a
    case-sensitive whole-word occurrence of any CELL_LINE_NAMES entry.
    In vivo evidence is an IN_VIVO_KEYWORDS hit.
    """
    in_vitro_hit = _keyword_match(text, IN_VITRO_KEYWORDS)
    if not in_vitro_hit:
        # Fall back to explicit cell line names; these keep their original case.
        for name in CELL_LINE_NAMES:
            if re.search(r'\b' + re.escape(name) + r'\b', text):
                in_vitro_hit = True
                break

    in_vivo_hit = _keyword_match(text, IN_VIVO_KEYWORDS)

    labels = {
        (True, True): "both",
        (True, False): "in_vitro",
        (False, True): "in_vivo",
    }
    return labels.get((in_vitro_hit, in_vivo_hit), "unclassified")
302
+
303
+
304
def _extract_in_vitro(text: str) -> Dict:
    """Collect in vitro details found in text.

    Returns a dict with three keys:
    "cell_lines" - sorted CELL_LINE_NAMES entries present as whole words
                   (case-sensitive),
    "assays"     - sorted ASSAY_KEYWORDS categories with at least one hit,
    "findings"   - finding sentences that also mention an in vitro keyword.
    """
    # Set comprehension de-duplicates before sorting.
    cell_lines = sorted({
        name
        for name in CELL_LINE_NAMES
        if re.search(r'\b' + re.escape(name) + r'\b', text)
    })

    assays = sorted(
        category
        for category, terms in ASSAY_KEYWORDS.items()
        if _keyword_match(text, terms)
    )

    return {
        "cell_lines": cell_lines,
        "assays": assays,
        "findings": _extract_finding_sentences(text, IN_VITRO_KEYWORDS),
    }
333
+
334
+
335
def _extract_in_vivo(text: str) -> Dict:
    """Collect in vivo details found in text.

    Returns a dict with three keys:
    "animal_models" - sorted ANIMAL_MODEL_KEYWORDS categories with a hit,
    "endpoints"     - sorted ENDPOINT_KEYWORDS categories with a hit,
    "findings"      - finding sentences that also mention an in vivo keyword.
    """
    def matching_categories(table: Dict) -> List[str]:
        # Category labels are dict keys, hence already unique.
        return sorted(
            label for label, terms in table.items()
            if _keyword_match(text, terms)
        )

    return {
        "animal_models": matching_categories(ANIMAL_MODEL_KEYWORDS),
        "endpoints": matching_categories(ENDPOINT_KEYWORDS),
        "findings": _extract_finding_sentences(text, IN_VIVO_KEYWORDS),
    }
363
+
364
+
365
def _extract_findings(text: str) -> List[str]:
    """Return up to five sentences from text that contain a finding keyword.

    Sentences are split after ".", "!" or "?" followed by whitespace.
    Sentences with fewer than 8 words are ignored, and sentences longer than
    300 characters are cut to 297 characters plus "...".
    """
    collected = []

    for raw in re.split(r'(?<=[.!?])\s+', text):
        candidate = raw.strip()
        too_short = len(candidate.split()) < 8
        if too_short or not _keyword_match(candidate, FINDING_KEYWORDS):
            continue
        if len(candidate) > 300:
            candidate = candidate[:297] + "..."
        collected.append(candidate)

    return collected[:5]
381
+
382
+
383
def _extract_finding_sentences(text: str, context_keywords: List[str]) -> List[str]:
    """Return up to three sentences containing both a finding and a context keyword.

    Uses the same sentence splitting, 8-word minimum and 300-character
    truncation as _extract_findings, but additionally requires a hit from
    ``context_keywords`` in the same sentence.
    """
    collected = []

    for raw in re.split(r'(?<=[.!?])\s+', text):
        candidate = raw.strip()
        if len(candidate.split()) < 8:
            continue

        if not _keyword_match(candidate, FINDING_KEYWORDS):
            continue
        if not _keyword_match(candidate, context_keywords):
            continue

        if len(candidate) > 300:
            candidate = candidate[:297] + "..."
        collected.append(candidate)

    return collected[:3]
@@ -0,0 +1,94 @@
1
+ """Generate the unified per-paper summary table for the literature skill."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import csv
6
+ from typing import Dict, Iterable, List
7
+
8
+ from synthesis import classify_evidence_quality, classify_study_type
9
+
10
+
11
def _authors_year(paper: Dict) -> str:
    """Format a short citation label such as "Smith J et al. (2020)".

    "authors" may be a list or a ";"-separated string. The first author is
    used, with " et al." appended when there is more than one author and the
    lead does not already contain it. The year comes from "year", else the
    first four characters of "publication_date", else "n.d.".
    """
    authors = paper.get("authors") or []
    if isinstance(authors, str):
        authors = [a.strip() for a in authors.split(";") if a.strip()]

    # str() keeps the "et al." membership test safe for non-string entries.
    lead = str(authors[0]) if authors else "Unknown"
    if len(authors) > 1 and "et al." not in lead:
        lead = f"{lead} et al."

    # `or ""` covers a key that is present but None (None[:4] would raise);
    # str() covers non-string dates such as an integer year.
    date_prefix = str(paper.get("publication_date") or "")[:4]
    year = paper.get("year") or date_prefix or "n.d."
    return f"{lead} ({year})"
20
+
21
+
22
def _identifier(paper: Dict) -> str:
    """Return "PMID:<pmid>" when a PMID is set, else the DOI, else "NA"."""
    pmid = paper.get("pmid")
    if pmid:
        return f"PMID:{pmid}"
    doi = paper.get("doi")
    return doi if doi else "NA"
28
+
29
+
30
def _truncate(text: str, limit: int = 160) -> str:
    """Collapse whitespace runs to single spaces and cap the length at ``limit``.

    Falsy input becomes "". Over-long text is cut to ``limit - 1`` characters,
    right-stripped, and suffixed with a single "…" character.
    """
    collapsed = " ".join(str(text or "").split())
    if len(collapsed) > limit:
        return collapsed[: limit - 1].rstrip() + "…"
    return collapsed
35
+
36
+
37
def build_table_rows(papers: List[Dict], experiments: List[Dict] | None = None, mode: str = "general") -> List[Dict]:
    """Build one summary-table row (a dict keyed by column header) per paper.

    Experiment records are joined to papers on PMID, falling back to DOI.
    With ``mode == "preclinical"`` four extra experiment columns are appended
    to every row.
    """
    # Index experiments by identifier; a later duplicate overwrites an earlier one.
    by_key = {}
    for record in experiments or []:
        record_key = record.get("pmid") or record.get("doi")
        if record_key:
            by_key[record_key] = record

    table = []
    for number, paper in enumerate(papers, start=1):
        exp = by_key.get(paper.get("pmid") or paper.get("doi"), {})

        # Only list-typed publication types are shown, and at most three.
        pub_types = paper.get("publication_types")
        type_label = ", ".join(pub_types[:3]) if isinstance(pub_types, list) else ""
        method_parts = [
            part
            for part in (type_label, exp.get("assays", ""), exp.get("endpoints", ""))
            if part
        ]

        row = {
            "#": number,
            "PMID/DOI": _identifier(paper),
            "Authors (year)": _authors_year(paper),
            "Key Message": _truncate(paper.get("title") or ""),
            "Key Results": _truncate(paper.get("abstract") or exp.get("key_findings") or ""),
            "Key Methods": _truncate("; ".join(method_parts)),
            "Study Type": classify_study_type(paper),
            "Evidence Quality": classify_evidence_quality(paper),
        }

        if mode == "preclinical":
            assay_endpoint = [
                part
                for part in (exp.get("assays", ""), exp.get("endpoints", ""))
                if part
            ]
            row["Experiment Type"] = exp.get("experiment_type", "")
            row["Model System"] = exp.get("cell_lines") or exp.get("animal_models") or ""
            row["Assay/Endpoint"] = "; ".join(assay_endpoint)
            row["Finding Direction"] = exp.get("key_findings", "")

        table.append(row)
    return table
73
+
74
+
75
def rows_to_markdown(rows: Iterable[Dict]) -> str:
    """Render rows as a Markdown table under a "## Paper Summary Table" heading.

    Column headers are taken from the keys of the first row. Cell text is
    made table-safe: line breaks become spaces and literal "|" characters are
    escaped as "\\|", so a cell containing pipes cannot split into extra
    columns. Returns a placeholder section when ``rows`` is empty.
    """
    rows = list(rows)
    if not rows:
        return "## Paper Summary Table\n\n_No papers available._\n"

    def cell(value) -> str:
        # Normalise every newline flavour first, then escape the column separator.
        flat = str(value).replace("\r\n", " ").replace("\r", " ").replace("\n", " ")
        return flat.replace("|", "\\|")

    headers = list(rows[0].keys())
    out = [
        "## Paper Summary Table",
        "",
        "| " + " | ".join(cell(header) for header in headers) + " |",
        "|" + "---|" * len(headers),
    ]
    for row in rows:
        out.append("| " + " | ".join(cell(row.get(header, "")) for header in headers) + " |")
    return "\n".join(out) + "\n"
84
+
85
+
86
def write_csv(rows: Iterable[Dict], output_path: str) -> None:
    """Write rows to ``output_path`` as UTF-8 CSV, with a header from the first row.

    Nothing is written (and no file is created) when ``rows`` is empty.
    """
    records = list(rows)
    if records:
        fieldnames = list(records[0])
        with open(output_path, "w", newline="", encoding="utf-8") as handle:
            writer = csv.DictWriter(handle, fieldnames=fieldnames)
            writer.writeheader()
            writer.writerows(records)
@@ -0,0 +1,94 @@
1
+ """
2
+ Unified literature synthesis helpers.
3
+
4
+ This module combines general literature review summarisation with the
5
+ preclinical extraction summary used by the legacy merged literature workflow.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ from collections import Counter
11
+ from typing import Dict, List, Optional
12
+
13
+
14
def classify_study_type(paper: Dict) -> str:
    """Assign a coarse study-type label to a paper record.

    Publication types are checked first (meta-analysis / systematic review,
    randomized controlled trial, clinical trial), then the "is_preprint"
    flag, and finally the title and abstract are scanned for in vivo and
    in vitro terms. Returns "Observational / other" when nothing matches.

    Title/abstract terms are matched on word boundaries, so "micelle" does
    not count as "mice" and "cell lineage" does not count as "cell line".
    """
    # Function-scope import: this module does not import `re` at file level.
    import re

    # Tolerate a missing/None value and a single string instead of a list
    # (iterating a bare string would split it into characters).
    raw_types = paper.get("publication_types") or []
    if isinstance(raw_types, str):
        raw_types = [raw_types]
    type_text = " ".join(str(x).lower() for x in raw_types)

    if "meta-analysis" in type_text or "systematic review" in type_text:
        return "Systematic review / meta-analysis"
    if "randomized controlled trial" in type_text:
        return "Randomized controlled trial"
    if "clinical trial" in type_text:
        return "Clinical study"
    if paper.get("is_preprint"):
        return "Preprint"

    title = paper.get("title") or ""
    abstract = paper.get("abstract") or ""
    text = f"{title} {abstract}".lower()

    # Inflected forms that the former substring test accepted
    # (xenografts/xenografted, cell lines, organoids, CRISPRi...) still match.
    in_vivo = re.search(r"\b(?:xenograft\w*|mouse|mice|in vivo)\b", text)
    in_vitro = re.search(r"\b(?:cell lines?|in vitro|organoids?)\b", text)

    if in_vivo:
        return "In vitro + in vivo" if in_vitro else "In vivo"
    # "crispr" only counts as in vitro evidence when no in vivo term is present.
    if in_vitro or re.search(r"\bcrispr", text):
        return "In vitro"
    return "Observational / other"
34
+
35
+
36
def classify_evidence_quality(paper: Dict) -> str:
    """Map a paper's study type to an evidence-quality label.

    Reviews and randomized trials are "High"; clinical studies and combined
    in vitro + in vivo work are "Moderate"; remaining preprints are
    "Preliminary (preprint)"; in vivo-only work is "Moderate"; everything
    else is "Preliminary".
    """
    study_type = classify_study_type(paper)

    # Labels that are decided before the preprint flag is consulted.
    fixed_grades = {
        "Systematic review / meta-analysis": "High",
        "Randomized controlled trial": "High",
        "Clinical study": "Moderate",
        "In vitro + in vivo": "Moderate",
    }
    grade = fixed_grades.get(study_type)
    if grade is not None:
        return grade

    if paper.get("is_preprint"):
        return "Preliminary (preprint)"
    return "Moderate" if study_type == "In vivo" else "Preliminary"
49
+
50
+
51
def summarize_papers(papers: List[Dict]) -> Dict:
    """Summarise a paper list: total count, study-type and evidence tallies, year range.

    "year_range" is ``[min, max]`` over papers whose "year" is all digits,
    or None when no paper has such a year.
    """
    type_tally = Counter()
    quality_tally = Counter()
    numeric_years = []

    for paper in papers:
        type_tally[classify_study_type(paper)] += 1
        quality_tally[classify_evidence_quality(paper)] += 1
        if str(paper.get("year", "")).isdigit():
            numeric_years.append(int(paper.get("year")))

    year_range = [min(numeric_years), max(numeric_years)] if numeric_years else None

    return {
        "total_papers": len(papers),
        "study_type_breakdown": dict(type_tally),
        "evidence_quality_breakdown": dict(quality_tally),
        "year_range": year_range,
    }
61
+
62
+
63
def generate_narrative(papers: List[Dict], topic: str = "") -> str:
    """Build a short Markdown narrative summarising the given papers.

    The text contains a lead line (naming ``topic`` when given), bullet
    points for paper count, year range, study types and evidence quality,
    and a numbered list of the first five paper titles.
    """
    summary = summarize_papers(papers)
    lead = f"Literature synthesis for **{topic}**." if topic else "Literature synthesis."
    lines = [lead, "", f"- Papers reviewed: {summary['total_papers']}"]

    if summary["year_range"]:
        lines.append(f"- Year range: {summary['year_range'][0]}-{summary['year_range'][1]}")
    if summary["study_type_breakdown"]:
        lines.append("- Study types: " + ", ".join(f"{k} ({v})" for k, v in summary["study_type_breakdown"].items()))
    if summary["evidence_quality_breakdown"]:
        lines.append("- Evidence quality: " + ", ".join(f"{k} ({v})" for k, v in summary["evidence_quality_breakdown"].items()))

    # `or` rather than a .get() default: the default is only used when the key
    # is absent, so a title of None or "" would print "None" or a blank entry.
    top_titles = [p.get("title") or "Untitled" for p in papers[:5]]
    if top_titles:
        lines.extend(["", "Top prioritised papers:"])
        lines.extend(f"{position}. {title}" for position, title in enumerate(top_titles, start=1))
    return "\n".join(lines)
79
+
80
+
81
def synthesize_literature(
    papers: List[Dict],
    experiments: Optional[List[Dict]] = None,
    topic: str = "",
    mode: str = "general",
) -> Dict:
    """Combine the paper summary, narrative text and optional experiment tallies.

    The result is the summarize_papers() dict extended with "mode", "topic"
    and "narrative_markdown". When ``experiments`` is non-empty it also gets
    "experiment_type_breakdown" and "model_systems" counts.
    """
    result = summarize_papers(papers)
    result.update(
        mode=mode,
        topic=topic,
        narrative_markdown=generate_narrative(papers, topic=topic),
    )

    if experiments:
        type_tally = Counter(
            record.get("experiment_type", "unclassified") for record in experiments
        )
        # A record's model system is its cell lines, else its animal models;
        # records with neither are left out of the tally.
        systems = (
            record.get("cell_lines") or record.get("animal_models")
            for record in experiments
        )
        system_tally = Counter(system for system in systems if system)

        result["experiment_type_breakdown"] = dict(type_tally)
        result["model_systems"] = dict(system_tally)
    return result
package/src/index.ts CHANGED
@@ -1,12 +1,8 @@
1
1
  import type { ExtensionAPI } from "@earendil-works/pi-coding-agent";
2
- import { registerFetchFulltextTool } from "./fulltext.ts";
3
2
  import { registerLiteratureSearchTool } from "./literature-search.ts";
4
3
  import { registerPubmedSearchTool } from "./pubmed.ts";
5
- import { registerSemanticScholarSearchTool } from "./semantic-scholar.ts";
6
4
 
7
5
  export default function literatureToolsExtension(pi: ExtensionAPI) {
8
6
  registerLiteratureSearchTool(pi);
9
7
  registerPubmedSearchTool(pi);
10
- registerSemanticScholarSearchTool(pi);
11
- registerFetchFulltextTool(pi);
12
8
  }