celltype-cli 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (89) hide show
  1. celltype_cli-0.1.0.dist-info/METADATA +267 -0
  2. celltype_cli-0.1.0.dist-info/RECORD +89 -0
  3. celltype_cli-0.1.0.dist-info/WHEEL +4 -0
  4. celltype_cli-0.1.0.dist-info/entry_points.txt +2 -0
  5. celltype_cli-0.1.0.dist-info/licenses/LICENSE +21 -0
  6. ct/__init__.py +3 -0
  7. ct/agent/__init__.py +0 -0
  8. ct/agent/case_studies.py +426 -0
  9. ct/agent/config.py +523 -0
  10. ct/agent/doctor.py +544 -0
  11. ct/agent/knowledge.py +523 -0
  12. ct/agent/loop.py +99 -0
  13. ct/agent/mcp_server.py +478 -0
  14. ct/agent/orchestrator.py +733 -0
  15. ct/agent/runner.py +656 -0
  16. ct/agent/sandbox.py +481 -0
  17. ct/agent/session.py +145 -0
  18. ct/agent/system_prompt.py +186 -0
  19. ct/agent/trace_store.py +228 -0
  20. ct/agent/trajectory.py +169 -0
  21. ct/agent/types.py +182 -0
  22. ct/agent/workflows.py +462 -0
  23. ct/api/__init__.py +1 -0
  24. ct/api/app.py +211 -0
  25. ct/api/config.py +120 -0
  26. ct/api/engine.py +124 -0
  27. ct/cli.py +1448 -0
  28. ct/data/__init__.py +0 -0
  29. ct/data/compute_providers.json +59 -0
  30. ct/data/cro_database.json +395 -0
  31. ct/data/downloader.py +238 -0
  32. ct/data/loaders.py +252 -0
  33. ct/kb/__init__.py +5 -0
  34. ct/kb/benchmarks.py +147 -0
  35. ct/kb/governance.py +106 -0
  36. ct/kb/ingest.py +415 -0
  37. ct/kb/reasoning.py +129 -0
  38. ct/kb/schema_monitor.py +162 -0
  39. ct/kb/substrate.py +387 -0
  40. ct/models/__init__.py +0 -0
  41. ct/models/llm.py +370 -0
  42. ct/tools/__init__.py +195 -0
  43. ct/tools/_compound_resolver.py +297 -0
  44. ct/tools/biomarker.py +368 -0
  45. ct/tools/cellxgene.py +282 -0
  46. ct/tools/chemistry.py +1371 -0
  47. ct/tools/claude.py +390 -0
  48. ct/tools/clinical.py +1153 -0
  49. ct/tools/clue.py +249 -0
  50. ct/tools/code.py +1069 -0
  51. ct/tools/combination.py +397 -0
  52. ct/tools/compute.py +402 -0
  53. ct/tools/cro.py +413 -0
  54. ct/tools/data_api.py +2114 -0
  55. ct/tools/design.py +295 -0
  56. ct/tools/dna.py +575 -0
  57. ct/tools/experiment.py +604 -0
  58. ct/tools/expression.py +655 -0
  59. ct/tools/files.py +957 -0
  60. ct/tools/genomics.py +1387 -0
  61. ct/tools/http_client.py +146 -0
  62. ct/tools/imaging.py +319 -0
  63. ct/tools/intel.py +223 -0
  64. ct/tools/literature.py +743 -0
  65. ct/tools/network.py +422 -0
  66. ct/tools/notification.py +111 -0
  67. ct/tools/omics.py +3330 -0
  68. ct/tools/ops.py +1230 -0
  69. ct/tools/parity.py +649 -0
  70. ct/tools/pk.py +245 -0
  71. ct/tools/protein.py +678 -0
  72. ct/tools/regulatory.py +643 -0
  73. ct/tools/remote_data.py +179 -0
  74. ct/tools/report.py +181 -0
  75. ct/tools/repurposing.py +376 -0
  76. ct/tools/safety.py +1280 -0
  77. ct/tools/shell.py +178 -0
  78. ct/tools/singlecell.py +533 -0
  79. ct/tools/statistics.py +552 -0
  80. ct/tools/structure.py +882 -0
  81. ct/tools/target.py +901 -0
  82. ct/tools/translational.py +123 -0
  83. ct/tools/viability.py +218 -0
  84. ct/ui/__init__.py +0 -0
  85. ct/ui/markdown.py +31 -0
  86. ct/ui/status.py +258 -0
  87. ct/ui/suggestions.py +567 -0
  88. ct/ui/terminal.py +1456 -0
  89. ct/ui/traces.py +112 -0
ct/tools/literature.py ADDED
@@ -0,0 +1,743 @@
1
+ """
2
+ Literature and database tools: PubMed, OpenAlex, ChEMBL API queries.
3
+
4
+ These are REST API wrappers -- no local data required.
5
+ """
6
+
7
+ import re as _re
8
+
9
+ from ct.tools import registry
10
+ from ct.tools.http_client import request, request_json
11
+
12
+
13
+ def _normalize_pubmed_query(query: str) -> str:
14
+ """Normalize a PubMed query for NCBI E-utilities.
15
+
16
+ - Uppercase standalone boolean operators (and→AND, or→OR, not→NOT)
17
+ - Preserve text inside quoted phrases
18
+ - Normalize whitespace
19
+ """
20
+ # Split on quoted phrases to preserve them
21
+ parts = _re.split(r'(".*?")', query)
22
+ normalized = []
23
+ for i, part in enumerate(parts):
24
+ if part.startswith('"'):
25
+ # Quoted phrase — keep as-is
26
+ normalized.append(part)
27
+ else:
28
+ # Uppercase standalone boolean operators
29
+ part = _re.sub(r'\b(and)\b', 'AND', part, flags=_re.IGNORECASE)
30
+ part = _re.sub(r'\b(or)\b', 'OR', part, flags=_re.IGNORECASE)
31
+ part = _re.sub(r'\b(not)\b', 'NOT', part, flags=_re.IGNORECASE)
32
+ normalized.append(part)
33
+ result = "".join(normalized)
34
+ # Normalize whitespace
35
+ return " ".join(result.split())
36
+
37
+
38
+ def _simplify_query(query: str) -> list[str]:
39
+ """Generate progressively simpler queries by dropping terms.
40
+
41
+ PubMed ANDs all terms by default, so long queries (8+ terms) often return
42
+ zero results. We try shorter versions as fallbacks.
43
+ """
44
+ # Remove parenthesized groups and quoted phrases for counting
45
+ clean = _re.sub(r'\([^)]*\)', '', query)
46
+ clean = _re.sub(r'"[^"]*"', '', clean)
47
+ # Split on whitespace, ignoring boolean operators
48
+ words = [w for w in query.split() if w.upper() not in ("AND", "OR", "NOT")]
49
+
50
+ if len(words) <= 4:
51
+ return [] # Already short enough
52
+
53
+ # Try keeping just the most distinctive terms (drop common qualifiers)
54
+ # Strategy: take first N words from the original query
55
+ shorter = []
56
+ if len(words) > 6:
57
+ shorter.append(" ".join(words[:5]))
58
+ if len(words) > 4:
59
+ shorter.append(" ".join(words[:3]))
60
+ return shorter
61
+
62
+
63
@registry.register(
    name="literature.pubmed_search",
    description="Search PubMed for publications via NCBI E-utilities API",
    category="literature",
    parameters={
        "query": "Search query (e.g. 'molecular glue degrader CRBN')",
        "max_results": "Maximum number of results (default 20)",
    },
    usage_guide="You need recent publications on a target, compound, or mechanism. Use to support or challenge computational findings with published evidence.",
)
def pubmed_search(query: str, max_results: int = 20, **kwargs) -> dict:
    """Search PubMed using NCBI E-utilities (ESearch + ESummary).

    Two-step flow: ESearch resolves the (normalized) query to PMIDs, then
    ESummary fetches metadata for those PMIDs. If the original query yields
    no hits, progressively simpler fallback queries from _simplify_query()
    are tried before giving up.

    Args:
        query: PubMed search query; boolean operators are normalized.
        max_results: Maximum number of PMIDs requested (default 20).
        **kwargs: Ignored; accepted for registry call compatibility.

    Returns:
        On success, a dict with "summary", "query" (the query actually
        used), "original_query", "total_count", and "articles". On
        failure, a dict with "error" and "summary".
    """
    # The HTTP layer (ct.tools.http_client) is httpx-based; fail early with
    # an actionable message if httpx is not installed.
    try:
        import httpx
    except ImportError:
        return {"error": "httpx required (pip install httpx)", "summary": "httpx required (pip install httpx)"}
    base = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils"

    # Step 1: ESearch to get PMIDs
    search_url = f"{base}/esearch.fcgi"
    normalized = _normalize_pubmed_query(query)
    params = {
        "db": "pubmed",
        "term": normalized,
        "retmax": max_results,
        "retmode": "json",
        "sort": "relevance",
    }

    search_data, error = request_json(
        "GET",
        search_url,
        params=params,
        timeout=30,
        retries=2,
    )
    if error:
        return {"error": f"PubMed search failed: {error}", "summary": f"PubMed search failed: {error}"}
    result = search_data.get("esearchresult", {})
    pmids = result.get("idlist", [])
    # ESearch reports "count" as a string; coerce to int for the summary.
    total_count = int(result.get("count", 0))

    # If no results with a long query, retry with progressively simpler versions
    used_query = query
    if not pmids:
        for simpler in _simplify_query(query):
            params["term"] = _normalize_pubmed_query(simpler)
            search_data, fallback_error = request_json(
                "GET",
                search_url,
                params=params,
                timeout=30,
                retries=2,
            )
            if fallback_error:
                # Transient failure on a fallback — try the next simpler query.
                continue
            result = search_data.get("esearchresult", {})
            pmids = result.get("idlist", [])
            total_count = int(result.get("count", 0))
            if pmids:
                used_query = simpler
                break

    if not pmids:
        return {"summary": f"No results for '{query}'", "total_count": 0, "articles": []}

    # Step 2: ESummary for article details
    summary_url = f"{base}/esummary.fcgi"
    params = {
        "db": "pubmed",
        "id": ",".join(pmids),
        "retmode": "json",
    }

    summary_data, error = request_json(
        "GET",
        summary_url,
        params=params,
        timeout=30,
        retries=2,
    )
    if error:
        return {"error": f"PubMed summary failed: {error}", "summary": f"PubMed summary failed: {error}"}
    articles = []
    for pmid in pmids:
        info = summary_data.get("result", {}).get(pmid, {})
        # "uids" is a bookkeeping key in the ESummary payload, not a PMID.
        if not info or pmid == "uids":
            continue

        authors = info.get("authors", [])
        first_author = authors[0].get("name", "") if authors else ""

        articles.append({
            "pmid": pmid,
            "title": info.get("title", ""),
            "first_author": first_author,
            "journal": info.get("source", ""),
            "pub_date": info.get("pubdate", ""),
            # The DOI lives in the "articleids" list, keyed by idtype.
            "doi": next((a.get("value", "") for a in info.get("articleids", [])
                         if a.get("idtype") == "doi"), ""),
        })

    summary = f"PubMed search '{used_query}': {total_count} total, showing {len(articles)}"
    if used_query != query:
        summary += f" (simplified from: '{query}')"

    return {
        "summary": summary,
        "query": used_query,
        "original_query": query,
        "total_count": total_count,
        "articles": articles,
    }
176
+
177
+
178
@registry.register(
    name="literature.chembl_query",
    description="Query ChEMBL for compound bioactivity, targets, and SAR data",
    category="literature",
    parameters={
        "query": "Compound name, SMILES, or ChEMBL ID",
        "query_type": "'molecule', 'target', 'activity', or 'similarity'",
        "max_results": "Maximum results (default 20)",
    },
    usage_guide="You want to look up known bioactivity data, find related compounds, or check if a target has known ligands. Use ChEMBL for chemical and pharmacological context.",
)
def chembl_query(query: str, query_type: str = "molecule", max_results: int = 20, **kwargs) -> dict:
    """Query ChEMBL database for compound/target/activity data.

    Args:
        query: Compound name, SMILES string, or ChEMBL ID.
        query_type: One of 'molecule', 'target', 'activity', or
            'similarity'; common aliases ('compound', 'drug', 'gene', ...)
            are folded onto these.
        max_results: Maximum number of records to return (default 20).
        **kwargs: Ignored; accepted for registry call compatibility.

    Returns:
        On success, a dict with a "summary" plus a result list keyed by
        query type ("molecules", "targets", "activities", or "hits").
        On failure, a dict with "error" and "summary".
    """
    # The HTTP layer (ct.tools.http_client) is httpx-based; fail early with
    # an actionable message if httpx is not installed.
    try:
        import httpx
    except ImportError:
        return {"error": "httpx required (pip install httpx)", "summary": "httpx required (pip install httpx)"}
    query = str(query or "").strip()
    query_type_raw = str(query_type or "molecule").strip().lower()
    # Fold common aliases onto the four canonical query types. (A later
    # dead re-check of "compound" was removed: this mapping already
    # handles it.)
    query_type_aliases = {
        "compound": "molecule",
        "drug": "molecule",
        "molecules": "molecule",
        "compounds": "molecule",
        "protein": "target",
        "gene": "target",
        "bioactivity": "activity",
        "activities": "activity",
        "similar": "similarity",
    }
    query_type = query_type_aliases.get(query_type_raw, query_type_raw)
    base = "https://www.ebi.ac.uk/chembl/api/data"
    headers = {"Accept": "application/json"}

    def _activity_record(act: dict) -> dict:
        # Normalize one ChEMBL activity entry to a compact record
        # (shared by the molecule-id and target-id activity lookups).
        return {
            "molecule_chembl_id": act.get("molecule_chembl_id", ""),
            "molecule_name": act.get("molecule_pref_name", ""),
            "target_chembl_id": act.get("target_chembl_id", ""),
            "target_name": act.get("target_pref_name", ""),
            "standard_type": act.get("standard_type", ""),
            "standard_value": act.get("standard_value"),
            "standard_units": act.get("standard_units", ""),
            "pchembl_value": act.get("pchembl_value"),
            "assay_type": act.get("assay_type", ""),
            "assay_description": (act.get("assay_description", "") or "")[:200],
        }

    try:
        if query_type == "molecule":
            url = f"{base}/molecule/search.json"
            params = {"q": query, "limit": max_results}
            data, error = request_json(
                "GET",
                url,
                params=params,
                headers=headers,
                timeout=30,
                retries=2,
            )
            if error:
                return {"error": f"ChEMBL query failed: {error}", "summary": f"ChEMBL query failed: {error}"}
            molecules = []
            for mol in data.get("molecules", []):
                props = mol.get("molecule_properties", {}) or {}
                molecules.append({
                    "chembl_id": mol.get("molecule_chembl_id", ""),
                    "pref_name": mol.get("pref_name", ""),
                    "molecule_type": mol.get("molecule_type", ""),
                    "max_phase": mol.get("max_phase", 0),
                    "mw": props.get("full_mwt"),
                    "logp": props.get("alogp"),
                    "smiles": (mol.get("molecule_structures", {}) or {}).get("canonical_smiles", ""),
                })

            return {
                "summary": f"ChEMBL molecule search '{query}': {len(molecules)} hits",
                "query": query,
                "molecules": molecules,
            }

        elif query_type == "target":
            url = f"{base}/target/search.json"
            params = {"q": query, "limit": max_results}
            data, error = request_json(
                "GET",
                url,
                params=params,
                headers=headers,
                timeout=30,
                retries=2,
            )
            if error:
                return {"error": f"ChEMBL query failed: {error}", "summary": f"ChEMBL query failed: {error}"}
            targets = []
            for tgt in data.get("targets", []):
                targets.append({
                    "chembl_id": tgt.get("target_chembl_id", ""),
                    "pref_name": tgt.get("pref_name", ""),
                    "organism": tgt.get("organism", ""),
                    "target_type": tgt.get("target_type", ""),
                })

            return {
                "summary": f"ChEMBL target search '{query}': {len(targets)} hits",
                "query": query,
                "targets": targets,
            }

        elif query_type == "activity":
            # Support both target and molecule ChEMBL IDs, plus compound
            # names (resolved to a molecule ChEMBL ID first).
            molecule_id = None
            target_id = None

            if query.startswith("CHEMBL"):
                # Could be target or molecule — try molecule activity first
                molecule_id = query
            else:
                # Try to resolve compound name to ChEMBL molecule ID
                search_url = f"{base}/molecule/search.json"
                search_params = {"q": query, "limit": 5}
                search_data, search_error = request_json(
                    "GET",
                    search_url,
                    params=search_params,
                    headers=headers,
                    timeout=30,
                    retries=2,
                )
                if not search_error:
                    mols = search_data.get("molecules", [])
                    if mols:
                        molecule_id = mols[0].get("molecule_chembl_id", "")

            # Query activities by molecule ChEMBL ID
            activities = []
            if molecule_id:
                url = f"{base}/activity.json"
                params = {
                    "molecule_chembl_id": molecule_id,
                    "limit": max_results,
                }
                data, error = request_json(
                    "GET",
                    url,
                    params=params,
                    headers=headers,
                    timeout=30,
                    retries=2,
                )
                if not error:
                    for act in data.get("activities", []):
                        activities.append(_activity_record(act))

            # If no results from molecule lookup, try target lookup
            if not activities:
                target_id = query if query.startswith("CHEMBL") else None
                if target_id:
                    url = f"{base}/activity.json"
                    params = {
                        "target_chembl_id": target_id,
                        "limit": max_results,
                        # Restrict to the common potency measurements.
                        "standard_type__in": "IC50,Ki,Kd,EC50",
                    }
                    data, error = request_json(
                        "GET",
                        url,
                        params=params,
                        headers=headers,
                        timeout=30,
                        retries=2,
                    )
                    if not error:
                        for act in data.get("activities", []):
                            activities.append(_activity_record(act))

            resolved_id = molecule_id or target_id or query
            return {
                "summary": f"ChEMBL activities for {query} ({resolved_id}): {len(activities)} results",
                "query": query,
                "chembl_id": resolved_id,
                "activities": activities,
            }

        elif query_type == "similarity":
            # Fixed 70% Tanimoto similarity threshold (encoded in the URL).
            url = f"{base}/similarity/{query}/70.json"
            params = {"limit": max_results}
            data, error = request_json(
                "GET",
                url,
                params=params,
                headers=headers,
                timeout=30,
                retries=2,
            )
            if error:
                return {"error": f"ChEMBL query failed: {error}", "summary": f"ChEMBL query failed: {error}"}
            hits = []
            for mol in data.get("molecules", []):
                hits.append({
                    "chembl_id": mol.get("molecule_chembl_id", ""),
                    "pref_name": mol.get("pref_name", ""),
                    "similarity": mol.get("similarity", 0),
                    "smiles": (mol.get("molecule_structures", {}) or {}).get("canonical_smiles", ""),
                })

            return {
                "summary": f"ChEMBL similarity search: {len(hits)} hits (>70% similar)",
                "query": query,
                "hits": hits,
            }

        else:
            return {"error": f"Unknown query_type: {query_type_raw}. Use 'molecule', 'target', 'activity', or 'similarity'", "summary": f"Unknown query_type: {query_type_raw}. Use 'molecule', 'target', 'activity', or 'similarity'"}
    except Exception as e:
        return {"error": f"ChEMBL query failed: {e}", "summary": f"ChEMBL query failed: {e}"}
408
@registry.register(
    name="literature.openalex_search",
    description="Search OpenAlex for academic publications with citation data and open access links",
    category="literature",
    parameters={
        "query": "Search query",
        "max_results": "Maximum results (default 20)",
    },
    usage_guide="You want academic publications with citation metrics and open access links. Broader than PubMed — covers all scientific literature. Use for comprehensive literature reviews.",
)
def openalex_search(query: str, max_results: int = 20, **kwargs) -> dict:
    """Search OpenAlex for publications with citation metrics.

    Args:
        query: Free-text search string.
        max_results: Maximum number of works to return (default 20).
        **kwargs: Ignored; accepted for registry call compatibility.

    Returns:
        dict with "summary", "query", "total_count", and "articles" on
        success, or "error"/"summary" on failure.
    """
    # The HTTP layer is httpx-based; fail early if it is not installed.
    try:
        import httpx
    except ImportError:
        return {"error": "httpx required (pip install httpx)", "summary": "httpx required (pip install httpx)"}

    def _flatten(work):
        # Flatten one OpenAlex "work" record into a compact article dict.
        author_entries = work.get("authorships", [])
        lead_author = ""
        if author_entries:
            lead_author = author_entries[0].get("author", {}).get("display_name", "")
        # primary_location / source may be present-but-None, hence `or {}`.
        venue = (work.get("primary_location") or {}).get("source") or {}
        return {
            "title": work.get("title", ""),
            "first_author": lead_author,
            "publication_year": work.get("publication_year"),
            "cited_by_count": work.get("cited_by_count", 0),
            "doi": work.get("doi", ""),
            "open_access": (work.get("open_access") or {}).get("is_oa", False),
            "source": venue.get("display_name", ""),
            "type": work.get("type", ""),
        }

    payload, error = request_json(
        "GET",
        "https://api.openalex.org/works",
        params={
            "search": query,
            "per_page": max_results,
            "sort": "relevance_score:desc",
            # OpenAlex "polite pool" contact address.
            "mailto": "ct@celltype.bio",
        },
        timeout=30,
        retries=2,
    )
    if error:
        return {"error": f"OpenAlex search failed: {error}", "summary": f"OpenAlex search failed: {error}"}

    total = payload.get("meta", {}).get("count", 0)
    articles = [_flatten(work) for work in payload.get("results", [])]

    return {
        "summary": f"OpenAlex search '{query}': {total} total, showing {len(articles)}",
        "query": query,
        "total_count": total,
        "articles": articles,
    }
472
+
473
+
474
@registry.register(
    name="literature.patent_search",
    description="Search patent databases for drug discovery-relevant patents (Lens.org, EPO OPS, or PubMed fallback)",
    category="literature",
    parameters={
        "query": "Patent search query (e.g. 'CRBN molecular glue degrader')",
        "max_results": "Maximum number of results (default 20)",
    },
    usage_guide="You need to find relevant patents for a target, compound class, or technology. Use to assess patent landscape, freedom to operate, or find prior art. Tries Lens.org API first (if api.lens_key configured), then EPO Open Patent Services, then falls back to PubMed patent-related literature.",
)
def patent_search(query: str, max_results: int = 20, **kwargs) -> dict:
    """Search patent databases for drug discovery-relevant patents.

    Tiered lookup, first error-free provider wins:
    1. Lens.org Patent API (only when api.lens_key is configured)
    2. EPO Open Patent Services (free, no key required for basic search)
    3. PubMed fallback (patent-related publications)
    """
    # A Lens.org key may come in via the session config, when a session
    # object is threaded through kwargs.
    sess = kwargs.get("_session", None)
    key = None
    if sess and hasattr(sess, "config"):
        key = sess.config.get("api.lens_key", None)

    # Build the provider chain in priority order.
    attempts = []
    if key:
        attempts.append(lambda: _patent_search_lens(query, max_results, key))
    attempts.append(lambda: _patent_search_epo(query, max_results))

    for attempt in attempts:
        outcome = attempt()
        if outcome and "error" not in outcome:
            return outcome

    # Last resort: patent-flavored PubMed literature search.
    return _patent_search_pubmed_fallback(query, max_results)
510
+
511
+
512
def _patent_search_lens(query: str, max_results: int, api_key: str) -> dict:
    """Search the Lens.org Patent API (requires an API key).

    Args:
        query: Free-text patent search string.
        max_results: Maximum number of patents to return.
        api_key: Lens.org bearer token.

    Returns:
        dict with "summary", "source", "query", "total_count", and
        "patents" on success; "error"/"summary" on failure.
    """
    endpoint = "https://api.lens.org/patent/search"
    auth_headers = {
        "Authorization": f"Bearer {api_key}",
        "Content-Type": "application/json",
    }
    body = {
        "query": {
            "match": query,
        },
        "size": max_results,
        "sort": [{"relevance": "desc"}],
        # Only request the fields we actually surface below.
        "include": [
            "lens_id", "title", "abstract", "applicant",
            "publication_date", "publication_key", "jurisdiction",
            "doc_number", "kind",
        ],
    }

    resp, error = request(
        "POST",
        endpoint,
        json=body,
        headers=auth_headers,
        timeout=30,
        retries=2,
        raise_for_status=False,
    )
    if error:
        return {"error": f"Lens.org API request failed: {error}", "summary": f"Lens.org API request failed: {error}"}
    if resp.status_code != 200:
        return {"error": f"Lens.org API returned status {resp.status_code}", "summary": f"Lens.org API returned status {resp.status_code}"}
    try:
        payload = resp.json()
    except Exception:
        return {"error": "Lens.org API returned invalid JSON", "summary": "Lens.org API returned invalid JSON"}

    total = payload.get("total", 0)

    patents = []
    for record in payload.get("data", []):
        # Title/abstract come back as lists of {text, lang}-style objects;
        # take the first entry. Abstract is truncated to 300 chars.
        titles = record.get("title", [])
        abstracts = record.get("abstract", [])
        applicant_list = record.get("applicant", [])
        patents.append({
            "lens_id": record.get("lens_id", ""),
            "title": titles[0].get("text", "") if titles else "",
            "abstract": abstracts[0].get("text", "")[:300] if abstracts else "",
            "applicants": [a.get("name", "") for a in applicant_list[:3]] if applicant_list else [],
            "publication_date": record.get("publication_date", ""),
            "doc_number": record.get("doc_number", ""),
            "jurisdiction": record.get("jurisdiction", ""),
            "kind": record.get("kind", ""),
        })

    # Build a "(YYYY-YYYY)" span from the publication dates for the summary.
    pub_dates = [p["publication_date"] for p in patents if p["publication_date"]]
    date_range = ""
    if pub_dates:
        years = sorted(set(d[:4] for d in pub_dates if len(d) >= 4))
        if years:
            date_range = f" ({years[0]}-{years[-1]})"

    return {
        "summary": f"Patent search '{query}': {total} total, showing {len(patents)}{date_range}",
        "source": "lens.org",
        "query": query,
        "total_count": total,
        "patents": patents,
    }
590
+
591
+
592
def _patent_search_epo(query: str, max_results: int) -> dict:
    """Search EPO Open Patent Services (Espacenet OPS) — free, no key required.

    Issues a biblio search against the OPS REST endpoint and parses the
    XML response into compact patent records.

    Args:
        query: Free-text patent search string (passed as OPS CQL `q`).
        max_results: Maximum number of patents requested (capped at 100,
            the OPS Range limit per request).

    Returns:
        dict with "summary", "source", "query", "total_count", and
        "patents" on success; "error"/"summary" on failure (including
        the no-results and rate-limited cases, so the caller can fall
        through to the next provider).
    """
    import xml.etree.ElementTree as ET

    # EPO OPS biblio search endpoint
    url = "https://ops.epo.org/3.2/rest-services/published-data/search/biblio"
    params = {
        "q": query,
        # OPS paginates via a "Range" of 1-based result indices.
        "Range": f"1-{min(max_results, 100)}",
    }
    headers = {
        "Accept": "application/xml",
    }

    # retries=0: OPS rate-limits aggressively, so don't hammer it.
    resp, error = request(
        "GET",
        url,
        params=params,
        headers=headers,
        timeout=30,
        retries=0,
        raise_for_status=False,
    )
    if error:
        return {"error": f"EPO OPS request failed: {error}", "summary": f"EPO OPS request failed: {error}"}
    if resp.status_code == 404:
        # OPS signals "no hits" with a 404 rather than an empty result set.
        return {"error": "No patents found via EPO OPS", "summary": "No patents found via EPO OPS"}
    if resp.status_code == 403:
        # Rate limited or auth required
        return {"error": "EPO OPS rate limited or requires authentication", "summary": "EPO OPS rate limited or requires authentication"}
    if resp.status_code != 200:
        return {"error": f"EPO OPS returned status {resp.status_code}", "summary": f"EPO OPS returned status {resp.status_code}"}
    # Validate Content-Type before XML parsing
    content_type = ""
    try:
        ct_raw = resp.headers.get("content-type", "")
        if isinstance(ct_raw, str):
            content_type = ct_raw.lower()
    except Exception:
        # Best-effort header inspection only; fall through to parsing.
        pass
    if content_type and "xml" not in content_type and "text/plain" not in content_type:
        return {"error": f"EPO OPS returned {content_type}, expected XML", "summary": "EPO OPS returned non-XML response"}

    # Parse XML response
    try:
        root = ET.fromstring(resp.text)
    except ET.ParseError as e:
        return {"error": f"Failed to parse EPO OPS XML: {e}", "summary": "Failed to parse EPO patent XML"}

    # EPO OPS XML namespaces
    ns = {
        "ops": "http://ops.epo.org",
        "epo": "http://www.epo.org/exchange",
        "exch": "http://www.epo.org/exchange",
    }

    patents = []
    total_count = 0

    # Try to get total count
    total_elem = root.find(".//ops:biblio-search", ns)
    if total_elem is not None:
        total_count = int(total_elem.get("total-result-count", 0))

    # Extract patent documents
    for doc in root.findall(".//exch:exchange-document", ns):
        doc_id = doc.get("doc-number", "")
        country = doc.get("country", "")
        kind = doc.get("kind", "")

        # Title: prefer the English one, else keep the first seen.
        title = ""
        for title_elem in doc.findall(".//exch:invention-title", ns):
            if title_elem.get("lang", "") == "en" or not title:
                title = title_elem.text or ""

        # Applicants
        applicants = []
        for app in doc.findall(".//exch:applicant/exch:applicant-name/exch:name", ns):
            if app.text:
                applicants.append(app.text)

        # Publication date: first non-empty date under publication-reference.
        pub_date = ""
        for pub_ref in doc.findall(".//exch:publication-reference//exch:date", ns):
            if pub_ref.text:
                pub_date = pub_ref.text
                break

        # Abstract: prefer English; join paragraph texts, truncate to 300.
        abstract = ""
        for abs_elem in doc.findall(".//exch:abstract", ns):
            if abs_elem.get("lang", "") == "en" or not abstract:
                parts = []
                for p in abs_elem.findall(".//exch:p", ns):
                    if p.text:
                        parts.append(p.text)
                if parts:
                    abstract = " ".join(parts)[:300]

        # Compose e.g. "EP1234567B1"; fall back to the bare doc number.
        patent_number = f"{country}{doc_id}{kind}" if country else doc_id

        patents.append({
            "patent_number": patent_number,
            "title": title,
            "abstract": abstract,
            "applicants": applicants[:3],
            "publication_date": pub_date,
            "country": country,
            "kind": kind,
        })

    if not patents:
        # Treated as an error so patent_search() falls through to PubMed.
        return {"error": "EPO OPS returned no parseable patents", "summary": "EPO OPS returned no parseable patents"}
    # Date range for summary
    dates = [p["publication_date"] for p in patents if p["publication_date"]]
    date_range = ""
    if dates:
        years = sorted(set(d[:4] for d in dates if len(d) >= 4))
        if years:
            date_range = f" ({years[0]}-{years[-1]})"

    return {
        "summary": f"Patent search '{query}': {total_count} patents found via EPO{date_range}",
        "source": "epo_ops",
        "query": query,
        "total_count": total_count,
        "patents": patents,
    }
721
+
722
+
723
def _patent_search_pubmed_fallback(query: str, max_results: int) -> dict:
    """Fall back to PubMed search for patent-related publications.

    Used when neither Lens.org nor EPO OPS produced results; surfaces
    patent-flavored literature instead of actual patent documents.
    """
    # Augment the query with patent-related terms before searching PubMed.
    augmented = f"({query}) AND (patent OR intellectual property OR claims OR USPTO OR EPO)"

    pubmed_result = pubmed_search(query=augmented, max_results=max_results)
    if "error" in pubmed_result:
        return pubmed_result

    count = pubmed_result.get("total_count", 0)

    # Re-label the payload so callers can tell this was a fallback path.
    return {
        "summary": f"Patent search '{query}' (PubMed fallback): {count} "
                   f"patent-related publications found",
        "source": "pubmed_fallback",
        "query": query,
        "note": "No patent API available — showing patent-related PubMed publications. "
                "Configure api.lens_key for direct patent search via Lens.org.",
        "total_count": count,
        "articles": pubmed_result.get("articles", []),
    }