cite-agent 1.3.5-py3-none-any.whl → 1.3.7-py3-none-any.whl

This diff shows the contents of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their public registry.

Potentially problematic release.


This version of cite-agent might be problematic.

Files changed (37)
  1. cite_agent/__version__.py +1 -1
  2. cite_agent/cli.py +22 -2
  3. cite_agent/enhanced_ai_agent.py +407 -82
  4. cite_agent/project_detector.py +148 -0
  5. {cite_agent-1.3.5.dist-info → cite_agent-1.3.7.dist-info}/METADATA +1 -1
  6. cite_agent-1.3.7.dist-info/RECORD +31 -0
  7. {cite_agent-1.3.5.dist-info → cite_agent-1.3.7.dist-info}/top_level.txt +0 -1
  8. cite_agent-1.3.5.dist-info/RECORD +0 -56
  9. src/__init__.py +0 -1
  10. src/services/__init__.py +0 -132
  11. src/services/auth_service/__init__.py +0 -3
  12. src/services/auth_service/auth_manager.py +0 -33
  13. src/services/graph/__init__.py +0 -1
  14. src/services/graph/knowledge_graph.py +0 -194
  15. src/services/llm_service/__init__.py +0 -5
  16. src/services/llm_service/llm_manager.py +0 -495
  17. src/services/paper_service/__init__.py +0 -5
  18. src/services/paper_service/openalex.py +0 -231
  19. src/services/performance_service/__init__.py +0 -1
  20. src/services/performance_service/rust_performance.py +0 -395
  21. src/services/research_service/__init__.py +0 -23
  22. src/services/research_service/chatbot.py +0 -2056
  23. src/services/research_service/citation_manager.py +0 -436
  24. src/services/research_service/context_manager.py +0 -1441
  25. src/services/research_service/conversation_manager.py +0 -597
  26. src/services/research_service/critical_paper_detector.py +0 -577
  27. src/services/research_service/enhanced_research.py +0 -121
  28. src/services/research_service/enhanced_synthesizer.py +0 -375
  29. src/services/research_service/query_generator.py +0 -777
  30. src/services/research_service/synthesizer.py +0 -1273
  31. src/services/search_service/__init__.py +0 -5
  32. src/services/search_service/indexer.py +0 -186
  33. src/services/search_service/search_engine.py +0 -342
  34. src/services/simple_enhanced_main.py +0 -287
  35. {cite_agent-1.3.5.dist-info → cite_agent-1.3.7.dist-info}/WHEEL +0 -0
  36. {cite_agent-1.3.5.dist-info → cite_agent-1.3.7.dist-info}/entry_points.txt +0 -0
  37. {cite_agent-1.3.5.dist-info → cite_agent-1.3.7.dist-info}/licenses/LICENSE +0 -0
@@ -1,231 +0,0 @@
- """Asynchronous client for the OpenAlex scholarly metadata API.
-
- The implementation focuses on resilience. It maintains a small in-memory cache
- and provides graceful fallbacks when the upstream service is unavailable so that
- advanced features inside `SophisticatedResearchEngine` can continue operating
- in restricted CI environments.
- """
-
- from __future__ import annotations
-
- import asyncio
- import logging
- import os
- import re
- from datetime import datetime
- from typing import Any, Dict, Iterable, List, Optional, Tuple
-
- import httpx
-
- logger = logging.getLogger(__name__)
-
- _OPENALEX_HOST = os.getenv("OPENALEX_BASE_URL", "https://api.openalex.org")
- _DEFAULT_MAILTO = os.getenv("OPENALEX_MAILTO", "research@nocturnal.dev")
- _CACHE_TTL_SECONDS = int(os.getenv("OPENALEX_CACHE_TTL", "1800"))
- _DEFAULT_TIMEOUT = float(os.getenv("OPENALEX_TIMEOUT", "12.0"))
-
-
- class OpenAlexClient:
-     """Thin asynchronous wrapper around OpenAlex endpoints with caching."""
-
-     def __init__(
-         self,
-         *,
-         api_key: Optional[str] = None,
-         mailto: Optional[str] = None,
-         timeout: float = _DEFAULT_TIMEOUT,
-         cache_ttl: int = _CACHE_TTL_SECONDS,
-     ) -> None:
-         self.api_key = api_key or os.getenv("OPENALEX_API_KEY")
-         self.mailto = mailto or _DEFAULT_MAILTO
-         self.timeout = timeout
-         self.cache_ttl = cache_ttl
-         self._cache: Dict[str, Tuple[float, Any]] = {}
-         self._cache_lock = asyncio.Lock()
-         self._session_lock = asyncio.Lock()
-         self._session: Optional[httpx.AsyncClient] = None
-
-     # ------------------------------------------------------------------
-     async def get_paper_by_id(self, paper_id: str) -> Optional[Dict[str, Any]]:
-         """Retrieve a single paper by OpenAlex ID or DOI.
-
-         Returns `None` if the paper cannot be retrieved. When the OpenAlex API
-         is unreachable a best-effort synthetic document is returned so downstream
-         synthesis can continue in offline environments.
-         """
-
-         normalized_id = self._normalise_id(paper_id)
-         cache_key = f"work:{normalized_id}"
-         cached = await self._read_cache(cache_key)
-         if cached is not None:
-             return cached
-
-         params = {"mailto": self.mailto}
-         if self.api_key:
-             params["api_key"] = self.api_key
-
-         url = f"{_OPENALEX_HOST}/works/{normalized_id}"
-         try:
-             session = await self._get_session()
-             response = await session.get(url, params=params)
-             response.raise_for_status()
-             data = response.json()
-             await self._write_cache(cache_key, data)
-             return data
-         except Exception as exc:
-             logger.warning("OpenAlex work lookup failed", extra={"paper_id": paper_id, "error": str(exc)})
-             fallback = self._fallback_document(normalized_id)
-             await self._write_cache(cache_key, fallback)
-             return fallback
-
-     async def get_papers_bulk(self, paper_ids: Iterable[str]) -> List[Dict[str, Any]]:
-         """Retrieve multiple papers concurrently with caching."""
-
-         tasks = [self.get_paper_by_id(pid) for pid in paper_ids]
-         results = await asyncio.gather(*tasks, return_exceptions=True)
-         papers = []
-         for result in results:
-             if isinstance(result, dict) and result:
-                 papers.append(result)
-         return papers
-
-     async def search_works(
-         self,
-         query: str,
-         *,
-         limit: int = 10,
-         filters: Optional[Dict[str, str]] = None,
-         sort: str = "relevance_score:desc",
-     ) -> Dict[str, Any]:
-         """Execute a search against OpenAlex works endpoint."""
-
-         limit = max(1, min(limit, 200))
-         params: Dict[str, Any] = {
-             "search": query,
-             "per-page": limit,
-             "page": 1,
-             "sort": sort,
-             "mailto": self.mailto,
-         }
-         if filters:
-             params["filter"] = ",".join(f"{k}:{v}" for k, v in filters.items())
-         if self.api_key:
-             params["api_key"] = self.api_key
-
-         cache_key = self._make_cache_key("search", params)
-         cached = await self._read_cache(cache_key)
-         if cached is not None:
-             return cached
-
-         url = f"{_OPENALEX_HOST}/works"
-         try:
-             session = await self._get_session()
-             response = await session.get(url, params=params)
-             response.raise_for_status()
-             payload = response.json()
-             await self._write_cache(cache_key, payload)
-             return payload
-         except Exception as exc:
-             logger.warning("OpenAlex search failed", extra={"query": query, "error": str(exc)})
-             # Provide deterministic empty payload to callers
-             empty = {"results": [], "meta": {"count": 0, "page": 1}}
-             await self._write_cache(cache_key, empty)
-             return empty
-
-     async def get_related_works(self, paper_id: str, limit: int = 5) -> List[Dict[str, Any]]:
-         """Fetch related works leveraging OpenAlex's recommendation endpoint."""
-
-         normalized_id = self._normalise_id(paper_id)
-         params = {"per-page": max(1, min(limit, 50)), "mailto": self.mailto}
-         if self.api_key:
-             params["api_key"] = self.api_key
-
-         url = f"{_OPENALEX_HOST}/works/{normalized_id}/related"
-         cache_key = self._make_cache_key("related", normalized_id, params)
-         cached = await self._read_cache(cache_key)
-         if cached is not None:
-             return cached
-
-         try:
-             session = await self._get_session()
-             response = await session.get(url, params=params)
-             response.raise_for_status()
-             data = response.json().get("results", [])
-             await self._write_cache(cache_key, data)
-             return data
-         except Exception as exc:
-             logger.info("OpenAlex related works unavailable", extra={"paper": paper_id, "error": str(exc)})
-             await self._write_cache(cache_key, [])
-             return []
-
-     async def close(self) -> None:
-         async with self._session_lock:
-             if self._session is not None:
-                 try:
-                     await self._session.aclose()
-                 finally:
-                     self._session = None
-
-     # ------------------------------------------------------------------
-     async def _get_session(self) -> httpx.AsyncClient:
-         async with self._session_lock:
-             if self._session is None:
-                 headers = {
-                     "User-Agent": "Nocturnal-Archive/advanced-research (contact@nocturnal.dev)",
-                     "Accept": "application/json",
-                 }
-                 self._session = httpx.AsyncClient(timeout=self.timeout, headers=headers)
-             return self._session
-
-     def _normalise_id(self, paper_id: str) -> str:
-         if paper_id.startswith("http"):
-             return paper_id.rstrip("/").split("/")[-1]
-         if paper_id.startswith("doi:"):
-             return paper_id
-         if "/" in paper_id and not paper_id.startswith("W"):
-             return f"doi:{paper_id}"
-         return paper_id
-
-     def _make_cache_key(self, namespace: str, *parts: Any) -> str:
-         raw = "|".join(str(part) for part in parts)
-         return f"{namespace}:{hash(raw) & 0xFFFFFFFFFFFF:x}"
-
-     async def _read_cache(self, key: str) -> Optional[Any]:
-         async with self._cache_lock:
-             entry = self._cache.get(key)
-             if not entry:
-                 return None
-             expires_at, value = entry
-             if datetime.utcnow().timestamp() > expires_at:
-                 self._cache.pop(key, None)
-                 return None
-             return value
-
-     async def _write_cache(self, key: str, value: Any) -> None:
-         async with self._cache_lock:
-             self._cache[key] = (datetime.utcnow().timestamp() + self.cache_ttl, value)
-
-     def _fallback_document(self, paper_id: str) -> Dict[str, Any]:
-         """Generate a deterministic placeholder when OpenAlex is unreachable."""
-
-         safe_id = re.sub(r"[^A-Za-z0-9]", "", paper_id) or "paper"
-         title = f"Placeholder synthesis for {safe_id}"
-         abstract = (
-             "OpenAlex was unavailable during retrieval. This placeholder combines "
-             "the paper identifier with contextual heuristics so downstream "
-             "components can continue operating."
-         )
-         return {
-             "id": paper_id,
-             "title": title,
-             "abstract": abstract,
-             "concepts": [],
-             "authorships": [],
-             "publication_year": datetime.utcnow().year,
-             "cited_by_count": 0,
-             "doi": paper_id if paper_id.startswith("doi:") else "",
-             "fallback": True,
-         }
-
-
- __all__ = ["OpenAlexClient"]
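
The hunk above removes the OpenAlexClient wrapper entirely. For reference, a minimal sketch of how that client was driven, using only the methods shown in the removed file; the import path follows the 1.3.5 layout, and the mailto address, query string, and DOI are placeholder values:

    import asyncio

    from src.services.paper_service.openalex import OpenAlexClient

    async def main() -> None:
        client = OpenAlexClient(mailto="maintainer@example.org")  # placeholder contact address
        try:
            # Full-text search; offline it degrades to an empty, cached payload
            works = await client.search_works("citation graph analysis", limit=5)
            for work in works.get("results", []):
                print(work.get("id"), work.get("title"))
            # Lookup by DOI (placeholder); offline it returns a synthetic document flagged "fallback"
            paper = await client.get_paper_by_id("doi:10.1234/example")
            print(paper.get("title"), paper.get("fallback", False))
        finally:
            await client.close()

    asyncio.run(main())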
@@ -1 +0,0 @@
- """Performance service package for high-performance operations"""
@@ -1,395 +0,0 @@
- """
- High-performance Rust-powered services for web scraping and text processing.
- This module provides Python bindings to the Rust performance library.
- """
-
- import asyncio
- import logging
- from typing import List, Dict, Optional, Any
- from dataclasses import dataclass
- from datetime import datetime, timezone
- import json
-
- logger = logging.getLogger(__name__)
-
-
- def _utc_now() -> datetime:
-     return datetime.now(timezone.utc)
-
- try:
-     import nocturnal_performance as rust_perf  # type: ignore[import]
-     RUST_AVAILABLE = True
-     logger.info("Rust performance module loaded successfully")
- except ImportError:
-     RUST_AVAILABLE = False
-     logger.warning("Rust performance module not available, falling back to Python implementations")
-
- @dataclass
- class ScrapedContent:
-     """Represents scraped content from a URL."""
-     url: str
-     title: str
-     content: str
-     metadata: Dict[str, str]
-     timestamp: datetime
-
- @dataclass
- class ProcessedText:
-     """Represents processed text with various analyses."""
-     original: str
-     cleaned: str
-     chunks: List[str]
-     keywords: List[str]
-     summary: str
-
- class HighPerformanceService:
-     """High-performance service using Rust backend when available."""
-
-     def __init__(self, max_concurrent: int = 10):
-         self.max_concurrent = max_concurrent
-         self.rust_scraper = None
-
-         if RUST_AVAILABLE:
-             try:
-                 self.rust_scraper = rust_perf.HighPerformanceScraper(max_concurrent)
-                 logger.info("Initialized Rust performance scraper")
-             except Exception as e:
-                 logger.error(f"Failed to initialize Rust scraper: {e}")
-                 self.rust_scraper = None
-
-     async def scrape_urls(self, urls: List[str]) -> List[ScrapedContent]:
-         """Scrape multiple URLs concurrently with high performance."""
-         if not urls:
-             return []
-
-         if self.rust_scraper and RUST_AVAILABLE:
-             try:
-                 # Use Rust implementation
-                 rust_results = await self.rust_scraper.scrape_urls(urls)
-
-                 # Convert Rust results to Python objects
-                 results = []
-                 for result in rust_results:
-                     scraped = ScrapedContent(
-                         url=result["url"],
-                         title=result["title"],
-                         content=result["content"],
-                         metadata=result["metadata"],
-                         timestamp=datetime.fromisoformat(result["timestamp"].replace("Z", "+00:00"))
-                     )
-                     results.append(scraped)
-
-                 logger.info(f"Scraped {len(results)} URLs using Rust backend")
-                 return results
-
-             except Exception as e:
-                 logger.error(f"Rust scraping failed, falling back to Python: {e}")
-
-         # Fallback to Python implementation
-         return await self._scrape_urls_python(urls)
-
-     async def process_text_batch(self, texts: List[str]) -> List[ProcessedText]:
-         """Process multiple texts concurrently with high performance."""
-         if not texts:
-             return []
-
-         if self.rust_scraper and RUST_AVAILABLE:
-             try:
-                 # Use Rust implementation
-                 rust_results = await self.rust_scraper.process_text_batch(texts)
-
-                 # Convert Rust results to Python objects
-                 results = []
-                 for result in rust_results:
-                     processed = ProcessedText(
-                         original=result["original"],
-                         cleaned=result["cleaned"],
-                         chunks=result["chunks"],
-                         keywords=result["keywords"],
-                         summary=result["summary"]
-                     )
-                     results.append(processed)
-
-                 logger.info(f"Processed {len(results)} texts using Rust backend")
-                 return results
-
-             except Exception as e:
-                 logger.error(f"Rust text processing failed, falling back to Python: {e}")
-
-         # Fallback to Python implementation
-         return await self._process_text_batch_python(texts)
-
-     async def extract_keywords(self, text: str, max_keywords: int = 10) -> List[str]:
-         """Extract keywords from text using high-performance algorithms."""
-         if self.rust_scraper and RUST_AVAILABLE:
-             try:
-                 return await self.rust_scraper.extract_keywords(text, max_keywords)
-             except Exception as e:
-                 logger.error(f"Rust keyword extraction failed, falling back to Python: {e}")
-
-         # Fallback to Python implementation
-         return await self._extract_keywords_python(text, max_keywords)
-
-     async def chunk_text(self, text: str, chunk_size: int = 1000, overlap: int = 200) -> List[str]:
-         """Chunk text into smaller pieces with overlap."""
-         if self.rust_scraper and RUST_AVAILABLE:
-             try:
-                 return await self.rust_scraper.chunk_text(text, chunk_size, overlap)
-             except Exception as e:
-                 logger.error(f"Rust text chunking failed, falling back to Python: {e}")
-
-         # Fallback to Python implementation
-         return await self._chunk_text_python(text, chunk_size, overlap)
-
-     def fast_text_clean(self, text: str) -> str:
-         """Fast text cleaning using Rust implementation."""
-         if RUST_AVAILABLE:
-             try:
-                 return rust_perf.fast_text_clean(text)
-             except Exception as e:
-                 logger.error(f"Rust text cleaning failed, falling back to Python: {e}")
-
-         # Fallback to Python implementation
-         return self._clean_text_python(text)
-
-     def fast_url_validation(self, url: str) -> bool:
-         """Fast URL validation using Rust implementation."""
-         if RUST_AVAILABLE:
-             try:
-                 return rust_perf.fast_url_validation(url)
-             except Exception as e:
-                 logger.error(f"Rust URL validation failed, falling back to Python: {e}")
-
-         # Fallback to Python implementation
-         return self._validate_url_python(url)
-
-     def fast_text_similarity(self, text1: str, text2: str) -> float:
-         """Fast text similarity calculation using Rust implementation."""
-         if RUST_AVAILABLE:
-             try:
-                 return rust_perf.fast_text_similarity(text1, text2)
-             except Exception as e:
-                 logger.error(f"Rust text similarity failed, falling back to Python: {e}")
-
-         # Fallback to Python implementation
-         return self._calculate_similarity_python(text1, text2)
-
-     # Python fallback implementations
-     async def _scrape_urls_python(self, urls: List[str]) -> List[ScrapedContent]:
-         """Python fallback implementation for URL scraping."""
-         import aiohttp
-         from bs4 import BeautifulSoup
-         import re
-
-         async def scrape_single_url(session: aiohttp.ClientSession, url: str) -> Optional[ScrapedContent]:
-             try:
-                 async with session.get(url, timeout=30) as response:
-                     if response.status != 200:
-                         return None
-
-                     html = await response.text()
-                     soup = BeautifulSoup(html, 'html.parser')
-
-                     # Extract title
-                     title = soup.find('title')
-                     title_text = title.get_text() if title else "No title"
-
-                     # Extract content
-                     content_selectors = ['article', 'main', '[role="main"]', '.content', '.main-content', 'body']
-                     content = ""
-                     for selector in content_selectors:
-                         element = soup.select_one(selector)
-                         if element:
-                             content = element.get_text(separator=' ', strip=True)
-                             break
-
-                     # Extract metadata
-                     metadata = {}
-                     for meta in soup.find_all('meta'):
-                         name = meta.get('name') or meta.get('property')
-                         content_attr = meta.get('content')
-                         if name and content_attr:
-                             metadata[name] = content_attr
-
-                     return ScrapedContent(
-                         url=url,
-                         title=title_text,
-                         content=content,
-                         metadata=metadata,
-                         timestamp=_utc_now()
-                     )
-
-             except Exception as e:
-                 logger.error(f"Error scraping {url}: {e}")
-                 return None
-
-         # Scrape URLs concurrently
-         connector = aiohttp.TCPConnector(limit=self.max_concurrent)
-         async with aiohttp.ClientSession(connector=connector) as session:
-             tasks = [scrape_single_url(session, url) for url in urls]
-             results = await asyncio.gather(*tasks, return_exceptions=True)
-
-         # Filter out None results and exceptions
-         scraped_content = []
-         for result in results:
-             if isinstance(result, ScrapedContent):
-                 scraped_content.append(result)
-
-         return scraped_content
-
-     async def _process_text_batch_python(self, texts: List[str]) -> List[ProcessedText]:
-         """Python fallback implementation for text processing."""
-         tasks = [self._process_single_text_python(text) for text in texts]
-         return await asyncio.gather(*tasks)
-
-     async def _process_single_text_python(self, text: str) -> ProcessedText:
-         """Process a single text using Python implementation."""
-         cleaned = self._clean_text_python(text)
-         chunks = await self._chunk_text_python(cleaned, 1000, 200)
-         keywords = await self._extract_keywords_python(cleaned, 10)
-         summary = await self._generate_summary_python(cleaned)
-
-         return ProcessedText(
-             original=text,
-             cleaned=cleaned,
-             chunks=chunks,
-             keywords=keywords,
-             summary=summary
-         )
-
-     def _clean_text_python(self, text: str) -> str:
-         """Python implementation of text cleaning."""
-         import re
-
-         # Remove extra whitespace
-         text = re.sub(r'\s+', ' ', text)
-
-         # Remove special characters but keep basic punctuation
-         text = re.sub(r'[^\w\s\.\,\!\?\;\:\-\(\)\[\]\{\}]', '', text)
-
-         # Normalize quotes and dashes
-         text = text.replace('“', '"').replace('”', '"')
-         text = text.replace('‘', "'").replace('’', "'")
-         text = text.replace('–', '-').replace('—', '-')
-
-         return text.strip()
-
-     async def _chunk_text_python(self, text: str, chunk_size: int, overlap: int) -> List[str]:
-         """Python implementation of text chunking."""
-         import re
-
-         # Split into sentences
-         sentences = re.split(r'[.!?]+', text)
-         sentences = [s.strip() for s in sentences if s.strip()]
-
-         chunks = []
-         current_chunk = ""
-         current_size = 0
-
-         for sentence in sentences:
-             sentence_size = len(sentence)
-
-             if current_size + sentence_size > chunk_size and current_chunk:
-                 chunks.append(current_chunk.strip())
-
-                 # Start new chunk with overlap
-                 if overlap > 0:
-                     words = current_chunk.split()
-                     overlap_words = min(overlap // 10, len(words))
-                     if overlap_words > 0:
-                         current_chunk = " ".join(words[-overlap_words:]) + " "
-                         current_size = len(current_chunk)
-                     else:
-                         current_chunk = ""
-                         current_size = 0
-                 else:
-                     current_chunk = ""
-                     current_size = 0
-
-             current_chunk += sentence + ". "
-             current_size += sentence_size + 2
-
-         if current_chunk.strip():
-             chunks.append(current_chunk.strip())
-
-         return chunks
-
-     async def _extract_keywords_python(self, text: str, max_keywords: int) -> List[str]:
-         """Python implementation of keyword extraction."""
-         import re
-         from collections import Counter
-
-         # Stop words
-         stop_words = {
-             'the', 'a', 'an', 'and', 'or', 'but', 'in', 'on', 'at', 'to', 'for', 'of', 'with',
-             'by', 'is', 'are', 'was', 'were', 'be', 'been', 'being', 'have', 'has', 'had',
-             'do', 'does', 'did', 'will', 'would', 'could', 'should', 'may', 'might', 'must',
-             'can', 'this', 'that', 'these', 'those', 'i', 'you', 'he', 'she', 'it', 'we', 'they'
-         }
-
-         # Extract words
-         words = re.findall(r'\b[a-zA-Z]+\b', text.lower())
-         words = [word for word in words if len(word) > 2 and word not in stop_words]
-
-         # Count frequencies
-         word_freq = Counter(words)
-
-         # Return top keywords
-         return [word for word, _ in word_freq.most_common(max_keywords)]
-
-     async def _generate_summary_python(self, text: str) -> str:
-         """Python implementation of text summarization."""
-         import re
-         from collections import Counter
-
-         # Split into sentences
-         sentences = re.split(r'[.!?]+', text)
-         sentences = [s.strip() for s in sentences if s.strip()]
-
-         if len(sentences) <= 3:
-             return text
-
-         # Calculate word frequencies
-         words = re.findall(r'\b[a-zA-Z]+\b', text.lower())
-         word_freq = Counter(words)
-
-         # Score sentences
-         sentence_scores = []
-         for i, sentence in enumerate(sentences):
-             sentence_words = re.findall(r'\b[a-zA-Z]+\b', sentence.lower())
-             score = sum(word_freq.get(word, 0) for word in sentence_words)
-             sentence_scores.append((i, score))
-
-         # Sort by score and take top sentences
-         sentence_scores.sort(key=lambda x: x[1], reverse=True)
-         top_indices = sorted([i for i, _ in sentence_scores[:3]])
-
-         summary = ". ".join(sentences[i] for i in top_indices)
-         return summary + "."
-
-     def _validate_url_python(self, url: str) -> bool:
-         """Python implementation of URL validation."""
-         try:
-             from urllib.parse import urlparse
-             result = urlparse(url)
-             return all([result.scheme, result.netloc])
-         except:
-             return False
-
-     def _calculate_similarity_python(self, text1: str, text2: str) -> float:
-         """Python implementation of text similarity."""
-         import re
-         from collections import Counter
-
-         # Extract words
-         words1 = set(re.findall(r'\b[a-zA-Z]+\b', text1.lower()))
-         words2 = set(re.findall(r'\b[a-zA-Z]+\b', text2.lower()))
-
-         # Calculate Jaccard similarity
-         intersection = len(words1.intersection(words2))
-         union = len(words1.union(words2))
-
-         return intersection / union if union > 0 else 0.0
-
- # Global instance
- performance_service = HighPerformanceService()
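
The removed rust_performance module exposes a module-level performance_service singleton that transparently falls back to the pure-Python implementations when the nocturnal_performance extension is absent. A minimal sketch of how it was exercised, using only the methods defined above; the import path follows the 1.3.5 layout and the sample strings are placeholders:

    import asyncio

    from src.services.performance_service.rust_performance import performance_service

    async def demo() -> None:
        text = "Rust accelerates the hot path. Python provides the fallback. Both return the same shapes."
        # Cleans, chunks, extracts keywords, and summarises each input text
        processed = await performance_service.process_text_batch([text])
        print(processed[0].keywords, processed[0].summary)
        # Synchronous helpers: URL validation and Jaccard-style text similarity
        print(performance_service.fast_url_validation("https://api.openalex.org"))
        print(performance_service.fast_text_similarity("rust backend", "python fallback"))

    asyncio.run(demo())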
@@ -1,23 +0,0 @@
- """Research service package exposing conversation and synthesis utilities."""
-
- from .chatbot import ResearchChatbot
- from .citation_manager import CitationManager
- from .context_manager import ResearchContextManager
- from .conversation_manager import ResearchConversationManager
- from .critical_paper_detector import CriticalPaperDetector
- from .enhanced_research import EnhancedResearchPipeline
- from .enhanced_synthesizer import EnhancedSynthesizer
- from .query_generator import ResearchQueryGenerator
- from .synthesizer import ResearchSynthesizer
-
- __all__ = [
-     "ResearchChatbot",
-     "CitationManager",
-     "ResearchContextManager",
-     "ResearchConversationManager",
-     "CriticalPaperDetector",
-     "EnhancedResearchPipeline",
-     "EnhancedSynthesizer",
-     "ResearchQueryGenerator",
-     "ResearchSynthesizer",
- ]