cite-agent 1.3.5__py3-none-any.whl → 1.3.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of cite-agent might be problematic. Click here for more details.

Files changed (37)
  1. cite_agent/__version__.py +1 -1
  2. cite_agent/cli.py +22 -2
  3. cite_agent/enhanced_ai_agent.py +407 -82
  4. cite_agent/project_detector.py +148 -0
  5. {cite_agent-1.3.5.dist-info → cite_agent-1.3.7.dist-info}/METADATA +1 -1
  6. cite_agent-1.3.7.dist-info/RECORD +31 -0
  7. {cite_agent-1.3.5.dist-info → cite_agent-1.3.7.dist-info}/top_level.txt +0 -1
  8. cite_agent-1.3.5.dist-info/RECORD +0 -56
  9. src/__init__.py +0 -1
  10. src/services/__init__.py +0 -132
  11. src/services/auth_service/__init__.py +0 -3
  12. src/services/auth_service/auth_manager.py +0 -33
  13. src/services/graph/__init__.py +0 -1
  14. src/services/graph/knowledge_graph.py +0 -194
  15. src/services/llm_service/__init__.py +0 -5
  16. src/services/llm_service/llm_manager.py +0 -495
  17. src/services/paper_service/__init__.py +0 -5
  18. src/services/paper_service/openalex.py +0 -231
  19. src/services/performance_service/__init__.py +0 -1
  20. src/services/performance_service/rust_performance.py +0 -395
  21. src/services/research_service/__init__.py +0 -23
  22. src/services/research_service/chatbot.py +0 -2056
  23. src/services/research_service/citation_manager.py +0 -436
  24. src/services/research_service/context_manager.py +0 -1441
  25. src/services/research_service/conversation_manager.py +0 -597
  26. src/services/research_service/critical_paper_detector.py +0 -577
  27. src/services/research_service/enhanced_research.py +0 -121
  28. src/services/research_service/enhanced_synthesizer.py +0 -375
  29. src/services/research_service/query_generator.py +0 -777
  30. src/services/research_service/synthesizer.py +0 -1273
  31. src/services/search_service/__init__.py +0 -5
  32. src/services/search_service/indexer.py +0 -186
  33. src/services/search_service/search_engine.py +0 -342
  34. src/services/simple_enhanced_main.py +0 -287
  35. {cite_agent-1.3.5.dist-info → cite_agent-1.3.7.dist-info}/WHEEL +0 -0
  36. {cite_agent-1.3.5.dist-info → cite_agent-1.3.7.dist-info}/entry_points.txt +0 -0
  37. {cite_agent-1.3.5.dist-info → cite_agent-1.3.7.dist-info}/licenses/LICENSE +0 -0
@@ -1,436 +0,0 @@
1
- """
2
- Citation management system for tracking sources and generating proper citations.
3
- Supports multiple citation formats and automatic reference management.
4
- """
5
-
6
- import re
7
- import logging
8
- from typing import List, Dict, Any, Optional
9
- from datetime import datetime, timezone
10
- from dataclasses import dataclass
11
- from urllib.parse import urlparse
12
- import json
13
- from enum import Enum
14
-
15
- logger = logging.getLogger(__name__)
16
-
17
-
18
def _utc_now() -> datetime:
    """Return the current moment as a timezone-aware ``datetime`` in UTC."""
    return datetime.now(tz=timezone.utc)
20
-
21
class CitationFormat(Enum):
    """Reference styles, each valued by its lowercase style name."""

    APA = "apa"
    MLA = "mla"
    CHICAGO = "chicago"
    IEEE = "ieee"
27
-
28
@dataclass
class CitedFinding:
    """A finding bundled with the IDs of the citations that support it.

    All six fields are required and are accepted positionally in the order
    they are declared.
    """

    finding_id: str
    content: str
    citations: List[str]  # citation IDs, not citation objects
    relevance_score: float
    category: str
    timestamp: datetime
37
-
38
@dataclass
class Citation:
    """Bibliographic record for one cited source.

    The first seven fields are required; the remaining ones are optional
    metadata and default to ``None`` (``0.0`` for ``relevance_score``).
    ``keywords`` is the exception: it is always a list after construction,
    because a missing or ``None`` value is replaced by a new empty list.
    """

    id: str
    url: str
    title: str
    authors: List[str]
    publication_date: Optional[str]
    access_date: str
    source_type: str  # 'web', 'journal', 'book', 'conference'
    doi: Optional[str] = None
    journal: Optional[str] = None
    volume: Optional[str] = None
    issue: Optional[str] = None
    pages: Optional[str] = None
    publisher: Optional[str] = None
    abstract: Optional[str] = None
    # ``None`` is only the "not supplied" marker; see __post_init__.
    keywords: Optional[List[str]] = None
    citation_count: Optional[int] = None
    relevance_score: float = 0.0

    def __post_init__(self) -> None:
        """Replace a missing ``keywords`` value with a per-instance empty list."""
        # A literal ``[]`` default is rejected by dataclasses (it would be
        # shared between instances), so the list is created here instead.
        if self.keywords is None:
            self.keywords = []
58
-
59
- @dataclass
60
- class CachedItem:
61
- """Represents a cached item with expiration."""
62
- data: Any
63
- created_at: datetime
64
- expires_at: Optional[datetime]
65
-
66
class CitationManager:
    """Collect citations in memory and render them in several reference styles.

    Citations are stored in ``self.citations`` under sequential IDs of the
    form ``ref_001``, ``ref_002``, ...  Text can then be produced for one
    citation, for the whole reference list, as inline markers inside running
    text, or as a JSON / BibTeX / RIS export.

    Every ``format`` argument accepts a style name in any letter case, or an
    enum member whose ``value`` is such a name.  Unrecognised names fall back
    to the method's default style instead of raising.
    """

    # Hosts (including their sub-domains) classified as academic sources.
    _ACADEMIC_DOMAINS = (
        'arxiv.org', 'scholar.google.com', 'researchgate.net', 'academia.edu',
        'jstor.org', 'sciencedirect.com', 'ieee.org', 'acm.org',
    )
    # Words that mark a conference source when found anywhere in the URL.
    _CONFERENCE_TERMS = ('conference', 'proceedings', 'workshop')
    # Hosts (including their sub-domains) classified as book sources.
    _BOOK_DOMAINS = ('amazon.com', 'goodreads.com', 'books.google.com')
    # Metadata keys probed, in this order, for a publication date.
    _DATE_FIELDS = ('date', 'pubdate', 'publication_date', 'published', 'og:published_time')

    # One or more capitalised words.  Only the introducing keyword is matched
    # case-insensitively; applying IGNORECASE to the whole pattern would let
    # any lowercase phrase ("by the door") pass as a name.
    _NAME = r'([A-Z][A-Za-z]+(?:\s+[A-Z][A-Za-z]+)*)'
    _AUTHOR_PATTERNS = (
        re.compile(r'\b(?i:by)\s+' + _NAME),
        re.compile(r'\b(?i:authors?):\s*' + _NAME),
        re.compile(r'\b(?i:written\s+by)\s+' + _NAME),
    )
    # Tried from most to least specific; the last one is a bare 4-digit number.
    _DATE_PATTERNS = (
        re.compile(r'(\d{4}-\d{2}-\d{2})'),
        re.compile(r'(\d{1,2}/\d{1,2}/\d{4})'),
        re.compile(r'(\w+\s+\d{1,2},?\s+\d{4})'),
        re.compile(r'(?<!\d)(\d{4})(?!\d)'),
    )
    # A stand-alone 4-digit number starting with 1 or 2.
    _YEAR_PATTERN = re.compile(r'(?<!\d)([12]\d{3})(?!\d)')

    def __init__(self, db_ops=None, openalex_client=None):
        """Create an empty manager.

        Args:
            db_ops: Stored on the instance; no method of this class reads it.
            openalex_client: Stored on the instance; no method of this class
                reads it.
        """
        self.citations: Dict[str, "Citation"] = {}
        self.citation_counter = 0
        self.db_ops = db_ops
        self.openalex_client = openalex_client

    # ------------------------------------------------------------------
    # Public API
    # ------------------------------------------------------------------

    def add_citation(self, url: str, title: str, content: str, metadata: Dict[str, Any]) -> str:
        """Register a source and return its new citation ID.

        Authors, publication date, source type and relevance score are derived
        from ``metadata`` first and from ``content`` / ``url`` as a fallback.
        The access date is today's date in UTC.

        Args:
            url: Address of the source.
            title: Title of the source.
            content: Text of the source, used for heuristics only.
            metadata: Mapping of metadata keys (``author``, ``doi``, ...).

        Returns:
            The ID under which the citation was stored, e.g. ``"ref_001"``.
        """
        self.citation_counter += 1
        citation_id = f"ref_{self.citation_counter:03d}"

        citation = Citation(
            id=citation_id,
            url=url,
            title=title,
            authors=self._extract_authors(metadata, content),
            publication_date=self._extract_publication_date(metadata, content),
            access_date=_utc_now().strftime("%Y-%m-%d"),
            source_type=self._determine_source_type(url, metadata),
            doi=metadata.get('doi'),
            journal=metadata.get('journal'),
            volume=metadata.get('volume'),
            issue=metadata.get('issue'),
            pages=metadata.get('pages'),
            publisher=metadata.get('publisher'),
            abstract=metadata.get('abstract'),
            keywords=metadata.get('keywords', []),
            citation_count=metadata.get('citation_count'),
            # Pass the real URL: metadata seldom repeats it, and without it
            # the source-type bonus would never be applied.
            relevance_score=self._calculate_relevance_score(content, metadata, url=url),
        )

        self.citations[citation_id] = citation
        logger.info("Added citation %s: %s", citation_id, title)
        return citation_id

    def get_citation(self, citation_id: str) -> "Optional[Citation]":
        """Return the citation stored under ``citation_id``, or ``None``."""
        return self.citations.get(citation_id)

    def get_all_citations(self) -> "List[Citation]":
        """Return every stored citation, highest relevance score first."""
        return sorted(self.citations.values(), key=lambda c: c.relevance_score, reverse=True)

    def generate_citation_text(self, citation_id: str, format: str = "apa") -> str:
        """Render one citation in the requested style.

        Returns a ``"[Citation <id> not found]"`` placeholder when the ID is
        unknown.  Unrecognised styles are rendered as APA.
        """
        citation = self.get_citation(citation_id)
        if citation is None:
            return f"[Citation {citation_id} not found]"
        return self._citation_renderer(format)(citation)

    def generate_reference_list(self, format: str = "apa") -> str:
        """Render all citations, by descending relevance, separated by blank lines.

        Unrecognised styles are rendered as APA.
        """
        return self._join_references(self._citation_renderer(format), self.get_all_citations())

    def generate_inline_citations(self, text: str, format: str = "apa") -> str:
        """Replace ``[ref_NNN]`` markers in ``text`` with inline citations.

        APA yields ``(Author, year)`` and MLA ``(Author year)``, using the first
        author (or ``Unknown``) and the publication year (or ``n.d.``).  Any
        other style leaves the marker as ``[ref_NNN]``.  Markers whose ID is
        not registered are left untouched.
        """
        style = self._format_key(format)

        def replace_citation(match):
            citation_id = match.group(1)
            citation = self.get_citation(citation_id)
            if citation is None:
                return match.group(0)
            if style not in ("apa", "mla"):
                return f"[{citation_id}]"
            author = citation.authors[0] if citation.authors else 'Unknown'
            year = self._year_of(citation.publication_date) or 'n.d.'
            separator = ", " if style == "apa" else " "
            return f"({author}{separator}{year})"

        return re.sub(r'\[(ref_\d+)\]', replace_citation, text)

    def export_citations(self, format: str = "json") -> str:
        """Serialise all citations as ``json`` (default), ``bibtex`` or ``ris``.

        Unrecognised formats produce JSON.
        """
        style = self._format_key(format)
        if style == "bibtex":
            return self._generate_bibtex_export()
        if style == "ris":
            return self._generate_ris_export()
        return json.dumps([self._citation_to_dict(c) for c in self.citations.values()], indent=2)

    # ------------------------------------------------------------------
    # Shared helpers
    # ------------------------------------------------------------------

    @staticmethod
    def _format_key(format: Any) -> str:
        """Normalise a style given as a string or enum member to lowercase text."""
        return str(getattr(format, "value", format)).lower()

    def _citation_renderer(self, format: Any):
        """Return the single-citation renderer for ``format`` (APA if unknown)."""
        renderers = {
            "apa": self._generate_apa_citation,
            "mla": self._generate_mla_citation,
            "chicago": self._generate_chicago_citation,
            "ieee": self._generate_ieee_citation,
        }
        return renderers.get(self._format_key(format), self._generate_apa_citation)

    @staticmethod
    def _join_references(render, citations) -> str:
        """Render each citation with ``render`` and join them with blank lines."""
        return "\n\n".join(render(citation) for citation in citations)

    @classmethod
    def _year_of(cls, publication_date: Any) -> Optional[str]:
        """Return the 4-digit year found in ``publication_date``, or ``None``.

        Slicing the first four characters is only right for ISO-style dates;
        dates such as ``3/15/2021`` or ``March 15, 2021`` carry the year at
        the end, so the year is searched for instead.
        """
        if not publication_date:
            return None
        text = str(publication_date)
        found = cls._YEAR_PATTERN.search(text)
        if found:
            return found.group(1)
        # Compact forms such as ``20210501`` have no stand-alone year.
        leading = re.match(r'\d{4}', text)
        return leading.group(0) if leading else None

    @staticmethod
    def _author_text(citation: "Citation") -> str:
        """Return the authors joined by ``", "``, or ``Unknown`` if there are none."""
        return ', '.join(citation.authors) if citation.authors else 'Unknown'

    @staticmethod
    def _host_of(url: str) -> str:
        """Return the lowercase host name of ``url``, or ``""`` if it has none."""
        try:
            host = urlparse(url).hostname
            if not host and "://" not in url:
                # Scheme-less input such as "arxiv.org/abs/1" parses as a path.
                host = urlparse("//" + url).hostname
        except ValueError:
            # urlparse rejects malformed bracketed hosts.
            return ""
        return (host or "").lower()

    # ------------------------------------------------------------------
    # Metadata heuristics
    # ------------------------------------------------------------------

    def _extract_authors(self, metadata: Dict[str, Any], content: str) -> List[str]:
        """Return up to five author names from ``metadata`` or, failing that, ``content``.

        ``metadata['author']`` (a string or a list) and ``metadata['og:author']``
        are used when present.  Only when neither yields a name is ``content``
        searched for "by ...", "Author(s): ..." or "Written by ..." followed by
        capitalised words; at most three names come from that search.

        The caller's ``metadata`` is never modified.
        """
        authors: List[str] = []

        declared = metadata.get('author')
        if isinstance(declared, (list, tuple)):
            # Copy: extending the caller's own list would leak into metadata.
            authors.extend(name for name in declared if name)
        elif declared:
            authors.append(declared)

        og_author = metadata.get('og:author')
        if og_author and og_author not in authors:
            authors.append(og_author)

        if not authors:
            text = content or ""
            for pattern in self._AUTHOR_PATTERNS:
                found = pattern.findall(text)
                if found:
                    authors.extend(found[:3])
                    break

        return authors[:5]

    def _determine_source_type(self, url: str, metadata: Dict[str, Any]) -> str:
        """Classify ``url`` as ``'journal'``, ``'conference'``, ``'book'`` or ``'web'``.

        Journal and book detection compare the URL's host (or a parent domain
        of it) with fixed domain lists, so a domain name that merely appears
        in the path or query string does not count.  Conference detection
        looks for marker words anywhere in the URL.  ``metadata`` is accepted
        for interface compatibility and is not consulted.
        """
        lowered = (url or "").lower()
        host = self._host_of(lowered)

        def hosted_on(domains) -> bool:
            return any(host == domain or host.endswith("." + domain) for domain in domains)

        if hosted_on(self._ACADEMIC_DOMAINS):
            return 'journal'
        if any(term in lowered for term in self._CONFERENCE_TERMS):
            return 'conference'
        if hosted_on(self._BOOK_DOMAINS):
            return 'book'
        return 'web'

    def _extract_publication_date(self, metadata: Dict[str, Any], content: str) -> Optional[str]:
        """Return a publication date string from ``metadata`` or ``content``.

        The first non-empty metadata date field wins and is returned as text.
        Otherwise the first match in ``content`` of, in order, ``YYYY-MM-DD``,
        ``M/D/YYYY``, ``Word D, YYYY`` or a stand-alone 4-digit number is
        returned.  ``None`` means nothing was found.
        """
        for key in self._DATE_FIELDS:
            value = metadata.get(key)
            if value:  # a key holding None or "" is treated as absent
                return str(value)

        text = content or ""
        for pattern in self._DATE_PATTERNS:
            found = pattern.search(text)
            if found:
                return found.group(1)

        return None

    def _calculate_relevance_score(self, content: str, metadata: Dict[str, Any],
                                   url: Optional[str] = None) -> float:
        """Score a source from its content length and metadata richness.

        Up to 2.0 points for content length (1 per 1000 characters), 1.0 each
        for an author, a date and an abstract, 2.0 for a DOI, plus a source
        type bonus (journal 2.0, conference 1.5, book 1.0).

        Args:
            content: Text of the source.
            metadata: Metadata mapping for the source.
            url: Address used for the source-type bonus.  When omitted,
                ``metadata['url']`` is used if present.
        """
        score = min(len(content or "") / 1000, 2.0)

        if metadata.get('author'):
            score += 1.0
        if metadata.get('date') or metadata.get('publication_date'):
            score += 1.0
        if metadata.get('doi'):
            score += 2.0
        if metadata.get('abstract'):
            score += 1.0

        source_url = url if url is not None else metadata.get('url', '')
        bonus = {'journal': 2.0, 'conference': 1.5, 'book': 1.0}
        score += bonus.get(self._determine_source_type(source_url, metadata), 0.0)

        return score

    # ------------------------------------------------------------------
    # Single-citation renderers
    # ------------------------------------------------------------------

    def _generate_apa_citation(self, citation: "Citation") -> str:
        """Render ``citation`` in APA style.

        Journal sources list journal, volume(issue) and pages, skipping the
        parts that are missing; every other source type ends with the access
        date and URL.
        """
        authors = self._author_text(citation)
        year = self._year_of(citation.publication_date) or 'n.d.'

        if citation.source_type == 'journal':
            locator = str(citation.volume) if citation.volume else ''
            if citation.issue:
                locator += f"({citation.issue})"
            # Drop empty parts so a missing volume leaves no dangling ", ".
            parts = [str(part) for part in (citation.journal or 'Journal', locator, citation.pages) if part]
            return f"{authors} ({year}). {citation.title}. {', '.join(parts)}."

        return f"{authors} ({year}). {citation.title}. Retrieved {citation.access_date} from {citation.url}"

    def _generate_mla_citation(self, citation: "Citation") -> str:
        """Render ``citation`` in MLA style.

        The entry starts with the authors when there are any and with the
        quoted title otherwise.
        """
        lead = f"{', '.join(citation.authors)}. " if citation.authors else ""
        year = self._year_of(citation.publication_date) or 'n.d.'
        publisher = citation.publisher or "Web"
        return f'{lead}"{citation.title}." {publisher}, {year}, {citation.url}. Accessed {citation.access_date}.'

    def _generate_chicago_citation(self, citation: "Citation") -> str:
        """Render ``citation`` in Chicago style."""
        authors = self._author_text(citation)
        year = self._year_of(citation.publication_date) or 'n.d.'
        publisher = citation.publisher or 'Web'
        return f"{authors}. \"{citation.title}.\" {publisher}, {year}. {citation.url}."

    def _generate_ieee_citation(self, citation: "Citation") -> str:
        """Render ``citation`` in IEEE style."""
        authors = self._author_text(citation)
        year = self._year_of(citation.publication_date) or 'n.d.'
        publisher = citation.publisher or 'Web'
        return f"{authors}, \"{citation.title},\" {publisher}, {year}. [Online]. Available: {citation.url}"

    # ------------------------------------------------------------------
    # Reference lists (kept as separate entry points for existing callers)
    # ------------------------------------------------------------------

    def _generate_apa_reference_list(self, citations: "List[Citation]") -> str:
        """Render ``citations`` in APA style, separated by blank lines."""
        return self._join_references(self._generate_apa_citation, citations)

    def _generate_mla_reference_list(self, citations: "List[Citation]") -> str:
        """Render ``citations`` in MLA style, separated by blank lines."""
        return self._join_references(self._generate_mla_citation, citations)

    def _generate_chicago_reference_list(self, citations: "List[Citation]") -> str:
        """Render ``citations`` in Chicago style, separated by blank lines."""
        return self._join_references(self._generate_chicago_citation, citations)

    def _generate_ieee_reference_list(self, citations: "List[Citation]") -> str:
        """Render ``citations`` in IEEE style, separated by blank lines."""
        return self._join_references(self._generate_ieee_citation, citations)

    # ------------------------------------------------------------------
    # Exports
    # ------------------------------------------------------------------

    def _generate_bibtex_export(self) -> str:
        """Return every citation as a BibTeX ``@misc`` entry.

        Field values are written verbatim; characters that are special to
        BibTeX are not escaped.
        """
        entries = []
        for citation in self.citations.values():
            fields = [f"  title = {{{citation.title}}}"]
            if citation.authors:
                joined = ' and '.join(citation.authors)
                fields.append(f"  author = {{{joined}}}")
            year = self._year_of(citation.publication_date)
            if year:
                fields.append(f"  year = {{{year}}}")
            fields.append(f"  url = {{{citation.url}}}")
            fields.append(f"  urldate = {{{citation.access_date}}}")
            entries.append(f"@misc{{{citation.id},\n" + ",\n".join(fields) + "\n}")

        return "\n\n".join(entries)

    def _generate_ris_export(self) -> str:
        """Return every citation as a RIS record of type ``GEN``.

        Each line is a two-letter tag, two spaces, a hyphen and one space,
        which is the layout RIS readers expect.
        """
        records = []
        for citation in self.citations.values():
            lines = ["TY  - GEN", f"TI  - {citation.title}"]
            lines.extend(f"AU  - {author}" for author in citation.authors or [])
            year = self._year_of(citation.publication_date)
            if year:
                lines.append(f"PY  - {year}")
            lines.append(f"UR  - {citation.url}")
            lines.append("ER  - ")
            records.append("\n".join(lines))

        return "\n\n".join(records)

    def _citation_to_dict(self, citation: "Citation") -> Dict[str, Any]:
        """Return the citation's fields as a plain dict for JSON export."""
        return {
            'id': citation.id,
            'url': citation.url,
            'title': citation.title,
            'authors': citation.authors,
            'publication_date': citation.publication_date,
            'access_date': citation.access_date,
            'source_type': citation.source_type,
            'doi': citation.doi,
            'journal': citation.journal,
            'volume': citation.volume,
            'issue': citation.issue,
            'pages': citation.pages,
            'publisher': citation.publisher,
            'abstract': citation.abstract,
            'keywords': citation.keywords,
            'citation_count': citation.citation_count,
            'relevance_score': citation.relevance_score,
        }
434
-
435
- # Global instance: one CitationManager built at import time with its default
- # arguments (db_ops=None, openalex_client=None).
436
- citation_manager = CitationManager()