iflow-mcp-yuzongmin-sqlite-literature-management-fastmcp-mcp-server 0.1.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,1628 @@
1
+ from pathlib import Path
2
+ import sqlite3
3
+ import os
4
+ import json
5
+ import uuid
6
+ from typing import List, Dict, Any, Optional, Tuple, Set, Union
7
+ from fastmcp import FastMCP
8
+ from datetime import datetime
9
+ import re
10
+
11
# Initialize FastMCP server
mcp = FastMCP("Source Manager")

# Path to the Literature database. There is no default: the caller must
# point SQLITE_DB_PATH at the database file before importing this module.
_db_path_env = os.environ.get('SQLITE_DB_PATH')
if _db_path_env is None:
    raise ValueError("SQLITE_DB_PATH environment variable must be set")
DB_PATH = Path(_db_path_env)
18
+
19
+
20
+ # Classes
21
+
22
class SourceIdentifiers:
    """Enumerates the identifier schemes a source record may carry."""

    # Accepted keys for a source's ``identifiers`` JSON object.
    VALID_TYPES = {
        'semantic_scholar',  # Academic papers indexed by Semantic Scholar
        'arxiv',             # arXiv preprints
        'doi',               # Anything carrying a DOI
        'isbn',              # Books
        'url',               # Webpages, blogs, videos
    }
31
+
32
class SourceTypes:
    """Enumerates the kinds of sources the catalogue accepts."""

    # Accepted values for a source's ``type`` column.
    VALID_TYPES = {'paper', 'webpage', 'book', 'video', 'blog'}
35
+
36
class SourceStatus:
    """Enumerates the reading-progress states a source may be in."""

    # Accepted values for a source's ``status`` column.
    VALID_STATUS = {'unread', 'reading', 'completed', 'archived'}
39
+
40
class SQLiteConnection:
    """Context manager that opens a SQLite connection and closes it on exit.

    Rows come back as :class:`sqlite3.Row`, so columns are addressable by
    name. Exceptions are never suppressed: the connection is closed and the
    error propagates to the caller. Committing is the caller's job.
    """

    def __init__(self, db_path: Path):
        self.db_path = db_path
        self.conn: Optional[sqlite3.Connection] = None

    def __enter__(self) -> sqlite3.Connection:
        connection = sqlite3.connect(str(self.db_path))
        connection.row_factory = sqlite3.Row
        self.conn = connection
        return connection

    def __exit__(self, exc_type, exc_val, exc_tb):
        # Close unconditionally when a connection was opened; returning
        # None lets any in-flight exception propagate.
        if self.conn is not None:
            self.conn.close()
54
+
55
class EntityRelations:
    """Enumerates how a source may relate to a knowledge-graph entity."""

    # Accepted relation types for source -> entity links.
    VALID_TYPES = {
        'discusses',
        'introduces',
        'extends',
        'evaluates',
        'applies',
        'critiques',
    }
65
+
66
+
67
+ # Helper Functions
68
def search_sources(
    sources: List[Tuple[str, str, str, str]],  # (title, type, identifier_type, identifier_value)
    db_path: Path
) -> List[Tuple[Optional[str], List[Dict]]]:
    """Bulk-search the sources table for several candidate sources at once.

    Each candidate is resolved in two stages: first an exact match on
    (type, identifier) inside the identifiers JSON column; failing that, a
    case-insensitive substring match on the title.

    Args:
        sources: Tuples of (title, type, identifier_type, identifier_value).
        db_path: Path to the SQLite database file.

    Returns:
        One tuple per input, in input order:
        - UUID of the exact identifier match, or None.
        - Potential title matches (dicts with 'id', 'title', 'identifiers');
          empty when an exact match was found.

    Raises:
        ValueError: If a source type or identifier type is not recognised.
    """
    matches_per_source: List[Tuple[Optional[str], List[Dict]]] = []

    with SQLiteConnection(db_path) as conn:
        cursor = conn.cursor()

        for title, source_type, id_kind, id_value in sources:
            if source_type not in SourceTypes.VALID_TYPES:
                raise ValueError(f"Invalid source type. Must be one of: {SourceTypes.VALID_TYPES}")
            if id_kind not in SourceIdentifiers.VALID_TYPES:
                raise ValueError(f"Invalid identifier type. Must be one of: {SourceIdentifiers.VALID_TYPES}")

            # Stage 1: exact identifier lookup inside the JSON blob.
            exact = cursor.execute(
                """
                SELECT id FROM sources
                WHERE type = ? AND
                      json_extract(identifiers, ?) = ?
                """,
                [source_type, f"$.{id_kind}", id_value],
            ).fetchone()

            if exact is not None:
                matches_per_source.append((exact['id'], []))
                continue

            # Stage 2: fuzzy, case-insensitive substring match on the title.
            cursor.execute(
                """
                SELECT id, title, identifiers
                FROM sources
                WHERE type = ? AND
                      LOWER(title) LIKE ?
                """,
                [source_type, f"%{title.lower()}%"],
            )
            candidates = [
                {
                    'id': row['id'],
                    'title': row['title'],
                    'identifiers': json.loads(row['identifiers']),
                }
                for row in cursor.fetchall()
            ]
            matches_per_source.append((None, candidates))

    return matches_per_source
142
+
143
def get_sources_details(uuids: Union[str, List[str]], db_path: Path) -> List[Dict[str, Any]]:
    """Fetch full records (core fields, notes, entity links) for sources.

    Args:
        uuids: A single source UUID or a list of them.
        db_path: Path to the SQLite database file.

    Returns:
        One dict per source with keys: id, title, type, status,
        identifiers (decoded JSON), notes (newest first) and entity_links.

    Raises:
        ValueError: If any requested UUID is absent from the database.
    """
    # Normalise the single-UUID convenience form to a list.
    if isinstance(uuids, str):
        uuids = [uuids]
    if not uuids:
        return []

    with SQLiteConnection(db_path) as conn:
        cursor = conn.cursor()
        placeholders = ','.join('?' * len(uuids))

        rows = cursor.execute(f"""
            SELECT id, title, type, status, identifiers
            FROM sources
            WHERE id IN ({placeholders})
        """, uuids).fetchall()

        # Every requested UUID must resolve, otherwise report the misses.
        if len(rows) != len(uuids):
            present = {row['id'] for row in rows}
            missing_ids = [requested for requested in uuids if requested not in present]
            raise ValueError(f"Sources not found for UUIDs: {', '.join(missing_ids)}")

        details = [
            {
                'id': row['id'],
                'title': row['title'],
                'type': row['type'],
                'status': row['status'],
                'identifiers': json.loads(row['identifiers']),
            }
            for row in rows
        ]

        # One query for every source's notes, newest first.
        cursor.execute(f"""
            SELECT source_id, note_title, content, created_at
            FROM source_notes
            WHERE source_id IN ({placeholders})
            ORDER BY created_at DESC
        """, uuids)
        notes_by_source: Dict[str, List[Dict[str, Any]]] = {}
        for row in cursor.fetchall():
            notes_by_source.setdefault(row['source_id'], []).append({
                'title': row['note_title'],
                'content': row['content'],
                'created_at': row['created_at'],
            })

        # One query for every source's entity links.
        cursor.execute(f"""
            SELECT source_id, entity_name, relation_type, notes
            FROM source_entity_links
            WHERE source_id IN ({placeholders})
        """, uuids)
        links_by_source: Dict[str, List[Dict[str, Any]]] = {}
        for row in cursor.fetchall():
            links_by_source.setdefault(row['source_id'], []).append({
                'entity_name': row['entity_name'],
                'relation_type': row['relation_type'],
                'notes': row['notes'],
            })

        # Attach the grouped notes/links to each source record.
        for record in details:
            record['notes'] = notes_by_source.get(record['id'], [])
            record['entity_links'] = links_by_source.get(record['id'], [])

        return details
242
+
243
+
244
+
245
+
246
+
247
+ # Original Tools of Sqlite DB
248
+
249
@mcp.tool()
def read_query(
    query: str,
    params: Optional[List[Any]] = None,
    fetch_all: bool = True,
    row_limit: int = 1000
) -> List[Dict[str, Any]]:
    """Execute a read-only query on the Literature database.

    Args:
        query: SELECT SQL query to execute (WITH-prefixed CTEs are allowed)
        params: Optional list of parameters for the query
        fetch_all: If True, fetches all results. If False, fetches one row.
        row_limit: Maximum number of rows to return (default 1000)

    Returns:
        List of dictionaries containing the query results

    Raises:
        FileNotFoundError: If the database file does not exist.
        ValueError: On multiple statements, non-SELECT queries, or SQLite errors.
    """
    if not DB_PATH.exists():
        raise FileNotFoundError(f"Literature database not found at: {DB_PATH}")

    # Normalise: strip whitespace and a single trailing semicolon so the
    # LIMIT clause can be appended safely below.
    query = query.strip()
    if query.endswith(';'):
        query = query[:-1].strip()

    def contains_multiple_statements(sql: str) -> bool:
        # Scan for a ';' outside single/double-quoted string literals; such a
        # semicolon separates two statements. NOTE(review): SQL comments
        # ("--", "/* */") are not modelled, so a ';' inside a comment would
        # also be flagged — that errs on the side of rejecting the query.
        in_single_quote = False
        in_double_quote = False
        for char in sql:
            if char == "'" and not in_double_quote:
                in_single_quote = not in_single_quote
            elif char == '"' and not in_single_quote:
                in_double_quote = not in_double_quote
            elif char == ';' and not in_single_quote and not in_double_quote:
                return True
        return False

    if contains_multiple_statements(query):
        raise ValueError("Multiple SQL statements are not allowed")

    # Read-only guard: only SELECT (or WITH ... SELECT) statements pass.
    query_lower = query.lower()
    if not any(query_lower.startswith(prefix) for prefix in ('select', 'with')):
        raise ValueError("Only SELECT queries (including WITH clauses) are allowed for safety")

    params = params or []

    with SQLiteConnection(DB_PATH) as conn:
        cursor = conn.cursor()

        try:
            # Append a LIMIT only when the text contains none. The substring
            # test also matches "limit" inside identifiers or literals, in
            # which case no limit is added (affects row count only, not safety).
            if 'limit' not in query_lower:
                query = f"{query} LIMIT {row_limit}"

            cursor.execute(query, params)

            if fetch_all:
                results = cursor.fetchall()
            else:
                results = [cursor.fetchone()]

            # Convert sqlite3.Row objects to plain dicts; drop the None that
            # fetchone() yields when the result set is empty.
            return [dict(row) for row in results if row is not None]

        except sqlite3.Error as e:
            raise ValueError(f"SQLite error: {str(e)}")
313
+
314
@mcp.tool()
def list_tables() -> List[str]:
    """List all tables in the Literature database.

    Returns:
        Table names, sorted alphabetically.

    Raises:
        FileNotFoundError: If the database file does not exist.
        ValueError: Wrapping any underlying SQLite error.
    """
    if not DB_PATH.exists():
        raise FileNotFoundError(f"Literature database not found at: {DB_PATH}")

    with SQLiteConnection(DB_PATH) as conn:
        try:
            rows = conn.execute(
                """
                SELECT name FROM sqlite_master
                WHERE type='table'
                ORDER BY name
                """
            ).fetchall()
        except sqlite3.Error as e:
            raise ValueError(f"SQLite error: {str(e)}")
        return [row['name'] for row in rows]
338
+
339
@mcp.tool()
def describe_table(table_name: str) -> List[Dict[str, str]]:
    """Get detailed information about a table's schema.

    Args:
        table_name: Name of the table to describe

    Returns:
        One dict per column, carrying the PRAGMA table_info fields
        (name, type, notnull, dflt_value, pk, ...).

    Raises:
        FileNotFoundError: If the database file does not exist.
        ValueError: If the table does not exist, or on SQLite errors.
    """
    if not DB_PATH.exists():
        raise FileNotFoundError(f"Literature database not found at: {DB_PATH}")

    with SQLiteConnection(DB_PATH) as conn:
        cursor = conn.cursor()
        try:
            # Confirm the table exists before interpolating its name into
            # the PRAGMA below (PRAGMA does not accept bound parameters).
            known = cursor.execute(
                """
                SELECT name FROM sqlite_master
                WHERE type='table' AND name=?
                """,
                [table_name],
            ).fetchone()
            if known is None:
                raise ValueError(f"Table '{table_name}' does not exist")

            cursor.execute(f"PRAGMA table_info({table_name})")
            return [dict(row) for row in cursor.fetchall()]

        except sqlite3.Error as e:
            raise ValueError(f"SQLite error: {str(e)}")
378
+
379
@mcp.tool()
def get_table_stats(table_name: str) -> Dict[str, Any]:
    """Get statistics about a table, including row count and storage info.

    Args:
        table_name: Name of the table to analyze

    Returns:
        Dict with table_name, row_count, column_count and page_size.

    Raises:
        FileNotFoundError: If the database file does not exist.
        ValueError: If the table does not exist, or on SQLite errors.
    """
    if not DB_PATH.exists():
        raise FileNotFoundError(f"Literature database not found at: {DB_PATH}")

    with SQLiteConnection(DB_PATH) as conn:
        cursor = conn.cursor()
        try:
            # The table name is interpolated into SQL below, so make sure
            # it actually names an existing table first.
            known = cursor.execute(
                """
                SELECT name FROM sqlite_master
                WHERE type='table' AND name=?
                """,
                [table_name],
            ).fetchone()
            if known is None:
                raise ValueError(f"Table '{table_name}' does not exist")

            row_count = cursor.execute(
                f"SELECT COUNT(*) as count FROM {table_name}"
            ).fetchone()['count']

            page_size = cursor.execute("PRAGMA page_size").fetchone()[0]

            cursor.execute(f"PRAGMA table_info({table_name})")
            column_count = len(cursor.fetchall())

            return {
                "table_name": table_name,
                "row_count": row_count,
                "column_count": column_count,
                "page_size": page_size
            }

        except sqlite3.Error as e:
            raise ValueError(f"SQLite error: {str(e)}")
424
+
425
@mcp.tool()
def get_database_info() -> Dict[str, Any]:
    """Get overall database information and statistics.

    Returns:
        Dict with database_size_bytes, table_count, sqlite_version,
        table_row_counts (per user table) and path.

    Raises:
        FileNotFoundError: If the database file does not exist.
        ValueError: Wrapping any underlying SQLite error.
    """
    if not DB_PATH.exists():
        raise FileNotFoundError(f"Literature database not found at: {DB_PATH}")

    with SQLiteConnection(DB_PATH) as conn:
        cursor = conn.cursor()
        try:
            db_size = os.path.getsize(DB_PATH)

            # Count user tables; internal sqlite_* tables are excluded.
            cursor.execute("""
                SELECT COUNT(*) as count
                FROM sqlite_master
                WHERE type='table' AND name NOT LIKE 'sqlite_%'
            """)
            table_count = cursor.fetchone()['count']

            cursor.execute("SELECT sqlite_version()")
            version = cursor.fetchone()[0]

            # Per-table row counts: list names first, then count each.
            cursor.execute("""
                SELECT name
                FROM sqlite_master
                WHERE type='table' AND name NOT LIKE 'sqlite_%'
            """)
            table_names = [row['name'] for row in cursor.fetchall()]

            row_counts = {}
            for name in table_names:
                cursor.execute(f"SELECT COUNT(*) as count FROM {name}")
                row_counts[name] = cursor.fetchone()['count']

            return {
                "database_size_bytes": db_size,
                "table_count": table_count,
                "sqlite_version": version,
                "table_row_counts": row_counts,
                "path": str(DB_PATH)
            }

        except sqlite3.Error as e:
            raise ValueError(f"SQLite error: {str(e)}")
476
+
477
@mcp.tool()
def vacuum_database() -> Dict[str, Any]:
    """Optimize the database by running the VACUUM command.

    VACUUM rebuilds the database file to reclaim unused space.

    Returns:
        Dict with status, the file size before/after, and bytes saved.

    Raises:
        FileNotFoundError: If the database file does not exist.
        ValueError: Wrapping any underlying SQLite error.
    """
    if not DB_PATH.exists():
        raise FileNotFoundError(f"Literature database not found at: {DB_PATH}")

    with SQLiteConnection(DB_PATH) as conn:
        cursor = conn.cursor()
        try:
            # Measure the on-disk file before and after the rebuild.
            size_before = os.path.getsize(DB_PATH)
            cursor.execute("VACUUM")
            size_after = os.path.getsize(DB_PATH)
        except sqlite3.Error as e:
            raise ValueError(f"SQLite error: {str(e)}")

        return {
            "status": "success",
            "size_before_bytes": size_before,
            "size_after_bytes": size_after,
            "space_saved_bytes": size_before - size_after
        }
509
+
510
+
511
+
512
+
513
+
514
+ # Source Management Tools:
515
+
516
@mcp.tool()
def add_sources(
    sources: List[Tuple[str, str, str, str, Optional[Dict[str, str]]]]  # [(title, type, identifier_type, identifier_value, initial_note)]
) -> List[Dict[str, Any]]:
    """Add multiple new sources with duplicate checking in a single transaction.

    Args:
        sources: List of tuples, each containing:
            - title: Source title
            - type: Source type (paper, webpage, book, video, blog)
            - identifier_type: Type of identifier
            - identifier_value: Value of the identifier
            - initial_note: Optional dict with 'title' and 'content' keys

    Returns:
        List of dictionaries containing operation results for each source:
        - On success: {"status": "success", "source": source_details}
        - On duplicate: {"status": "error", "message": "...", "existing_source": details}
        - On potential duplicate: {"status": "error", "message": "...", "matches": [...]}
        - On malformed initial note: {"status": "error", "message": "..."} and
          the source is NOT inserted.
    """
    if not DB_PATH.exists():
        raise FileNotFoundError(f"Database not found at: {DB_PATH}")

    if not sources:
        return []

    # Resolve every candidate against the catalogue in one bulk lookup.
    search_inputs = [
        (title, type_, id_type, id_value)
        for title, type_, id_type, id_value, _ in sources
    ]
    search_results = search_sources(search_inputs, DB_PATH)

    results = []
    sources_to_add = []
    notes_to_add = []

    for (title, type_, id_type, id_value, initial_note), (uuid_str, matches) in zip(sources, search_results):
        if uuid_str:
            # Exact identifier match: the source already exists.
            try:
                existing_source = get_sources_details(uuid_str, DB_PATH)[0]
                results.append({
                    "status": "error",
                    "message": "Source already exists",
                    "existing_source": existing_source
                })
            except Exception as e:
                results.append({
                    "status": "error",
                    "message": f"Error retrieving existing source: {str(e)}"
                })
            continue

        if matches:
            # Fuzzy title matches: refuse to insert until disambiguated.
            results.append({
                "status": "error",
                "message": "Potential duplicates found. Please verify or use add_identifier if these are the same source.",
                "matches": matches
            })
            continue

        # BUG FIX: validate the optional note BEFORE queuing the source.
        # Previously the source was appended to sources_to_add first, so a
        # malformed note reported an error yet still inserted the source row.
        if initial_note and not all(k in initial_note for k in ('title', 'content')):
            results.append({
                "status": "error",
                "message": f"Invalid initial note format for source '{title}'"
            })
            continue

        new_id = str(uuid.uuid4())
        sources_to_add.append({
            'id': new_id,
            'title': title,
            'type': type_,
            'identifiers': json.dumps({id_type: id_value})
        })
        if initial_note:
            notes_to_add.append({
                'source_id': new_id,
                'note_title': initial_note['title'],
                'content': initial_note['content']
            })

        # Placeholder; replaced with the full record after the insert commits.
        results.append({
            "status": "pending",
            "source_id": new_id
        })

    # Insert all new sources (and their initial notes) in one transaction.
    if sources_to_add:
        with SQLiteConnection(DB_PATH) as conn:
            cursor = conn.cursor()
            try:
                cursor.executemany("""
                    INSERT INTO sources (id, title, type, identifiers)
                    VALUES (:id, :title, :type, :identifiers)
                """, sources_to_add)

                if notes_to_add:
                    cursor.executemany("""
                        INSERT INTO source_notes (source_id, note_title, content)
                        VALUES (:source_id, :note_title, :content)
                    """, notes_to_add)

                conn.commit()

                # Swap every pending placeholder for the freshly stored record.
                added_source_ids = [s['id'] for s in sources_to_add]
                details_by_id = {
                    s['id']: s for s in get_sources_details(added_source_ids, DB_PATH)
                }
                for i, result in enumerate(results):
                    if result.get("status") == "pending":
                        results[i] = {
                            "status": "success",
                            "source": details_by_id[result["source_id"]]
                        }

            except sqlite3.Error as e:
                conn.rollback()
                raise ValueError(f"Database error: {str(e)}")

    return results
652
+
653
@mcp.tool()
def add_notes(
    source_notes: List[Tuple[str, str, str, str, str, str]]  # [(title, type, identifier_type, identifier_value, note_title, note_content)]
) -> List[Dict[str, Any]]:
    """Add notes to multiple sources in a single transaction.

    Args:
        source_notes: List of tuples, each containing:
            - title: Source title
            - type: Source type
            - identifier_type: Type of identifier
            - identifier_value: Value of the identifier
            - note_title: Title for the new note
            - note_content: Content of the note

    Returns:
        List of dictionaries containing operation results for each note addition:
        - On success: {"status": "success", "source": source_details}
        - On source not found: {"status": "error", "message": "Source not found"}
        - On ambiguous source: {"status": "error", "message": "...", "matches": [...]}
        - On duplicate note: {"status": "error", "message": "Note with this title already exists for this source"}
    """
    if not DB_PATH.exists():
        raise FileNotFoundError(f"Database not found at: {DB_PATH}")

    if not source_notes:
        return []

    # Resolve every target source in one bulk lookup.
    search_inputs = [
        (title, type_, id_type, id_value)
        for title, type_, id_type, id_value, _, _ in source_notes
    ]
    search_results = search_sources(search_inputs, DB_PATH)

    results = []
    notes_to_add = []
    note_result_indices = []  # for each queued note, its slot in `results`
    source_ids = []

    for (title, type_, id_type, id_value, note_title, note_content), (uuid_str, matches) in zip(source_notes, search_results):
        if not uuid_str:
            if matches:
                results.append({
                    "status": "error",
                    "message": "Multiple potential matches found. Please verify the source.",
                    "matches": matches
                })
            else:
                results.append({
                    "status": "error",
                    "message": "Source not found"
                })
            continue

        notes_to_add.append({
            'source_id': uuid_str,
            'note_title': note_title,
            'content': note_content
        })
        note_result_indices.append(len(results))
        source_ids.append(uuid_str)
        results.append({
            "status": "pending",
            "source_id": uuid_str
        })

    if notes_to_add:
        with SQLiteConnection(DB_PATH) as conn:
            cursor = conn.cursor()
            try:
                # Find (source_id, note_title) pairs that already exist.
                cursor.execute(f"""
                    SELECT source_id, note_title
                    FROM source_notes
                    WHERE (source_id, note_title) IN
                    ({','.join('(?,?)' for _ in notes_to_add)})
                """, [
                    val for note in notes_to_add
                    for val in (note['source_id'], note['note_title'])
                ])
                existing_notes = {
                    (row['source_id'], row['note_title'])
                    for row in cursor.fetchall()
                }

                # BUG FIX: write duplicate errors into each note's OWN result
                # slot. Previously `results[i]` used the note's index within
                # notes_to_add, which drifts as soon as any earlier source
                # lookup failed, overwriting the wrong result entries.
                filtered_notes = []
                for note, result_index in zip(notes_to_add, note_result_indices):
                    if (note['source_id'], note['note_title']) in existing_notes:
                        results[result_index] = {
                            "status": "error",
                            "message": "Note with this title already exists for this source"
                        }
                    else:
                        filtered_notes.append(note)

                if filtered_notes:
                    cursor.executemany("""
                        INSERT INTO source_notes (source_id, note_title, content)
                        VALUES (:source_id, :note_title, :content)
                    """, filtered_notes)

                conn.commit()

                # Swap remaining pending placeholders for the updated records.
                details_by_id = {
                    s['id']: s
                    for s in get_sources_details(list(set(source_ids)), DB_PATH)
                }
                for i, result in enumerate(results):
                    if result.get("status") == "pending":
                        results[i] = {
                            "status": "success",
                            "source": details_by_id[result["source_id"]]
                        }

            except sqlite3.Error as e:
                conn.rollback()
                raise ValueError(f"Database error: {str(e)}")

    return results
781
+
782
@mcp.tool()
def update_status(
    source_status: List[Tuple[str, str, str, str, str]]  # [(title, type, identifier_type, identifier_value, new_status)]
) -> List[Dict[str, Any]]:
    """Update status for multiple sources in a single transaction.

    Args:
        source_status: Tuples of (title, type, identifier_type,
            identifier_value, new_status).

    Returns:
        One result dict per input:
        - On success: {"status": "success", "source": source_details}
        - On source not found: {"status": "error", "message": "Source not found"}
        - On ambiguous source: {"status": "error", "message": "...", "matches": [...]}

    Raises:
        FileNotFoundError: If the database file does not exist.
        ValueError: If any status value is invalid, or on database errors.
    """
    if not DB_PATH.exists():
        raise FileNotFoundError(f"Database not found at: {DB_PATH}")

    if not source_status:
        return []

    # Reject the whole batch up front when any status value is unknown.
    for _, _, _, _, status in source_status:
        if status not in SourceStatus.VALID_STATUS:
            raise ValueError(f"Invalid status. Must be one of: {SourceStatus.VALID_STATUS}")

    # Resolve every source in one bulk lookup.
    lookup_keys = [
        (title, type_, id_type, id_value)
        for title, type_, id_type, id_value, _ in source_status
    ]
    lookups = search_sources(lookup_keys, DB_PATH)

    results = []
    pending_updates = []
    touched_ids = []

    for (title, type_, id_type, id_value, new_status), (uuid_str, matches) in zip(source_status, lookups):
        if not uuid_str:
            if matches:
                results.append({
                    "status": "error",
                    "message": "Multiple potential matches found. Please verify the source.",
                    "matches": matches
                })
            else:
                results.append({
                    "status": "error",
                    "message": "Source not found"
                })
            continue

        pending_updates.append({'id': uuid_str, 'status': new_status})
        touched_ids.append(uuid_str)
        # Placeholder; replaced with the full record after the commit.
        results.append({"status": "pending", "source_id": uuid_str})

    if pending_updates:
        with SQLiteConnection(DB_PATH) as conn:
            cursor = conn.cursor()
            try:
                cursor.executemany("""
                    UPDATE sources
                    SET status = :status
                    WHERE id = :id
                """, pending_updates)
                conn.commit()

                # Re-read the updated rows and fill in the placeholders.
                details_by_id = {
                    s['id']: s
                    for s in get_sources_details(list(set(touched_ids)), DB_PATH)
                }
                for i, result in enumerate(results):
                    if result.get("status") == "pending":
                        results[i] = {
                            "status": "success",
                            "source": details_by_id[result["source_id"]]
                        }

            except sqlite3.Error as e:
                conn.rollback()
                raise ValueError(f"Database error: {str(e)}")

    return results
884
+
885
@mcp.tool()
def add_identifiers(
    source_identifiers: List[Tuple[str, str, str, str, str, str]]  # [(title, type, current_id_type, current_id_value, new_id_type, new_id_value)]
) -> List[Dict[str, Any]]:
    """Add new identifiers to multiple sources in a single transaction.

    Args:
        source_identifiers: List of tuples, each containing:
            - title: Source title
            - type: Source type
            - current_identifier_type: Current identifier type
            - current_identifier_value: Current identifier value
            - new_identifier_type: New identifier type to add
            - new_identifier_value: New identifier value to add

    Returns:
        List of dictionaries containing operation results for each identifier addition:
        - On success: {"status": "success", "source": source_details}
        - On source not found: {"status": "error", "message": "Source not found"}
        - On ambiguous source: {"status": "error", "message": "...", "matches": [...]}
        - On duplicate identifier: {"status": "error", "message": "...", "existing_source": details}
        - On invalid identifier type: {"status": "error", "message": "Invalid identifier type. Must be one of: ..."}
    """
    if not DB_PATH.exists():
        raise FileNotFoundError(f"Database not found at: {DB_PATH}")

    if not source_identifiers:
        return []

    # Validate all new identifier types first — any bad type fails the batch.
    for _, _, _, _, new_type, _ in source_identifiers:
        if new_type not in SourceIdentifiers.VALID_TYPES:
            raise ValueError(f"Invalid new identifier type. Must be one of: {SourceIdentifiers.VALID_TYPES}")

    # Prepare search inputs for bulk source lookup (by the CURRENT identifier).
    search_inputs = [
        (title, type_, current_type, current_value)
        for title, type_, current_type, current_value, _, _ in source_identifiers
    ]

    # Bulk search for sources
    search_results = search_sources(search_inputs, DB_PATH)

    # Process results and prepare updates
    results = []
    updates_to_make = []
    source_ids = []

    # First pass: collect all sources and validate new identifiers don't exist
    new_identifier_checks = [
        (type_, new_type, new_value)
        for _, type_, _, _, new_type, new_value in source_identifiers
    ]
    # The "Check {i}" titles are throwaway placeholders: search_sources needs
    # a title argument, but only the identifier half of each tuple matters
    # here. Any fuzzy title matches this produces are ignored below — only
    # the exact-identifier UUID is kept.
    new_id_search_results = search_sources([
        (f"Check {i}", type_, id_type, id_value)
        for i, (type_, id_type, id_value) in enumerate(new_identifier_checks)
    ], DB_PATH)

    # Create mapping of new identifiers to existing sources
    existing_new_ids = {
        (type_, id_type, id_value): uuid_str
        for (_, type_, _, _, id_type, id_value), (uuid_str, _)
        in zip(source_identifiers, new_id_search_results)
        if uuid_str
    }

    for (title, type_, current_type, current_value, new_type, new_value), (uuid_str, matches) in zip(source_identifiers, search_results):
        if not uuid_str:
            if matches:
                results.append({
                    "status": "error",
                    "message": "Multiple potential matches found. Please verify the source.",
                    "matches": matches
                })
            else:
                results.append({
                    "status": "error",
                    "message": "Source not found"
                })
            continue

        # Check if new identifier exists on a different source. A hit on the
        # SAME source is allowed (re-adding an identifier it already has).
        existing_source = existing_new_ids.get((type_, new_type, new_value))
        if existing_source and existing_source != uuid_str:
            try:
                existing_details = get_sources_details(existing_source, DB_PATH)[0]
                results.append({
                    "status": "error",
                    "message": "New identifier already exists on a different source",
                    "existing_source": existing_details
                })
            except Exception as e:
                results.append({
                    "status": "error",
                    "message": f"Error retrieving existing source: {str(e)}"
                })
            continue

        updates_to_make.append({
            'id': uuid_str,
            'new_type': new_type,
            'new_value': new_value
        })
        source_ids.append(uuid_str)
        # Placeholder; replaced with the full record after the commit below.
        results.append({
            "status": "pending",
            "source_id": uuid_str
        })

    if updates_to_make:
        with SQLiteConnection(DB_PATH) as conn:
            cursor = conn.cursor()
            try:
                # Update identifiers one by one (since we need to merge JSON):
                # json_set inserts or overwrites the key within the existing
                # identifiers object rather than replacing the whole blob.
                for update in updates_to_make:
                    cursor.execute("""
                        UPDATE sources
                        SET identifiers = json_set(
                            identifiers,
                            :path,
                            :value
                        )
                        WHERE id = :id
                    """, {
                        'id': update['id'],
                        'path': f"$.{update['new_type']}",
                        'value': update['new_value']
                    })

                conn.commit()

                # Get updated source details
                source_details = get_sources_details(list(set(source_ids)), DB_PATH)

                # Update results: swap each pending placeholder for its record.
                for i, result in enumerate(results):
                    if result.get("status") == "pending":
                        source_id = result["source_id"]
                        source_detail = next(s for s in source_details if s['id'] == source_id)
                        results[i] = {
                            "status": "success",
                            "source": source_detail
                        }

            except sqlite3.Error as e:
                conn.rollback()
                raise ValueError(f"Database error: {str(e)}")

    return results
1034
+
1035
+
1036
+
1037
+
1038
+ # Entity Management Tools:
1039
+
1040
@mcp.tool()
def link_to_entities(
    source_entity_links: List[Tuple[str, str, str, str, str, str, Optional[str]]]
) -> List[Dict[str, Any]]:
    """Link multiple sources to entities in the knowledge graph.

    Args:
        source_entity_links: List of tuples, each containing:
            - title: Source title
            - type: Source type (paper, webpage, book, video, blog)
            - identifier_type: Type of identifier (semantic_scholar, arxiv, doi, isbn, url)
            - identifier_value: Value of the identifier
            - entity_name: Name of the entity to link to
            - relation_type: Type of relationship (discusses, introduces, extends, evaluates, applies, critiques)
            - notes: Optional notes explaining the relationship

    Returns:
        List of operation results, each containing:
        {
            "status": "success" | "error",
            "message": Error message if status is "error",
            "source": Source details if status is "success",
            "matches": List of potential matches if ambiguous source found
        }

    Raises:
        FileNotFoundError: If the database file does not exist.
        ValueError: If a relation type is invalid or a database error occurs.
    """
    if not DB_PATH.exists():
        raise FileNotFoundError(f"Database not found at: {DB_PATH}")

    if not source_entity_links:
        return []

    # Validate all relation types up front so we fail before touching the DB.
    for _, _, _, _, _, relation_type, _ in source_entity_links:
        if relation_type not in EntityRelations.VALID_TYPES:
            raise ValueError(f"Invalid relation type. Must be one of: {EntityRelations.VALID_TYPES}")

    # Prepare search inputs for bulk source lookup.
    search_inputs = [
        (title, type_, id_type, id_value)
        for title, type_, id_type, id_value, _, _, _ in source_entity_links
    ]

    # Bulk search for sources.
    search_results = search_sources(search_inputs, DB_PATH)

    # Process results and prepare links.  Each entry in links_to_add is paired
    # (by position) with link_result_indices, which records the slot in
    # `results` that belongs to that link.
    results = []
    links_to_add = []
    link_result_indices = []
    source_ids = []

    for (title, type_, id_type, id_value, entity_name, relation_type, notes), (uuid_str, matches) in zip(source_entity_links, search_results):
        if not uuid_str:
            if matches:
                results.append({
                    "status": "error",
                    "message": "Multiple potential matches found. Please verify the source.",
                    "matches": matches
                })
            else:
                results.append({
                    "status": "error",
                    "message": "Source not found"
                })
            continue

        links_to_add.append({
            'source_id': uuid_str,
            'entity_name': entity_name,
            'relation_type': relation_type,
            'notes': notes
        })
        link_result_indices.append(len(results))
        source_ids.append(uuid_str)
        results.append({
            "status": "pending",
            "source_id": uuid_str
        })

    if links_to_add:
        with SQLiteConnection(DB_PATH) as conn:
            cursor = conn.cursor()
            try:
                # Check for existing links in one query.
                placeholders = ','.join('(?,?)' for _ in links_to_add)
                cursor.execute(f"""
                    SELECT source_id, entity_name
                    FROM source_entity_links
                    WHERE (source_id, entity_name) IN ({placeholders})
                """, [
                    val for link in links_to_add
                    for val in (link['source_id'], link['entity_name'])
                ])

                # Track existing links.
                existing_links = {
                    (row['source_id'], row['entity_name'])
                    for row in cursor.fetchall()
                }

                # BUG FIX: the original indexed `results[i]` with the position
                # of the link in links_to_add, but `results` also contains
                # error entries for failed lookups, so the indices diverge as
                # soon as any source is not found.  Use the recorded result
                # index for each link instead.
                filtered_links = []
                for link, result_idx in zip(links_to_add, link_result_indices):
                    if (link['source_id'], link['entity_name']) in existing_links:
                        results[result_idx] = {
                            "status": "error",
                            "message": "Link already exists between this source and entity"
                        }
                    else:
                        filtered_links.append(link)

                # Add new links.
                if filtered_links:
                    cursor.executemany("""
                        INSERT INTO source_entity_links
                        (source_id, entity_name, relation_type, notes)
                        VALUES (:source_id, :entity_name, :relation_type, :notes)
                    """, filtered_links)

                conn.commit()

                # Get updated source details and resolve remaining pendings.
                source_details = get_sources_details(list(set(source_ids)), DB_PATH)

                for i, result in enumerate(results):
                    if result.get("status") == "pending":
                        source_id = result["source_id"]
                        source_detail = next(s for s in source_details if s['id'] == source_id)
                        results[i] = {
                            "status": "success",
                            "source": source_detail
                        }

            except sqlite3.Error as e:
                conn.rollback()
                raise ValueError(f"Database error: {str(e)}")

    return results
1177
+
1178
@mcp.tool()
def get_source_entities(
    sources: List[Tuple[str, str, str, str]]
) -> List[Dict[str, Any]]:
    """Get all entities linked to multiple sources.

    Args:
        sources: List of tuples, each containing:
            - title: Source title
            - type: Source type
            - identifier_type: Type of identifier
            - identifier_value: Value of the identifier

    Returns:
        List of operation results, each containing:
        {
            "status": "success" | "error",
            "message": Error message if status is "error",
            "source": Source details including linked entities if status is "success",
            "matches": List of potential matches if ambiguous source found
        }

    Raises:
        FileNotFoundError: If the database file does not exist.
    """
    if not DB_PATH.exists():
        raise FileNotFoundError(f"Database not found at: {DB_PATH}")

    if not sources:
        return []

    # Resolve every requested source in a single bulk lookup.
    lookup_results = search_sources(sources, DB_PATH)

    results: List[Dict[str, Any]] = []
    resolved_ids: List[str] = []

    # One result entry per input, in input order.  Unresolved lookups become
    # error entries immediately; resolved ones are marked pending until the
    # detail fetch below fills them in.
    for uuid_str, candidates in lookup_results:
        if uuid_str:
            resolved_ids.append(uuid_str)
            results.append({"status": "pending", "source_id": uuid_str})
        elif candidates:
            results.append({
                "status": "error",
                "message": "Multiple potential matches found. Please verify the source.",
                "matches": candidates
            })
        else:
            results.append({
                "status": "error",
                "message": "Source not found"
            })

    if resolved_ids:
        try:
            # Fetch details (including entity links) once, then index by id.
            details_by_id = {
                detail['id']: detail
                for detail in get_sources_details(resolved_ids, DB_PATH)
            }

            for idx, entry in enumerate(results):
                if entry.get("status") == "pending":
                    results[idx] = {
                        "status": "success",
                        "source": details_by_id[entry["source_id"]]
                    }

        except ValueError as exc:
            # Propagate the detail-lookup failure into every unresolved slot.
            for idx, entry in enumerate(results):
                if entry.get("status") == "pending":
                    results[idx] = {
                        "status": "error",
                        "message": str(exc)
                    }

    return results
1259
+
1260
@mcp.tool()
def update_entity_links(
    source_entity_updates: List[Tuple[str, str, str, str, str, Optional[str], Optional[str]]]
) -> List[Dict[str, Any]]:
    """Update existing links between sources and entities.

    Args:
        source_entity_updates: List of tuples, each containing:
            - title: Source title
            - type: Source type
            - identifier_type: Type of identifier
            - identifier_value: Value of the identifier
            - entity_name: Name of the entity
            - relation_type: Optional new relationship type
            - notes: Optional new notes

    Note: At least one of relation_type or notes must be provided in each tuple

    Returns:
        List of operation results, each containing:
        {
            "status": "success" | "error",
            "message": Error message if status is "error",
            "source": Source details if status is "success",
            "matches": List of potential matches if ambiguous source found
        }

    Raises:
        FileNotFoundError: If the database file does not exist.
        ValueError: If validation fails or a database error occurs.
    """
    if not DB_PATH.exists():
        raise FileNotFoundError(f"Database not found at: {DB_PATH}")

    if not source_entity_updates:
        return []

    # Validate updates before touching the database.
    for _, _, _, _, _, relation_type, notes in source_entity_updates:
        if relation_type and relation_type not in EntityRelations.VALID_TYPES:
            raise ValueError(f"Invalid relation type. Must be one of: {EntityRelations.VALID_TYPES}")
        if not relation_type and notes is None:
            raise ValueError("At least one of relation_type or notes must be provided")

    # Prepare search inputs for bulk source lookup.
    search_inputs = [
        (title, type_, id_type, id_value)
        for title, type_, id_type, id_value, _, _, _ in source_entity_updates
    ]

    # Bulk search for sources.
    search_results = search_sources(search_inputs, DB_PATH)

    # Process results and prepare updates.
    results = []
    updates_to_make = []
    source_ids = []

    for (title, type_, id_type, id_value, entity_name, relation_type, notes), (uuid_str, matches) in zip(source_entity_updates, search_results):
        if not uuid_str:
            if matches:
                results.append({
                    "status": "error",
                    "message": "Multiple potential matches found. Please verify the source.",
                    "matches": matches
                })
            else:
                results.append({
                    "status": "error",
                    "message": "Source not found"
                })
            continue

        # Record the slot in `results` this update owns, so a failed update
        # can be reported against the right entry even when the same source
        # appears more than once in the input.
        updates_to_make.append({
            'source_id': uuid_str,
            'entity_name': entity_name,
            'relation_type': relation_type,
            'notes': notes,
            'result_index': len(results)
        })
        source_ids.append(uuid_str)
        results.append({
            "status": "pending",
            "source_id": uuid_str
        })

    if updates_to_make:
        with SQLiteConnection(DB_PATH) as conn:
            cursor = conn.cursor()
            try:
                # Update each link individually since the SET clause varies.
                for update in updates_to_make:
                    assignments = []
                    params = []

                    if update['relation_type']:
                        assignments.append("relation_type = ?")
                        params.append(update['relation_type'])
                    if update['notes'] is not None:
                        assignments.append("notes = ?")
                        params.append(update['notes'])

                    params.extend([update['source_id'], update['entity_name']])

                    cursor.execute(f"""
                        UPDATE source_entity_links
                        SET {', '.join(assignments)}
                        WHERE source_id = ? AND entity_name = ?
                    """, params)

                    if cursor.rowcount == 0:
                        # BUG FIX: the original searched `results` for the
                        # first pending entry with a matching source_id, which
                        # could flag the wrong entry when a source appeared in
                        # several updates.  Use the recorded index instead.
                        results[update['result_index']] = {
                            "status": "error",
                            "message": "No link found between this source and entity"
                        }

                conn.commit()

                # Get updated source details and resolve remaining pendings.
                source_details = get_sources_details(list(set(source_ids)), DB_PATH)

                for i, result in enumerate(results):
                    if result.get("status") == "pending":
                        source_id = result["source_id"]
                        source_detail = next(s for s in source_details if s['id'] == source_id)
                        results[i] = {
                            "status": "success",
                            "source": source_detail
                        }

            except sqlite3.Error as e:
                conn.rollback()
                raise ValueError(f"Database error: {str(e)}")

    return results
1396
+
1397
@mcp.tool()
def remove_entity_links(
    source_entity_pairs: List[Tuple[str, str, str, str, str]]
) -> List[Dict[str, Any]]:
    """Remove links between sources and entities.

    Args:
        source_entity_pairs: List of tuples, each containing:
            - title: Source title
            - type: Source type
            - identifier_type: Type of identifier
            - identifier_value: Value of the identifier
            - entity_name: Name of the entity

    Returns:
        List of operation results, each containing:
        {
            "status": "success" | "error",
            "message": Error message if status is "error",
            "source": Source details if status is "success",
            "matches": List of potential matches if ambiguous source found
        }

    Raises:
        FileNotFoundError: If the database file does not exist.
        ValueError: If a database error occurs.
    """
    if not DB_PATH.exists():
        raise FileNotFoundError(f"Database not found at: {DB_PATH}")

    if not source_entity_pairs:
        return []

    # Prepare search inputs for bulk source lookup.
    search_inputs = [
        (title, type_, id_type, id_value)
        for title, type_, id_type, id_value, _ in source_entity_pairs
    ]

    # Bulk search for sources.
    search_results = search_sources(search_inputs, DB_PATH)

    # Process results and prepare deletions.
    results = []
    links_to_remove = []
    source_ids = []

    for (title, type_, id_type, id_value, entity_name), (uuid_str, matches) in zip(source_entity_pairs, search_results):
        if not uuid_str:
            if matches:
                results.append({
                    "status": "error",
                    "message": "Multiple potential matches found. Please verify the source.",
                    "matches": matches
                })
            else:
                results.append({
                    "status": "error",
                    "message": "Source not found"
                })
            continue

        # Record the owning result slot so missing links can be reported
        # against the correct entry even with duplicate source ids.
        links_to_remove.append({
            'source_id': uuid_str,
            'entity_name': entity_name,
            'result_index': len(results)
        })
        source_ids.append(uuid_str)
        results.append({
            "status": "pending",
            "source_id": uuid_str
        })

    if links_to_remove:
        with SQLiteConnection(DB_PATH) as conn:
            cursor = conn.cursor()
            try:
                placeholders = ','.join('(?,?)' for _ in links_to_remove)
                pair_params = [
                    val for link in links_to_remove
                    for val in (link['source_id'], link['entity_name'])
                ]

                # BUG FIX: check which requested links exist BEFORE deleting.
                # The original ran this check after the DELETE, when every
                # matched row was already gone, so all pending entries —
                # including successfully removed links — were reported as
                # "No link found".
                cursor.execute(f"""
                    SELECT source_id, entity_name
                    FROM source_entity_links
                    WHERE (source_id, entity_name) IN ({placeholders})
                """, pair_params)

                existing_links = {
                    (row['source_id'], row['entity_name'])
                    for row in cursor.fetchall()
                }

                # Report links that were never there.
                for link in links_to_remove:
                    if (link['source_id'], link['entity_name']) not in existing_links:
                        results[link['result_index']] = {
                            "status": "error",
                            "message": "No link found between this source and entity"
                        }

                # Remove all requested links in one query.
                cursor.execute(f"""
                    DELETE FROM source_entity_links
                    WHERE (source_id, entity_name) IN ({placeholders})
                """, pair_params)

                conn.commit()

                # Get updated source details and resolve remaining pendings.
                source_details = get_sources_details(list(set(source_ids)), DB_PATH)

                for i, result in enumerate(results):
                    if result.get("status") == "pending":
                        source_id = result["source_id"]
                        source_detail = next(s for s in source_details if s['id'] == source_id)
                        results[i] = {
                            "status": "success",
                            "source": source_detail
                        }

            except sqlite3.Error as e:
                conn.rollback()
                raise ValueError(f"Database error: {str(e)}")

    return results
1528
+
1529
@mcp.tool()
def get_entity_sources(
    entity_filters: List[Tuple[str, Optional[str], Optional[str]]]
) -> List[Dict[str, Any]]:
    """Get all sources linked to specific entities with optional filtering.

    Args:
        entity_filters: List of tuples, each containing:
            - entity_name: Name of the entity
            - type_filter: Optional filter by source type (paper, webpage, book, video, blog)
            - relation_filter: Optional filter by relation type (discusses, introduces, extends, evaluates, applies, critiques)

    Returns:
        List of operation results, each containing:
        {
            "status": "success" | "error",
            "message": Error message if status is "error",
            "entity": Entity name,
            "filters_applied": {
                "type": Applied type filter,
                "relation": Applied relation filter
            },
            "sources": List of source details if status is "success"
        }

    Raises:
        FileNotFoundError: If the database file does not exist.
        ValueError: If a filter value is invalid or a database error occurs.
    """
    if not DB_PATH.exists():
        raise FileNotFoundError(f"Database not found at: {DB_PATH}")

    if not entity_filters:
        return []

    # Reject invalid filter values before running any queries.
    for _, type_filter, relation_filter in entity_filters:
        if type_filter and type_filter not in SourceTypes.VALID_TYPES:
            raise ValueError(f"Invalid type filter. Must be one of: {SourceTypes.VALID_TYPES}")
        if relation_filter and relation_filter not in EntityRelations.VALID_TYPES:
            raise ValueError(f"Invalid relation filter. Must be one of: {EntityRelations.VALID_TYPES}")

    results: List[Dict[str, Any]] = []

    with SQLiteConnection(DB_PATH) as conn:
        cursor = conn.cursor()
        try:
            for entity_name, type_filter, relation_filter in entity_filters:
                # Base query; optional filters append extra conditions.
                query = """
                    SELECT DISTINCT s.id
                    FROM sources s
                    JOIN source_entity_links l ON s.id = l.source_id
                    WHERE l.entity_name = ?
                """
                params: List[str] = [entity_name]

                if type_filter:
                    query += " AND s.type = ?"
                    params.append(type_filter)

                if relation_filter:
                    query += " AND l.relation_type = ?"
                    params.append(relation_filter)

                cursor.execute(query, params)
                matched_ids = [row['id'] for row in cursor.fetchall()]

                # One success entry per entity, whether or not anything
                # matched; "sources" is empty when no linked source passes
                # the filters.
                results.append({
                    "status": "success",
                    "entity": entity_name,
                    "filters_applied": {
                        "type": type_filter,
                        "relation": relation_filter
                    },
                    "sources": get_sources_details(matched_ids, DB_PATH) if matched_ids else []
                })

        except sqlite3.Error as e:
            raise ValueError(f"Database error: {str(e)}")

    return results
1618
+
1619
+
1620
+
1621
+
1622
+
1623
def main():
    """Entry point for the console script: start the FastMCP server."""
    mcp.run()


if __name__ == "__main__":
    # Route direct execution through the same entry point the console
    # script uses, so there is a single place that starts the server.
    main()