content_core-1.10.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44)
  1. content_core/__init__.py +216 -0
  2. content_core/cc_config.yaml +86 -0
  3. content_core/common/__init__.py +38 -0
  4. content_core/common/exceptions.py +70 -0
  5. content_core/common/retry.py +325 -0
  6. content_core/common/state.py +64 -0
  7. content_core/common/types.py +15 -0
  8. content_core/common/utils.py +31 -0
  9. content_core/config.py +575 -0
  10. content_core/content/__init__.py +6 -0
  11. content_core/content/cleanup/__init__.py +5 -0
  12. content_core/content/cleanup/core.py +15 -0
  13. content_core/content/extraction/__init__.py +13 -0
  14. content_core/content/extraction/graph.py +252 -0
  15. content_core/content/identification/__init__.py +9 -0
  16. content_core/content/identification/file_detector.py +505 -0
  17. content_core/content/summary/__init__.py +5 -0
  18. content_core/content/summary/core.py +15 -0
  19. content_core/logging.py +15 -0
  20. content_core/mcp/__init__.py +5 -0
  21. content_core/mcp/server.py +214 -0
  22. content_core/models.py +60 -0
  23. content_core/models_config.yaml +31 -0
  24. content_core/notebooks/run.ipynb +359 -0
  25. content_core/notebooks/urls.ipynb +154 -0
  26. content_core/processors/audio.py +272 -0
  27. content_core/processors/docling.py +79 -0
  28. content_core/processors/office.py +331 -0
  29. content_core/processors/pdf.py +292 -0
  30. content_core/processors/text.py +36 -0
  31. content_core/processors/url.py +324 -0
  32. content_core/processors/video.py +166 -0
  33. content_core/processors/youtube.py +262 -0
  34. content_core/py.typed +2 -0
  35. content_core/templated_message.py +70 -0
  36. content_core/tools/__init__.py +9 -0
  37. content_core/tools/cleanup.py +15 -0
  38. content_core/tools/extract.py +21 -0
  39. content_core/tools/summarize.py +17 -0
  40. content_core-1.10.0.dist-info/METADATA +742 -0
  41. content_core-1.10.0.dist-info/RECORD +44 -0
  42. content_core-1.10.0.dist-info/WHEEL +4 -0
  43. content_core-1.10.0.dist-info/entry_points.txt +5 -0
  44. content_core-1.10.0.dist-info/licenses/LICENSE +21 -0
content_core/content/identification/file_detector.py
@@ -0,0 +1,505 @@
+ """
+ Pure Python file type detection using magic bytes and content analysis.
+ Replaces libmagic dependency with a lightweight implementation.
+ """
+
+ import zipfile
+ from pathlib import Path
+ from typing import Dict, Optional
+
+ from content_core.common.exceptions import UnsupportedTypeException
+ from content_core.logging import logger
+
+
+ class FileDetector:
+     """Pure Python file type detection using magic bytes and content analysis."""
+
+     # Configuration constants for binary/text detection
+     SIGNATURE_READ_SIZE = 512  # Bytes to read for binary signature detection
+     TEXT_READ_SIZE = 1024  # Bytes to read for text content analysis
+
+     # Configuration constants for CSV detection
+     CSV_MAX_FIELD_LENGTH = 100  # Maximum average field length for CSV (longer suggests prose)
+     CSV_MAX_VARIANCE = 500  # Maximum variance in field lengths (higher suggests natural text)
+     CSV_MIN_SCORE = 2  # Minimum score required to classify as CSV
+     CSV_MIN_FIELDS = 2  # Minimum number of fields required for CSV
+     CSV_MAX_HEADER_FIELD_LENGTH = 50  # Maximum length for individual header fields
+
+     def __init__(self):
+         """Initialize the FileDetector with signature mappings."""
+         self.binary_signatures = self._load_binary_signatures()
+         self.text_patterns = self._load_text_patterns()
+         self.extension_mapping = self._load_extension_mapping()
+         self.zip_content_patterns = self._load_zip_content_patterns()
+
+     def _load_binary_signatures(self) -> Dict[bytes, Optional[str]]:
+         """Load binary file signatures (magic bytes) to MIME type mappings."""
+         # Ordered by specificity - longer/more specific signatures first
+         return {
+             # PDF
+             b'%PDF': 'application/pdf',  # PDF document signature (hex: 25 50 44 46)
+
+             # Images
+             b'\xff\xd8\xff\xe0': 'image/jpeg',  # JPEG with JFIF header (JPEG File Interchange Format)
+             b'\xff\xd8\xff\xe1': 'image/jpeg',  # JPEG with EXIF header (Exchangeable Image File Format)
+             b'\xff\xd8\xff\xe2': 'image/jpeg',  # JPEG with Adobe header (Adobe JPEG)
+             b'\xff\xd8\xff\xdb': 'image/jpeg',  # JPEG with DQT (Define Quantization Table) marker
+             b'\xff\xd8': 'image/jpeg',  # Generic JPEG signature (Start of Image marker, must be last)
+             b'\x89PNG\r\n\x1a\n': 'image/png',  # PNG signature (hex: 89 50 4E 47 0D 0A 1A 0A)
+             b'GIF87a': 'image/gif',  # GIF version 87a
+             b'GIF89a': 'image/gif',  # GIF version 89a (supports animation and transparency)
+             b'II*\x00': 'image/tiff',  # TIFF little-endian (Intel byte order)
+             b'MM\x00*': 'image/tiff',  # TIFF big-endian (Motorola byte order)
+             b'BM': 'image/bmp',  # Windows Bitmap signature
+
+             # Audio
+             b'ID3': 'audio/mpeg',  # MP3 with ID3v2 metadata tag
+             b'\xff\xfb': 'audio/mpeg',  # MP3 frame sync with MPEG-1 Layer 3
+             b'\xff\xf3': 'audio/mpeg',  # MP3 frame sync with MPEG-2 Layer 3
+             b'\xff\xf2': 'audio/mpeg',  # MP3 frame sync with MPEG-2.5 Layer 3
+             b'RIFF': None,  # Resource Interchange File Format - requires further inspection (could be WAV, AVI, WebP)
+             b'fLaC': 'audio/flac',  # Free Lossless Audio Codec signature
+
+             # Video/Audio containers - these will be handled by ftyp detection
+             # MP4/M4A/MOV use ftyp box at offset 4 for identification
+
+             # Archive formats
+             b'PK\x03\x04': 'application/zip',  # ZIP archive (also used by DOCX, XLSX, PPTX, JAR, etc.)
+             b'PK\x05\x06': 'application/zip',  # Empty ZIP archive (End of Central Directory)
+         }
+
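Aside: a minimal standalone sketch (not part of the package) of why b'RIFF' maps to None above. The RIFF container tag alone is ambiguous between WAV, AVI, and WebP, so the bytes at offset 8 decide:

# A canonical WAV header begins b'RIFF' + <4-byte size> + b'WAVE'
header = b'RIFF' + (36).to_bytes(4, 'little') + b'WAVE'
assert header[:4] == b'RIFF'    # matches the ambiguous RIFF signature
assert header[8:12] == b'WAVE'  # resolved to 'audio/wav' in _detect_by_signature below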
+     def _load_text_patterns(self) -> Dict[str, str]:
+         """Load text-based format detection patterns."""
+         return {
+             '<!DOCTYPE html': 'text/html',
+             '<!doctype html': 'text/html',
+             '<html': 'text/html',
+             '<?xml': 'text/xml',
+             '{': 'application/json',  # Will need more validation
+             '[': 'application/json',  # Will need more validation
+             '---\n': 'text/yaml',
+             '---\r\n': 'text/yaml',
+         }
+
+     def _load_extension_mapping(self) -> Dict[str, str]:
+         """Load file extension to MIME type mappings as fallback."""
+         return {
+             # Documents
+             '.pdf': 'application/pdf',
+             '.txt': 'text/plain',
+             '.md': 'text/plain',  # Markdown treated as plain text (current behavior)
+             '.markdown': 'text/plain',
+             '.rst': 'text/plain',  # reStructuredText
+             '.log': 'text/plain',
+
+             # Web formats
+             '.html': 'text/html',
+             '.htm': 'text/html',
+             '.xhtml': 'text/html',
+             '.xml': 'text/xml',
+
+             # Data formats
+             '.json': 'application/json',
+             '.yaml': 'text/yaml',
+             '.yml': 'text/yaml',
+             '.csv': 'text/csv',
+             '.tsv': 'text/csv',  # Tab-separated values
+
+             # Images
+             '.jpg': 'image/jpeg',
+             '.jpeg': 'image/jpeg',
+             '.jpe': 'image/jpeg',
+             '.png': 'image/png',
+             '.gif': 'image/gif',
+             '.tiff': 'image/tiff',
+             '.tif': 'image/tiff',
+             '.bmp': 'image/bmp',
+             '.webp': 'image/webp',
+             '.ico': 'image/x-icon',
+             '.svg': 'image/svg+xml',
+
+             # Audio
+             '.mp3': 'audio/mpeg',
+             '.wav': 'audio/wav',
+             '.wave': 'audio/wav',
+             '.m4a': 'audio/mp4',
+             '.aac': 'audio/aac',
+             '.ogg': 'audio/ogg',
+             '.oga': 'audio/ogg',
+             '.flac': 'audio/flac',
+             '.wma': 'audio/x-ms-wma',
+
+             # Video
+             '.mp4': 'video/mp4',
+             '.m4v': 'video/mp4',
+             '.avi': 'video/x-msvideo',
+             '.mov': 'video/quicktime',
+             '.qt': 'video/quicktime',
+             '.wmv': 'video/x-ms-wmv',
+             '.flv': 'video/x-flv',
+             '.mkv': 'video/x-matroska',
+             '.webm': 'video/webm',
+             '.mpg': 'video/mpeg',
+             '.mpeg': 'video/mpeg',
+             '.3gp': 'video/3gpp',
+
+             # Office formats
+             '.docx': 'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
+             '.xlsx': 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
+             '.pptx': 'application/vnd.openxmlformats-officedocument.presentationml.presentation',
+
+             # E-books
+             '.epub': 'application/epub+zip',
+
+             # Archives (basic detection - not expanded)
+             '.zip': 'application/zip',
+             '.tar': 'application/x-tar',
+             '.gz': 'application/gzip',
+             '.bz2': 'application/x-bzip2',
+             '.7z': 'application/x-7z-compressed',
+             '.rar': 'application/x-rar-compressed',
+         }
+
+     def _load_zip_content_patterns(self) -> Dict[str, str]:
+         """Load patterns for identifying ZIP-based formats by their content."""
+         return {
+             'word/': 'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
+             'xl/': 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
+             'ppt/': 'application/vnd.openxmlformats-officedocument.presentationml.presentation',
+             'META-INF/container.xml': 'application/epub+zip',
+         }
+
+     async def detect(self, file_path: str) -> str:
+         """
+         Detect file type using magic bytes and content analysis.
+
+         Args:
+             file_path: Path to the file to analyze
+
+         Returns:
+             MIME type string
+
+         Raises:
+             UnsupportedTypeException: If file type cannot be determined
+         """
+         file_path = Path(file_path)
+
+         if not file_path.exists():
+             raise FileNotFoundError(f"File not found: {file_path}")
+
+         if not file_path.is_file():
+             raise ValueError(f"Not a file: {file_path}")
+
+         # Try binary signature detection first
+         mime_type = await self._detect_by_signature(file_path)
+         if mime_type:
+             logger.debug(f"Detected {file_path} as {mime_type} by signature")
+             return mime_type
+
+         # Try text-based detection
+         mime_type = await self._detect_text_format(file_path)
+         if mime_type:
+             logger.debug(f"Detected {file_path} as {mime_type} by text analysis")
+             return mime_type
+
+         # Fallback to extension
+         mime_type = self._detect_by_extension(file_path)
+         if mime_type:
+             logger.debug(f"Detected {file_path} as {mime_type} by extension")
+             return mime_type
+
+         # If all detection methods fail
+         raise UnsupportedTypeException(f"Unable to determine file type for: {file_path}")
+
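A hypothetical usage sketch (not shipped in the wheel): detect() is a coroutine, so callers need an event loop. The sample path is an assumption.

import asyncio

from content_core.content.identification.file_detector import FileDetector

async def identify(path: str) -> str:
    detector = FileDetector()
    return await detector.detect(path)

print(asyncio.run(identify("example.pdf")))  # hypothetical file; prints e.g. 'application/pdf'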
+     async def _detect_by_signature(self, file_path: Path) -> Optional[str]:
+         """Detect file type by binary signature (magic bytes)."""
+         try:
+             with open(file_path, 'rb') as f:
+                 # Read bytes for signature detection
+                 header = f.read(self.SIGNATURE_READ_SIZE)
+
+             if not header:
+                 return None
+
+             # Check for exact signature matches
+             for signature, mime_type in self.binary_signatures.items():
+                 if header.startswith(signature):
+                     # Special handling for RIFF (could be WAV or AVI)
+                     if signature == b'RIFF' and len(header) >= 12:
+                         if header[8:12] == b'WAVE':
+                             return 'audio/wav'
+                         elif header[8:12] == b'AVI ':
+                             return 'video/x-msvideo'
+
+                     # Special handling for ZIP-based formats
+                     if mime_type == 'application/zip':
+                         zip_mime = await self._detect_zip_format(file_path)
+                         if zip_mime:
+                             return zip_mime
+
+                     if mime_type:
+                         return mime_type
+
+             # Special check for MP4/MOV files with ftyp box
+             if len(header) >= 12 and header[4:8] == b'ftyp':
+                 ftyp_brand = header[8:12]
+                 # Don't strip - check exact 4-byte brand
+                 if ftyp_brand == b'M4A ' or ftyp_brand.startswith(b'M4A'):
+                     return 'audio/mp4'
+                 elif ftyp_brand in [b'mp41', b'mp42', b'isom', b'iso2', b'iso5', b'M4V ', b'M4VP']:
+                     return 'video/mp4'
+                 elif ftyp_brand.startswith(b'qt'):
+                     return 'video/quicktime'
+                 else:
+                     # Generic MP4 for other ftyp brands
+                     return 'video/mp4'
+
+             return None
+
+         except Exception as e:
+             logger.debug(f"Error reading file signature: {e}")
+             return None
+
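A small standalone sketch of the ftyp logic above, with the bytes built by hand: an ISO media file carries a four-byte brand right after the literal b'ftyp' at offset 4.

# 4-byte box size, then 'ftyp', then the brand, padded to a plausible box length
header = (32).to_bytes(4, 'big') + b'ftyp' + b'isom' + b'\x00' * 20
assert header[4:8] == b'ftyp' and header[8:12] == b'isom'  # classified as 'video/mp4'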
+     async def _detect_zip_format(self, file_path: Path) -> Optional[str]:
+         """Detect specific ZIP-based format (DOCX, XLSX, PPTX, EPUB)."""
+         try:
+             with zipfile.ZipFile(file_path, 'r') as zf:
+                 namelist = zf.namelist()
+
+             # Check for specific content patterns
+             for pattern, mime_type in self.zip_content_patterns.items():
+                 if any(name.startswith(pattern) for name in namelist):
+                     return mime_type
+
+             # If it's a valid ZIP but no specific pattern matched
+             return 'application/zip'
+
+         except zipfile.BadZipFile:
+             logger.debug(f"Invalid ZIP file: {file_path}")
+             return None
+         except Exception as e:
+             logger.debug(f"Error inspecting ZIP content: {e}")
+             return None
+
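A quick standard-library-only sketch of the ZIP-content rule: a DOCX is a ZIP whose entries live under word/, which is exactly what _load_zip_content_patterns keys on.

import io
import zipfile

buf = io.BytesIO()
with zipfile.ZipFile(buf, 'w') as zf:
    zf.writestr('word/document.xml', '<w:document/>')  # minimal stand-in entry
with zipfile.ZipFile(buf) as zf:
    assert any(name.startswith('word/') for name in zf.namelist())  # -> DOCX MIME type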
+     async def _detect_text_format(self, file_path: Path) -> Optional[str]:
+         """Detect text-based formats by content analysis."""
+         try:
+             # Read characters for text content analysis
+             with open(file_path, 'r', encoding='utf-8', errors='replace') as f:
+                 content = f.read(self.TEXT_READ_SIZE)
+
+             if not content or len(content) < 10:
+                 return None
+
+             # Strip whitespace for analysis
+             content_stripped = content.strip()
+
+             # Check for text patterns
+             for pattern, mime_type in self.text_patterns.items():
+                 if content_stripped.lower().startswith(pattern.lower()):
+                     # Special validation for JSON
+                     if mime_type == 'application/json':
+                         if self._is_valid_json_start(content_stripped):
+                             return mime_type
+                     # HTML needs to be detected for routing
+                     elif mime_type == 'text/html':
+                         return mime_type
+                     # For other text patterns (YAML, etc.), just return text/plain
+                     else:
+                         return 'text/plain'
+
+             # Check for CSV pattern (multiple comma-separated values)
+             if self._looks_like_csv(content):
+                 return 'text/csv'
+
+             # If it's readable text but no specific format detected
+             if self._is_text_file(content):
+                 return 'text/plain'
+
+             return None
+
+         except UnicodeDecodeError:
+             # Not a text file
+             return None
+         except Exception as e:
+             logger.debug(f"Error analyzing text content: {e}")
+             return None
+
+     def _detect_by_extension(self, file_path: Path) -> Optional[str]:
+         """Detect file type by extension as fallback."""
+         extension = file_path.suffix.lower()
+         return self.extension_mapping.get(extension)
+
+     def _is_valid_json_start(self, content: str) -> bool:
+         """Check if content starts like valid JSON."""
+         # More robust JSON detection
+         content = content.strip()
+         if not (content.startswith('{') or content.startswith('[')):
+             return False
+
+         # Strong JSON indicators that are less likely in other formats
+         strong_indicators = [
+             '{\n "',   # Pretty-printed JSON object
+             '{\n\t"',  # Tab-indented JSON
+             '{"',      # Compact JSON object
+             '[\n {',   # Pretty-printed JSON array
+             '[{',      # Compact JSON array
+             '": {',    # Nested object
+             '": [',    # Nested array
+         ]
+
+         # Check for strong indicators
+         for indicator in strong_indicators:
+             if indicator in content[:200]:
+                 return True
+
+         # Weaker indicators - require multiple matches
+         json_patterns = ['":', '": ', '",', ', "', '"]', '"}']
+         pattern_count = sum(1 for pattern in json_patterns if pattern in content[:200])
+
+         # Check for JSON keywords but not in URLs or natural text
+         json_keywords = ['true', 'false', 'null']
+         keyword_count = 0
+         content_lower = content[:200].lower()
+         for kw in json_keywords:
+             # Check if keyword appears as a value (not in URL or sentence)
+             if f': {kw}' in content_lower or f':{kw}' in content_lower or f', {kw}' in content_lower:
+                 keyword_count += 1
+
+         # Require stronger evidence to avoid false positives
+         return pattern_count >= 3 or keyword_count >= 1
+
+
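Two illustrative inputs for the heuristic above (calling the private method directly, purely for demonstration): both start with '{', but only the first contains a strong indicator such as '{"'.

detector = FileDetector()
assert detector._is_valid_json_start('{"name": "demo"}') is True
assert detector._is_valid_json_start('{draft} see notes') is False  # no JSON indicators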
+     def _looks_like_csv(self, content: str) -> bool:
+         """
+         Check if content looks like CSV format with improved heuristics.
+
+         Uses a multi-stage approach with performance optimization:
+         1. Basic structural checks (cheap)
+         2. Field length analysis (cheap, early exit)
+         3. Pattern matching (moderate cost)
+         4. Variance analysis (expensive, only if needed)
+         """
+         lines = content.split('\n', 10)[:10]  # Check first 10 lines for better accuracy
+         non_empty_lines = [line for line in lines if line.strip()]
+
+         # Stage 1: Basic structural checks
+         if len(non_empty_lines) < 2:
+             return False
+
+         # Count commas in each line
+         comma_counts = [line.count(',') for line in non_empty_lines]
+
+         # Must have at least one comma per line
+         if not all(count > 0 for count in comma_counts):
+             return False
+
+         # CSV should have consistent comma counts across lines
+         if len(set(comma_counts)) != 1:
+             return False
+
+         num_fields = comma_counts[0] + 1  # Number of fields = commas + 1
+
+         # Must have minimum number of fields to be CSV
+         if num_fields < self.CSV_MIN_FIELDS:
+             return False
+
+         # Stage 2: Field length analysis (PERFORMANCE OPTIMIZATION: early exit)
+         first_line = non_empty_lines[0]
+         fields = first_line.split(',')
+
+         # CSV fields should be relatively short (not long sentences)
+         # Average field length should be reasonable (not paragraphs)
+         # Early exit avoids expensive variance calculations for obvious prose
+         avg_field_length = sum(len(f.strip()) for f in fields) / len(fields)
+         if avg_field_length > self.CSV_MAX_FIELD_LENGTH:
+             return False  # Too long to be typical CSV fields - exit early
+
+         # Stage 3: Pattern matching
+         # Check for CSV-like patterns:
+         # 1. Fields that look like headers (short, alphanumeric)
+         # 2. Quoted fields (common in CSV)
+         # 3. Numeric fields
+         has_quoted_fields = any('"' in line or "'" in line for line in non_empty_lines[:3])
+
+         first_line_fields = [f.strip() for f in fields]
+         # Check if first line looks like a header (short, no sentence-ending punctuation)
+         looks_like_header = all(
+             len(f) < self.CSV_MAX_HEADER_FIELD_LENGTH and not f.endswith('.') and not f.endswith('!')
+             for f in first_line_fields
+         )
+
+         # Stage 4: Variance analysis (EXPENSIVE - only if we have enough data)
+         # Check if subsequent lines have similar field structure
+         # Real CSV tends to have consistent field lengths
+         if len(non_empty_lines) >= 3:
+             field_lengths_per_line = []
+             for line in non_empty_lines[:5]:
+                 line_fields = line.split(',')
+                 field_lengths = [len(f.strip()) for f in line_fields]
+                 field_lengths_per_line.append(field_lengths)
+
+             # Calculate variance in field positions
+             # CSV data should have relatively consistent field lengths at each position
+             # Natural text with commas will have much more variance
+             position_variances = []
+             for i in range(num_fields):
+                 lengths_at_position = [fl[i] if i < len(fl) else 0 for fl in field_lengths_per_line]
+                 if lengths_at_position:
+                     avg = sum(lengths_at_position) / len(lengths_at_position)
+                     variance = sum((x - avg) ** 2 for x in lengths_at_position) / len(lengths_at_position)
+                     position_variances.append(variance)
+
+             # High variance suggests natural text, not structured CSV
+             if position_variances:
+                 avg_variance = sum(position_variances) / len(position_variances)
+                 if avg_variance > self.CSV_MAX_VARIANCE:
+                     return False  # Very high variance = likely prose
+
+         # Scoring: Require at least some CSV-like characteristics
+         csv_score = 0
+         if looks_like_header:
+             csv_score += 1
+         if has_quoted_fields:
+             csv_score += 1
+         if num_fields >= 3:  # Multiple fields is more CSV-like
+             csv_score += 1
+
+         # Need minimum score to confidently classify as CSV
+         return csv_score >= self.CSV_MIN_SCORE
+
+
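An illustrative contrast for the CSV heuristics (again calling the private method for demonstration): structured rows score on the header and field-count checks, while comma-laden prose fails the header test and scores too low.

detector = FileDetector()
csv_sample = 'name,age,city\nAda,36,London\nAlan,41,Manchester\n'
prose = 'Well, as it happens, things changed.\nStill, nobody minded, apparently.\n'
assert detector._looks_like_csv(csv_sample) is True
assert detector._looks_like_csv(prose) is False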
+     def _is_text_file(self, content: str) -> bool:
+         """Check if content appears to be plain text."""
+         if not content or len(content) < 10:  # Need reasonable content
+             return False
+
+         # Check for high ratio of printable characters
+         printable_chars = sum(1 for c in content if c.isprintable() or c.isspace())
+
+         # Also check that it has reasonable line lengths (not binary data)
+         lines = content.split('\n')
+         max_line_length = max(len(line) for line in lines) if lines else 0
+
+         # Text files typically have lines under 1000 chars and high printable ratio
+         return (printable_chars / len(content) > 0.95 and
+                 max_line_length < 1000 and
+                 len(content) > 20)  # Minimum reasonable text file size
+
+
+ # Backward compatibility function
+ async def get_file_type(file_path: str) -> str:
+     """
+     Legacy function for compatibility with existing code.
+
+     Args:
+         file_path: Path to the file to analyze
+
+     Returns:
+         MIME type string
+
+     Raises:
+         UnsupportedTypeException: If file type cannot be determined
+     """
+     detector = FileDetector()
+     return await detector.detect(file_path)
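The legacy wrapper keeps one-call detection available; a hypothetical one-liner (sample path assumed):

import asyncio

from content_core.content.identification.file_detector import get_file_type

print(asyncio.run(get_file_type('report.docx')))  # hypothetical file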
content_core/content/summary/__init__.py
@@ -0,0 +1,5 @@
+ """Content summarization functionality for content-core."""
+
+ from .core import summarize
+
+ __all__ = ["summarize"]
content_core/content/summary/core.py
@@ -0,0 +1,15 @@
+ from functools import partial
+
+ from content_core.models import ModelFactory
+ from content_core.templated_message import TemplatedMessageInput, templated_message
+
+
+ async def summarize(content: str, context: str) -> str:
+     templated_message_fn = partial(templated_message, model=ModelFactory.get_model('summary_model'))
+     response = await templated_message_fn(
+         TemplatedMessageInput(
+             user_prompt_template="content/summarize",
+             data={"content": content, "context": context},
+         )
+     )
+     return response
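A hypothetical usage sketch for summarize(); it assumes a 'summary_model' is configured for ModelFactory (via the package's models_config.yaml), which is not shown in this diff:

import asyncio

from content_core.content.summary import summarize

summary = asyncio.run(summarize('Long article text...', context='summarize in one paragraph'))
print(summary)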
content_core/logging.py
@@ -0,0 +1,15 @@
+ import sys
+ from loguru import logger
+
+ def configure_logging(debug=False):
+     """
+     Configure the global logger for the application.
+
+     Args:
+         debug (bool): If True, set logging level to DEBUG; otherwise, set to INFO.
+     """
+     logger.remove()  # Remove any existing handlers
+     logger.add(sys.stderr, level="DEBUG" if debug else "INFO")
+
+ # Initial configuration with default level (INFO)
+ configure_logging(debug=False)
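A minimal sketch of overriding the default level (the module already installs an INFO-level stderr sink at import time):

from content_core.logging import configure_logging

configure_logging(debug=True)  # replaces the INFO sink with a DEBUG-level one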
content_core/mcp/__init__.py
@@ -0,0 +1,5 @@
+ """Content Core MCP Server module."""
+
+ from .server import mcp, main
+
+ __all__ = ["mcp", "main"]
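A hedged sketch of the re-exports above: main comes from content_core.mcp.server and is presumably the launcher registered in entry_points.txt, though its exact signature is not shown in this diff.

from content_core.mcp import main, mcp  # mcp: server object; main: launcher

if __name__ == '__main__':
    main()  # assumption: blocks and serves MCP requests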