content-core 1.3.0__py3-none-any.whl → 1.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

This release of content-core has been flagged as potentially problematic.

@@ -4,7 +4,6 @@ from typing import Any, Dict, Optional
 from urllib.parse import urlparse
 
 import aiohttp
-import magic
 from langgraph.graph import END, START, StateGraph
 
 from content_core.common import (
@@ -54,12 +53,14 @@ async def source_identification(state: ProcessSourceState) -> Dict[str, str]:
 
 async def file_type(state: ProcessSourceState) -> Dict[str, Any]:
     """
-    Identify the file using python-magic
+    Identify the file using pure Python file detection
     """
+    from content_core.content.identification import get_file_type
+
     return_dict = {}
     file_path = state.file_path
     if file_path is not None:
-        return_dict["identified_type"] = magic.from_file(file_path, mime=True)
+        return_dict["identified_type"] = await get_file_type(file_path)
         return_dict["title"] = os.path.basename(file_path)
     return return_dict
 
@@ -1,8 +1,9 @@
-import magic
+from .file_detector import FileDetector
 
 
 async def get_file_type(file_path: str) -> str:
     """
-    Identify the file using python-magic
+    Identify the file using pure Python file detection
     """
-    return magic.from_file(file_path, mime=True)
+    detector = FileDetector()
+    return await detector.detect(file_path)
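
The call-site change above boils down to swapping a synchronous `magic.from_file(path, mime=True)` call for the awaitable `get_file_type` helper. A minimal call-site sketch, assuming content-core 1.4.0 is installed; the file name is a hypothetical example:

```python
# Minimal sketch of the 1.4.0 call site; "report.pdf" is a hypothetical example file.
import asyncio

from content_core.content.identification import get_file_type


async def main() -> None:
    # 1.3.0 equivalent: magic.from_file("report.pdf", mime=True) (synchronous, needs libmagic)
    # 1.4.0: pure Python detection exposed as a coroutine
    mime = await get_file_type("report.pdf")
    print(mime)  # e.g. "application/pdf" for a PDF


asyncio.run(main())
```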
@@ -0,0 +1,415 @@
+"""
+Pure Python file type detection using magic bytes and content analysis.
+Replaces libmagic dependency with a lightweight implementation.
+"""
+
+import os
+import zipfile
+from pathlib import Path
+from typing import Dict, Optional, Tuple
+
+from content_core.common.exceptions import UnsupportedTypeException
+from content_core.logging import logger
+
+
+class FileDetector:
+    """Pure Python file type detection using magic bytes and content analysis."""
+
+    # Configuration constants
+    SIGNATURE_READ_SIZE = 512  # Bytes to read for binary signature detection
+    TEXT_READ_SIZE = 1024  # Bytes to read for text content analysis
+
+    def __init__(self):
+        """Initialize the FileDetector with signature mappings."""
+        self.binary_signatures = self._load_binary_signatures()
+        self.text_patterns = self._load_text_patterns()
+        self.extension_mapping = self._load_extension_mapping()
+        self.zip_content_patterns = self._load_zip_content_patterns()
+
+    def _load_binary_signatures(self) -> Dict[bytes, str]:
+        """Load binary file signatures (magic bytes) to MIME type mappings."""
+        # Ordered by specificity - longer/more specific signatures first
+        return {
+            # PDF
+            b'%PDF': 'application/pdf',  # PDF document signature (hex: 25 50 44 46)
+
+            # Images
+            b'\xff\xd8\xff\xe0': 'image/jpeg',  # JPEG with JFIF header (JPEG File Interchange Format)
+            b'\xff\xd8\xff\xe1': 'image/jpeg',  # JPEG with EXIF header (Exchangeable Image File Format)
+            b'\xff\xd8\xff\xe2': 'image/jpeg',  # JPEG with Adobe header (Adobe JPEG)
+            b'\xff\xd8\xff\xdb': 'image/jpeg',  # JPEG with DQT (Define Quantization Table) marker
+            b'\xff\xd8': 'image/jpeg',  # Generic JPEG signature (Start of Image marker, must be last)
+            b'\x89PNG\r\n\x1a\n': 'image/png',  # PNG signature (hex: 89 50 4E 47 0D 0A 1A 0A)
+            b'GIF87a': 'image/gif',  # GIF version 87a
+            b'GIF89a': 'image/gif',  # GIF version 89a (supports animation and transparency)
+            b'II*\x00': 'image/tiff',  # TIFF little-endian (Intel byte order)
+            b'MM\x00*': 'image/tiff',  # TIFF big-endian (Motorola byte order)
+            b'BM': 'image/bmp',  # Windows Bitmap signature
+
+            # Audio
+            b'ID3': 'audio/mpeg',  # MP3 with ID3v2 metadata tag
+            b'\xff\xfb': 'audio/mpeg',  # MP3 frame sync with MPEG-1 Layer 3
+            b'\xff\xf3': 'audio/mpeg',  # MP3 frame sync with MPEG-2 Layer 3
+            b'\xff\xf2': 'audio/mpeg',  # MP3 frame sync with MPEG-2.5 Layer 3
+            b'RIFF': None,  # Resource Interchange File Format - requires further inspection (could be WAV, AVI, WebP)
+            b'fLaC': 'audio/flac',  # Free Lossless Audio Codec signature
+
+            # Video/Audio containers - these will be handled by ftyp detection
+            # MP4/M4A/MOV use ftyp box at offset 4 for identification
+
+            # Archive formats
+            b'PK\x03\x04': 'application/zip',  # ZIP archive (also used by DOCX, XLSX, PPTX, JAR, etc.)
+            b'PK\x05\x06': 'application/zip',  # Empty ZIP archive (End of Central Directory)
+        }
+
+    def _load_text_patterns(self) -> Dict[str, str]:
+        """Load text-based format detection patterns."""
+        return {
+            '<!DOCTYPE html': 'text/html',
+            '<!doctype html': 'text/html',
+            '<html': 'text/html',
+            '<?xml': 'text/xml',
+            '{': 'application/json',  # Will need more validation
+            '[': 'application/json',  # Will need more validation
+            '---\n': 'text/yaml',
+            '---\r\n': 'text/yaml',
+        }
+
+    def _load_extension_mapping(self) -> Dict[str, str]:
+        """Load file extension to MIME type mappings as fallback."""
+        return {
+            # Documents
+            '.pdf': 'application/pdf',
+            '.txt': 'text/plain',
+            '.md': 'text/plain',  # Markdown treated as plain text (current behavior)
+            '.markdown': 'text/plain',
+            '.rst': 'text/plain',  # reStructuredText
+            '.log': 'text/plain',
+
+            # Web formats
+            '.html': 'text/html',
+            '.htm': 'text/html',
+            '.xhtml': 'text/html',
+            '.xml': 'text/xml',
+
+            # Data formats
+            '.json': 'application/json',
+            '.yaml': 'text/yaml',
+            '.yml': 'text/yaml',
+            '.csv': 'text/csv',
+            '.tsv': 'text/csv',  # Tab-separated values
+
+            # Images
+            '.jpg': 'image/jpeg',
+            '.jpeg': 'image/jpeg',
+            '.jpe': 'image/jpeg',
+            '.png': 'image/png',
+            '.gif': 'image/gif',
+            '.tiff': 'image/tiff',
+            '.tif': 'image/tiff',
+            '.bmp': 'image/bmp',
+            '.webp': 'image/webp',
+            '.ico': 'image/x-icon',
+            '.svg': 'image/svg+xml',
+
+            # Audio
+            '.mp3': 'audio/mpeg',
+            '.wav': 'audio/wav',
+            '.wave': 'audio/wav',
+            '.m4a': 'audio/mp4',
+            '.aac': 'audio/aac',
+            '.ogg': 'audio/ogg',
+            '.oga': 'audio/ogg',
+            '.flac': 'audio/flac',
+            '.wma': 'audio/x-ms-wma',
+
+            # Video
+            '.mp4': 'video/mp4',
+            '.m4v': 'video/mp4',
+            '.avi': 'video/x-msvideo',
+            '.mov': 'video/quicktime',
+            '.qt': 'video/quicktime',
+            '.wmv': 'video/x-ms-wmv',
+            '.flv': 'video/x-flv',
+            '.mkv': 'video/x-matroska',
+            '.webm': 'video/webm',
+            '.mpg': 'video/mpeg',
+            '.mpeg': 'video/mpeg',
+            '.3gp': 'video/3gpp',
+
+            # Office formats
+            '.docx': 'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
+            '.xlsx': 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
+            '.pptx': 'application/vnd.openxmlformats-officedocument.presentationml.presentation',
+
+            # E-books
+            '.epub': 'application/epub+zip',
+
+            # Archives (basic detection - not expanded)
+            '.zip': 'application/zip',
+            '.tar': 'application/x-tar',
+            '.gz': 'application/gzip',
+            '.bz2': 'application/x-bzip2',
+            '.7z': 'application/x-7z-compressed',
+            '.rar': 'application/x-rar-compressed',
+        }
+
+    def _load_zip_content_patterns(self) -> Dict[str, str]:
+        """Load patterns for identifying ZIP-based formats by their content."""
+        return {
+            'word/': 'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
+            'xl/': 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
+            'ppt/': 'application/vnd.openxmlformats-officedocument.presentationml.presentation',
+            'META-INF/container.xml': 'application/epub+zip',
+        }
+
+    async def detect(self, file_path: str) -> str:
+        """
+        Detect file type using magic bytes and content analysis.
+
+        Args:
+            file_path: Path to the file to analyze
+
+        Returns:
+            MIME type string
+
+        Raises:
+            UnsupportedTypeException: If file type cannot be determined
+        """
+        file_path = Path(file_path)
+
+        if not file_path.exists():
+            raise FileNotFoundError(f"File not found: {file_path}")
+
+        if not file_path.is_file():
+            raise ValueError(f"Not a file: {file_path}")
+
+        # Try binary signature detection first
+        mime_type = await self._detect_by_signature(file_path)
+        if mime_type:
+            logger.debug(f"Detected {file_path} as {mime_type} by signature")
+            return mime_type
+
+        # Try text-based detection
+        mime_type = await self._detect_text_format(file_path)
+        if mime_type:
+            logger.debug(f"Detected {file_path} as {mime_type} by text analysis")
+            return mime_type
+
+        # Fallback to extension
+        mime_type = self._detect_by_extension(file_path)
+        if mime_type:
+            logger.debug(f"Detected {file_path} as {mime_type} by extension")
+            return mime_type
+
+        # If all detection methods fail
+        raise UnsupportedTypeException(f"Unable to determine file type for: {file_path}")
+
+    async def _detect_by_signature(self, file_path: Path) -> Optional[str]:
+        """Detect file type by binary signature (magic bytes)."""
+        try:
+            with open(file_path, 'rb') as f:
+                # Read bytes for signature detection
+                header = f.read(self.SIGNATURE_READ_SIZE)
+
+            if not header:
+                return None
+
+            # Check for exact signature matches
+            for signature, mime_type in self.binary_signatures.items():
+                if header.startswith(signature):
+                    # Special handling for RIFF (could be WAV or AVI)
+                    if signature == b'RIFF' and len(header) >= 12:
+                        if header[8:12] == b'WAVE':
+                            return 'audio/wav'
+                        elif header[8:12] == b'AVI ':
+                            return 'video/x-msvideo'
+
+                    # Special handling for ZIP-based formats
+                    if mime_type == 'application/zip':
+                        zip_mime = await self._detect_zip_format(file_path)
+                        if zip_mime:
+                            return zip_mime
+
+                    if mime_type:
+                        return mime_type
+
+            # Special check for MP4/MOV files with ftyp box
+            if len(header) >= 12 and header[4:8] == b'ftyp':
+                ftyp_brand = header[8:12]
+                # Don't strip - check exact 4-byte brand
+                if ftyp_brand == b'M4A ' or ftyp_brand.startswith(b'M4A'):
+                    return 'audio/mp4'
+                elif ftyp_brand in [b'mp41', b'mp42', b'isom', b'iso2', b'iso5', b'M4V ', b'M4VP']:
+                    return 'video/mp4'
+                elif ftyp_brand.startswith(b'qt'):
+                    return 'video/quicktime'
+                else:
+                    # Generic MP4 for other ftyp brands
+                    return 'video/mp4'
+
+            return None
+
+        except Exception as e:
+            logger.debug(f"Error reading file signature: {e}")
+            return None
+
+    async def _detect_zip_format(self, file_path: Path) -> Optional[str]:
+        """Detect specific ZIP-based format (DOCX, XLSX, PPTX, EPUB)."""
+        try:
+            with zipfile.ZipFile(file_path, 'r') as zf:
+                namelist = zf.namelist()
+
+                # Check for specific content patterns
+                for pattern, mime_type in self.zip_content_patterns.items():
+                    if any(name.startswith(pattern) for name in namelist):
+                        return mime_type
+
+                # If it's a valid ZIP but no specific pattern matched
+                return 'application/zip'
+
+        except zipfile.BadZipFile:
+            logger.debug(f"Invalid ZIP file: {file_path}")
+            return None
+        except Exception as e:
+            logger.debug(f"Error inspecting ZIP content: {e}")
+            return None
+
+    async def _detect_text_format(self, file_path: Path) -> Optional[str]:
+        """Detect text-based formats by content analysis."""
+        try:
+            # Read bytes for text content analysis
+            with open(file_path, 'r', encoding='utf-8', errors='replace') as f:
+                content = f.read(self.TEXT_READ_SIZE)
+
+            if not content or len(content) < 10:
+                return None
+
+            # Strip whitespace for analysis
+            content_stripped = content.strip()
+
+            # Check for text patterns
+            for pattern, mime_type in self.text_patterns.items():
+                if content_stripped.lower().startswith(pattern.lower()):
+                    # Special validation for JSON
+                    if mime_type == 'application/json':
+                        if self._is_valid_json_start(content_stripped):
+                            return mime_type
+                    # HTML needs to be detected for routing
+                    elif mime_type == 'text/html':
+                        return mime_type
+                    # For other text patterns (YAML, etc), just return text/plain
+                    else:
+                        return 'text/plain'
+
+            # Check for CSV pattern (multiple comma-separated values)
+            if self._looks_like_csv(content):
+                return 'text/csv'
+
+            # If it's readable text but no specific format detected
+            if self._is_text_file(content):
+                return 'text/plain'
+
+            return None
+
+        except UnicodeDecodeError:
+            # Not a text file
+            return None
+        except Exception as e:
+            logger.debug(f"Error analyzing text content: {e}")
+            return None
+
+    def _detect_by_extension(self, file_path: Path) -> Optional[str]:
+        """Detect file type by extension as fallback."""
+        extension = file_path.suffix.lower()
+        return self.extension_mapping.get(extension)
+
+    def _is_valid_json_start(self, content: str) -> bool:
+        """Check if content starts like valid JSON."""
+        # More robust JSON detection
+        content = content.strip()
+        if not (content.startswith('{') or content.startswith('[')):
+            return False
+
+        # Strong JSON indicators that are less likely in other formats
+        strong_indicators = [
+            '{\n "',  # Pretty-printed JSON object
+            '{\n\t"',  # Tab-indented JSON
+            '{"',  # Compact JSON object
+            '[\n {',  # Pretty-printed JSON array
+            '[{',  # Compact JSON array
+            '": {',  # Nested object
+            '": ['  # Nested array
+        ]
+
+        # Check for strong indicators
+        for indicator in strong_indicators:
+            if indicator in content[:200]:
+                return True
+
+        # Weaker indicators - require multiple matches
+        json_patterns = ['":', '": ', '",', ', "', '"]', '"}']
+        pattern_count = sum(1 for pattern in json_patterns if pattern in content[:200])
+
+        # Check for JSON keywords but not in URLs or natural text
+        json_keywords = ['true', 'false', 'null']
+        keyword_count = 0
+        content_lower = content[:200].lower()
+        for kw in json_keywords:
+            # Check if keyword appears as a value (not in URL or sentence)
+            if f': {kw}' in content_lower or f':{kw}' in content_lower or f', {kw}' in content_lower:
+                keyword_count += 1
+
+        # Require stronger evidence to avoid false positives
+        return pattern_count >= 3 or keyword_count >= 1
+
+
+    def _looks_like_csv(self, content: str) -> bool:
+        """Check if content looks like CSV format."""
+        lines = content.split('\n', 5)[:5]  # Check first 5 lines
+        if len(lines) < 2:
+            return False
+
+        # Count commas in each line
+        comma_counts = [line.count(',') for line in lines if line.strip()]
+        if not comma_counts:
+            return False
+
+        # CSV should have consistent comma counts
+        return len(set(comma_counts)) == 1 and comma_counts[0] > 0
+
+
+    def _is_text_file(self, content: str) -> bool:
+        """Check if content appears to be plain text."""
+        if not content or len(content) < 10:  # Need reasonable content
+            return False
+
+        # Check for high ratio of printable characters
+        printable_chars = sum(1 for c in content if c.isprintable() or c.isspace())
+
+        # Also check that it has reasonable line lengths (not binary data)
+        lines = content.split('\n')
+        max_line_length = max(len(line) for line in lines) if lines else 0
+
+        # Text files typically have lines under 1000 chars and high printable ratio
+        return (printable_chars / len(content) > 0.95 and
+                max_line_length < 1000 and
+                len(content) > 20)  # Minimum reasonable text file size
+
+
+# Backward compatibility function
+async def get_file_type(file_path: str) -> str:
+    """
+    Legacy function for compatibility with existing code.
+
+    Args:
+        file_path: Path to the file to analyze
+
+    Returns:
+        MIME type string
+
+    Raises:
+        UnsupportedTypeException: If file type cannot be determined
+    """
+    detector = FileDetector()
+    return await detector.detect(file_path)
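
The new `FileDetector.detect()` coroutine cascades through binary signatures, ZIP content inspection, text heuristics, and finally the extension map, raising `UnsupportedTypeException` when nothing matches. A short usage sketch against that API, assuming the package is installed; the listed paths are hypothetical examples:

```python
# Sketch exercising FileDetector directly; the class, detect(), and the exception
# are taken from the diff above, while the sample paths are hypothetical.
import asyncio

from content_core.common.exceptions import UnsupportedTypeException
from content_core.content.identification.file_detector import FileDetector


async def main() -> None:
    detector = FileDetector()
    for path in ["slides.pptx", "notes.md", "track.m4a"]:
        try:
            # detect() order: magic bytes -> ZIP contents -> text heuristics -> extension fallback
            print(path, await detector.detect(path))
        except FileNotFoundError:
            print(path, "file does not exist")
        except UnsupportedTypeException:
            print(path, "type could not be determined")


asyncio.run(main())
```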
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: content-core
-Version: 1.3.0
+Version: 1.4.0
 Summary: Extract what matters from any media source. Available as Python Library, macOS Service, CLI and MCP Server
 Author-email: LUIS NOVO <lfnovo@gmail.com>
 License-File: LICENSE
@@ -11,7 +11,7 @@ Requires-Dist: asciidoc>=10.2.1
 Requires-Dist: bs4>=0.0.2
 Requires-Dist: dicttoxml>=1.7.16
 Requires-Dist: esperanto>=1.2.0
-Requires-Dist: fastmcp>=0.5.0
+Requires-Dist: fastmcp>=2.10.0
 Requires-Dist: firecrawl-py>=2.7.0
 Requires-Dist: jinja2>=3.1.6
 Requires-Dist: langdetect>=1.0.9
@@ -24,8 +24,6 @@ Requires-Dist: pillow>=10.4.0
 Requires-Dist: pymupdf>=1.25.5
 Requires-Dist: python-docx>=1.1.2
 Requires-Dist: python-dotenv>=1.1.0
-Requires-Dist: python-magic-bin==0.4.14; sys_platform == 'win32'
-Requires-Dist: python-magic>=0.4.27
 Requires-Dist: python-pptx>=1.0.2
 Requires-Dist: pytubefix>=9.1.1
 Requires-Dist: readability-lxml>=0.8.4.1
@@ -38,6 +36,14 @@ Description-Content-Type: text/markdown
 # Content Core
 
 [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
+[![PyPI version](https://badge.fury.io/py/content-core.svg)](https://badge.fury.io/py/content-core)
+[![Downloads](https://pepy.tech/badge/content-core)](https://pepy.tech/project/content-core)
+[![Downloads](https://pepy.tech/badge/content-core/month)](https://pepy.tech/project/content-core)
+[![GitHub stars](https://img.shields.io/github/stars/lfnovo/content-core?style=social)](https://github.com/lfnovo/content-core)
+[![GitHub forks](https://img.shields.io/github/forks/lfnovo/content-core?style=social)](https://github.com/lfnovo/content-core)
+[![GitHub issues](https://img.shields.io/github/issues/lfnovo/content-core)](https://github.com/lfnovo/content-core/issues)
+[![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black)
+[![Ruff](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/astral-sh/ruff/main/assets/badge/v2.json)](https://github.com/astral-sh/ruff)
 
 **Content Core** is a powerful, AI-powered content extraction and processing platform that transforms any source into clean, structured content. Extract text from websites, transcribe videos, process documents, and generate AI summaries—all through a unified interface with multiple integration options.
 
@@ -103,12 +109,13 @@ summary = await cc.summarize_content(result, context="explain to a child")
 * **⚡ Zero-Install Options:** Use `uvx` for instant access without installation
 * **🧠 AI-Powered Processing:** LLM integration for content cleaning and summarization
 * **🔄 Asynchronous:** Built with `asyncio` for efficient processing
+* **🐍 Pure Python Implementation:** No system dependencies required - simplified installation across all platforms
 
 ## Getting Started
 
 ### Installation
 
-Install Content Core using `pip`:
+Install Content Core using `pip` - **no system dependencies required!**
 
 ```bash
 # Basic installation (PyMuPDF + BeautifulSoup/Jina extraction)
@@ -124,6 +131,8 @@ pip install content-core
 pip install content-core[docling]
 ```
 
+> **Note:** Unlike many content extraction tools, Content Core uses pure Python implementations and doesn't require system libraries like libmagic. This ensures consistent, hassle-free installation across Windows, macOS, and Linux.
+
 Alternatively, if you’re developing locally:
 
 ```bash
@@ -264,6 +273,10 @@ For more information on how to use the Content Core library, including details o
 
 Content Core includes a Model Context Protocol (MCP) server that enables seamless integration with Claude Desktop and other MCP-compatible applications. The MCP server exposes Content Core's powerful extraction capabilities through a standardized protocol.
 
+<a href="https://glama.ai/mcp/servers/@lfnovo/content-core">
+<img width="380" height="200" src="https://glama.ai/mcp/servers/@lfnovo/content-core/badge" />
+</a>
+
 ### Quick Setup with Claude Desktop
 
 ```bash
@@ -15,8 +15,9 @@ content_core/content/__init__.py,sha256=7IxfLTUHKyHjoT4MfWM2PX2J3QBeYcuERzE9vFeF
 content_core/content/cleanup/__init__.py,sha256=wymD24WLDDdsZrv-5WhparSiHBK9SJCcqBHmokuZqk4,121
 content_core/content/cleanup/core.py,sha256=AXUGUWxGob8si5uKRnDrreOcHV_gbGJr4YnRsNm2GX0,531
 content_core/content/extraction/__init__.py,sha256=TaYw6CAcG62GZfsJxeZ6VJDLP85BU2a7_G271v6WWPk,446
-content_core/content/extraction/graph.py,sha256=sjk6NpzOMOzMbUOM0bqrDSlB3cLQzboviLDNbj48pjY,8074
-content_core/content/identification/__init__.py,sha256=x4n8JIjDwmPvAopEEEcmZjlozg-zGbMq_s9VYdBjzYU,169
+content_core/content/extraction/graph.py,sha256=AFi9B_hTuxqdgvogCOk4Xdqoboug7_KXtV0ZHlb8igM,8139
+content_core/content/identification/__init__.py,sha256=DDoCi1r-6Z_pGPPi3X1ZwyRrcRtg-rAiCTK50hnO5Y0,235
+content_core/content/identification/file_detector.py,sha256=s_10Osxv8gfVfs3UPXFzCOosvWCrf4ZCFXcW2yimUIM,17170
 content_core/content/summary/__init__.py,sha256=ReKCZWKfDtqlInKeh87Y1DEfiNzVWabGybEz3hS2FrI,114
 content_core/content/summary/core.py,sha256=kEabpETljzUb-yf0NcVWTOuCtayESo74gGBVDX7YTFs,550
 content_core/mcp/__init__.py,sha256=KNZYH4F9AoW1Orw1BtO3n92Cn-127hI7iF9gnGadueU,95
@@ -35,8 +36,8 @@ content_core/tools/__init__.py,sha256=DuJmd7fE-NpDvLP8IW1XY5MUkAQcdks52rn2jk4N8j
 content_core/tools/cleanup.py,sha256=5IdKedsFyRQMdYzgFSKtsfyxJldbroXQXHesHICNENI,523
 content_core/tools/extract.py,sha256=-r2_jsuMMXyXxGVqWhh1ilNPo_UMYAbw3Pkp1FzPy5g,577
 content_core/tools/summarize.py,sha256=DPfeglLWB08q8SvHrsKpOKZ35XjduUDs2J02ISwjdj0,596
-content_core-1.3.0.dist-info/METADATA,sha256=tT8CQXeG9PgSiXM--nSFj9OsNdH7ybXwF9WNYjO7IR4,19696
-content_core-1.3.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
-content_core-1.3.0.dist-info/entry_points.txt,sha256=ifbBxw37b7gAxZXoduS15KtqHuMHuU58STRkEmgM2zA,147
-content_core-1.3.0.dist-info/licenses/LICENSE,sha256=myj0z2T4qIkenCgLsRfx7Wk6UqCQNj5c7O14Qx4zpGg,1066
-content_core-1.3.0.dist-info/RECORD,,
+content_core-1.4.0.dist-info/METADATA,sha256=BPP1PzkgDyOC-vGHawSDgLgMDGqJHEzKN6fV4uHgc1o,21093
+content_core-1.4.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+content_core-1.4.0.dist-info/entry_points.txt,sha256=ifbBxw37b7gAxZXoduS15KtqHuMHuU58STRkEmgM2zA,147
+content_core-1.4.0.dist-info/licenses/LICENSE,sha256=myj0z2T4qIkenCgLsRfx7Wk6UqCQNj5c7O14Qx4zpGg,1066
+content_core-1.4.0.dist-info/RECORD,,