content-core 1.1.0__py3-none-any.whl → 1.1.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of content-core might be problematic. Click here for more details.
- content_core/mcp/server.py +37 -34
- {content_core-1.1.0.dist-info → content_core-1.1.2.dist-info}/METADATA +58 -2
- {content_core-1.1.0.dist-info → content_core-1.1.2.dist-info}/RECORD +6 -6
- {content_core-1.1.0.dist-info → content_core-1.1.2.dist-info}/WHEEL +0 -0
- {content_core-1.1.0.dist-info → content_core-1.1.2.dist-info}/entry_points.txt +0 -0
- {content_core-1.1.0.dist-info → content_core-1.1.2.dist-info}/licenses/LICENSE +0 -0
content_core/mcp/server.py
CHANGED
|
@@ -30,6 +30,7 @@ def suppress_stdout():
|
|
|
30
30
|
finally:
|
|
31
31
|
sys.stdout = original_stdout
|
|
32
32
|
|
|
33
|
+
|
|
33
34
|
# Add parent directory to path to import content_core
|
|
34
35
|
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.dirname(__file__))))
|
|
35
36
|
|
|
@@ -38,38 +39,40 @@ import content_core as cc
|
|
|
38
39
|
# Initialize MCP server
|
|
39
40
|
mcp = FastMCP("Content Core MCP Server")
|
|
40
41
|
|
|
42
|
+
|
|
41
43
|
async def _extract_content_impl(
|
|
42
|
-
url: Optional[str] = None,
|
|
43
|
-
file_path: Optional[str] = None
|
|
44
|
+
url: Optional[str] = None, file_path: Optional[str] = None
|
|
44
45
|
) -> Dict[str, Any]:
|
|
45
46
|
"""
|
|
46
|
-
Extract content from a URL or file using Content Core's auto engine.
|
|
47
|
-
|
|
47
|
+
Extract content from a URL or file using Content Core's auto engine. This is useful for processing Youtube transcripts, website content, PDFs, ePUB, Office files, etc. You can also use it to extract transcripts from audio or video files.
|
|
48
|
+
|
|
48
49
|
Args:
|
|
49
50
|
url: Optional URL to extract content from
|
|
50
51
|
file_path: Optional file path to extract content from
|
|
51
|
-
|
|
52
|
+
|
|
52
53
|
Returns:
|
|
53
54
|
JSON object containing extracted content and metadata
|
|
54
|
-
|
|
55
|
+
|
|
55
56
|
Raises:
|
|
56
57
|
ValueError: If neither or both url and file_path are provided
|
|
57
58
|
"""
|
|
58
59
|
# Validate input - exactly one must be provided
|
|
59
|
-
if (url is None and file_path is None) or (
|
|
60
|
+
if (url is None and file_path is None) or (
|
|
61
|
+
url is not None and file_path is not None
|
|
62
|
+
):
|
|
60
63
|
return {
|
|
61
64
|
"success": False,
|
|
62
65
|
"error": "Exactly one of 'url' or 'file_path' must be provided",
|
|
63
66
|
"source_type": None,
|
|
64
67
|
"source": None,
|
|
65
68
|
"content": None,
|
|
66
|
-
"metadata": None
|
|
69
|
+
"metadata": None,
|
|
67
70
|
}
|
|
68
|
-
|
|
71
|
+
|
|
69
72
|
# Determine source type and validate
|
|
70
73
|
source_type = "url" if url else "file"
|
|
71
74
|
source = url if url else file_path
|
|
72
|
-
|
|
75
|
+
|
|
73
76
|
# Additional validation for file paths
|
|
74
77
|
if file_path:
|
|
75
78
|
path = Path(file_path)
|
|
@@ -80,9 +83,9 @@ async def _extract_content_impl(
|
|
|
80
83
|
"source_type": source_type,
|
|
81
84
|
"source": source,
|
|
82
85
|
"content": None,
|
|
83
|
-
"metadata": None
|
|
86
|
+
"metadata": None,
|
|
84
87
|
}
|
|
85
|
-
|
|
88
|
+
|
|
86
89
|
# Security check - ensure no directory traversal
|
|
87
90
|
try:
|
|
88
91
|
# Resolve to absolute path and ensure it's not trying to access sensitive areas
|
|
@@ -95,30 +98,30 @@ async def _extract_content_impl(
|
|
|
95
98
|
"source_type": source_type,
|
|
96
99
|
"source": source,
|
|
97
100
|
"content": None,
|
|
98
|
-
"metadata": None
|
|
101
|
+
"metadata": None,
|
|
99
102
|
}
|
|
100
|
-
|
|
103
|
+
|
|
101
104
|
# Build extraction request
|
|
102
105
|
extraction_request = {}
|
|
103
106
|
if url:
|
|
104
107
|
extraction_request["url"] = url
|
|
105
108
|
else:
|
|
106
109
|
extraction_request["file_path"] = str(Path(file_path).resolve())
|
|
107
|
-
|
|
110
|
+
|
|
108
111
|
# Track start time
|
|
109
112
|
start_time = datetime.utcnow()
|
|
110
|
-
|
|
113
|
+
|
|
111
114
|
try:
|
|
112
115
|
# Use Content Core's extract_content with auto engine
|
|
113
116
|
logger.info(f"Extracting content from {source_type}: {source}")
|
|
114
|
-
|
|
117
|
+
|
|
115
118
|
# Suppress stdout to prevent MoviePy and other libraries from interfering with MCP protocol
|
|
116
119
|
with suppress_stdout():
|
|
117
120
|
result = await cc.extract_content(extraction_request)
|
|
118
|
-
|
|
121
|
+
|
|
119
122
|
# Calculate extraction time
|
|
120
123
|
extraction_time = (datetime.utcnow() - start_time).total_seconds()
|
|
121
|
-
|
|
124
|
+
|
|
122
125
|
# Build response - result is a ProcessSourceOutput object
|
|
123
126
|
response = {
|
|
124
127
|
"success": True,
|
|
@@ -132,13 +135,13 @@ async def _extract_content_impl(
|
|
|
132
135
|
"content_length": len(result.content or ""),
|
|
133
136
|
"identified_type": result.identified_type or "unknown",
|
|
134
137
|
"identified_provider": result.identified_provider or "",
|
|
135
|
-
}
|
|
138
|
+
},
|
|
136
139
|
}
|
|
137
|
-
|
|
140
|
+
|
|
138
141
|
# Add metadata from the result
|
|
139
142
|
if result.metadata:
|
|
140
143
|
response["metadata"].update(result.metadata)
|
|
141
|
-
|
|
144
|
+
|
|
142
145
|
# Add specific metadata based on source type
|
|
143
146
|
if source_type == "url":
|
|
144
147
|
if result.title:
|
|
@@ -152,10 +155,10 @@ async def _extract_content_impl(
|
|
|
152
155
|
response["metadata"]["file_path"] = result.file_path
|
|
153
156
|
response["metadata"]["file_size"] = Path(file_path).stat().st_size
|
|
154
157
|
response["metadata"]["file_extension"] = Path(file_path).suffix
|
|
155
|
-
|
|
158
|
+
|
|
156
159
|
logger.info(f"Successfully extracted content from {source_type}: {source}")
|
|
157
160
|
return response
|
|
158
|
-
|
|
161
|
+
|
|
159
162
|
except Exception as e:
|
|
160
163
|
logger.error(f"Error extracting content from {source_type} {source}: {str(e)}")
|
|
161
164
|
return {
|
|
@@ -166,26 +169,25 @@ async def _extract_content_impl(
|
|
|
166
169
|
"content": None,
|
|
167
170
|
"metadata": {
|
|
168
171
|
"extraction_timestamp": start_time.isoformat() + "Z",
|
|
169
|
-
"error_type": type(e).__name__
|
|
170
|
-
}
|
|
172
|
+
"error_type": type(e).__name__,
|
|
173
|
+
},
|
|
171
174
|
}
|
|
172
175
|
|
|
173
176
|
|
|
174
177
|
@mcp.tool
|
|
175
178
|
async def extract_content(
|
|
176
|
-
url: Optional[str] = None,
|
|
177
|
-
file_path: Optional[str] = None
|
|
179
|
+
url: Optional[str] = None, file_path: Optional[str] = None
|
|
178
180
|
) -> Dict[str, Any]:
|
|
179
181
|
"""
|
|
180
182
|
Extract content from a URL or file using Content Core's auto engine.
|
|
181
|
-
|
|
183
|
+
|
|
182
184
|
Args:
|
|
183
185
|
url: Optional URL to extract content from
|
|
184
186
|
file_path: Optional file path to extract content from
|
|
185
|
-
|
|
187
|
+
|
|
186
188
|
Returns:
|
|
187
189
|
JSON object containing extracted content and metadata
|
|
188
|
-
|
|
190
|
+
|
|
189
191
|
Raises:
|
|
190
192
|
ValueError: If neither or both url and file_path are provided
|
|
191
193
|
"""
|
|
@@ -197,15 +199,16 @@ def main():
|
|
|
197
199
|
# Additional MoviePy configuration to suppress all output
|
|
198
200
|
try:
|
|
199
201
|
import moviepy.config as mp_config
|
|
202
|
+
|
|
200
203
|
mp_config.check_and_download_cmd("ffmpeg") # Pre-download to avoid logs later
|
|
201
204
|
except Exception:
|
|
202
205
|
pass # Ignore if MoviePy isn't available or configured
|
|
203
|
-
|
|
206
|
+
|
|
204
207
|
logger.info("Starting Content Core MCP Server")
|
|
205
|
-
|
|
208
|
+
|
|
206
209
|
# Run with STDIO transport for MCP compatibility
|
|
207
210
|
mcp.run()
|
|
208
211
|
|
|
209
212
|
|
|
210
213
|
if __name__ == "__main__":
|
|
211
|
-
main()
|
|
214
|
+
main()
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: content-core
|
|
3
|
-
Version: 1.1.0
|
|
4
|
-
Summary: Extract what matters from any media source
|
|
3
|
+
Version: 1.1.2
|
|
4
|
+
Summary: Extract what matters from any media source. Available as Python Library, macOS Service, CLI and MCP Server
|
|
5
5
|
Author-email: LUIS NOVO <lfnovo@gmail.com>
|
|
6
6
|
License-File: LICENSE
|
|
7
7
|
Requires-Python: >=3.10
|
|
@@ -60,6 +60,7 @@ The primary goal of Content Core is to simplify the process of ingesting content
|
|
|
60
60
|
* You can override this by specifying an engine, but `'auto'` is recommended for most users.
|
|
61
61
|
* **Content Cleaning (Optional):** Likely integrates with LLMs (via `prompter.py` and Jinja templates) to refine and clean the extracted content.
|
|
62
62
|
* **MCP Server:** Includes a Model Context Protocol (MCP) server for seamless integration with Claude Desktop and other MCP-compatible applications.
|
|
63
|
+
* **macOS Services:** Right-click context menu integration for Finder (extract and summarize files directly).
|
|
63
64
|
* **Asynchronous:** Built with `asyncio` for efficient I/O operations.
|
|
64
65
|
|
|
65
66
|
## Getting Started
|
|
@@ -92,6 +93,18 @@ uv sync
|
|
|
92
93
|
Content Core provides three CLI commands for extracting, cleaning, and summarizing content:
|
|
93
94
|
ccore, cclean, and csum. These commands support input from text, URLs, files, or piped data (e.g., via cat file | command).
|
|
94
95
|
|
|
96
|
+
**Zero-install usage with uvx:**
|
|
97
|
+
```bash
|
|
98
|
+
# Extract content
|
|
99
|
+
uvx --from "content-core" ccore https://example.com
|
|
100
|
+
|
|
101
|
+
# Clean content
|
|
102
|
+
uvx --from "content-core" cclean "messy content"
|
|
103
|
+
|
|
104
|
+
# Summarize content
|
|
105
|
+
uvx --from "content-core" csum "long text" --context "bullet points"
|
|
106
|
+
```
|
|
107
|
+
|
|
95
108
|
#### ccore - Extract Content
|
|
96
109
|
|
|
97
110
|
Extracts content from text, URLs, or files, with optional formatting.
|
|
@@ -232,6 +245,49 @@ Add to your `claude_desktop_config.json`:
|
|
|
232
245
|
|
|
233
246
|
For detailed setup instructions, configuration options, and usage examples, see our [MCP Documentation](docs/mcp.md).
|
|
234
247
|
|
|
248
|
+
## macOS Services Integration
|
|
249
|
+
|
|
250
|
+
Content Core provides powerful right-click integration with macOS Finder, allowing you to extract and summarize content from any file without installation. Choose between clipboard or TextEdit output for maximum flexibility.
|
|
251
|
+
|
|
252
|
+
### Available Services
|
|
253
|
+
|
|
254
|
+
Create **4 convenient services** for different workflows:
|
|
255
|
+
|
|
256
|
+
- **Extract Content → Clipboard** - Quick copy for immediate pasting
|
|
257
|
+
- **Extract Content → TextEdit** - Review before using
|
|
258
|
+
- **Summarize Content → Clipboard** - Quick summary copying
|
|
259
|
+
- **Summarize Content → TextEdit** - Formatted summary with headers
|
|
260
|
+
|
|
261
|
+
### Quick Setup
|
|
262
|
+
|
|
263
|
+
1. **Install uv** (if not already installed):
|
|
264
|
+
```bash
|
|
265
|
+
curl -LsSf https://astral.sh/uv/install.sh | sh
|
|
266
|
+
```
|
|
267
|
+
|
|
268
|
+
2. **Create services manually** using Automator (5 minutes setup)
|
|
269
|
+
|
|
270
|
+
### Usage
|
|
271
|
+
|
|
272
|
+
**Right-click any supported file** in Finder → **Services** → Choose your option:
|
|
273
|
+
|
|
274
|
+
- **PDFs, Word docs** - Instant text extraction
|
|
275
|
+
- **Videos, audio files** - Automatic transcription
|
|
276
|
+
- **Images** - OCR text recognition
|
|
277
|
+
- **Web content** - Clean text extraction
|
|
278
|
+
- **Multiple files** - Batch processing support
|
|
279
|
+
|
|
280
|
+
### Features
|
|
281
|
+
|
|
282
|
+
- **Zero-install processing**: Uses `uvx` for isolated execution
|
|
283
|
+
- **Multiple output options**: Clipboard or TextEdit display
|
|
284
|
+
- **System notifications**: Visual feedback on completion
|
|
285
|
+
- **Wide format support**: 20+ file types supported
|
|
286
|
+
- **Batch processing**: Handle multiple files at once
|
|
287
|
+
- **Keyboard shortcuts**: Assignable hotkeys for power users
|
|
288
|
+
|
|
289
|
+
For complete setup instructions with copy-paste scripts, see [macOS Services Documentation](docs/macos.md).
|
|
290
|
+
|
|
235
291
|
## Using with Langchain
|
|
236
292
|
|
|
237
293
|
For users integrating with the [Langchain](https://python.langchain.com/) framework, `content-core` exposes a set of compatible tools. These tools, located in the `src/content_core/tools` directory, allow you to leverage `content-core` extraction, cleaning, and summarization capabilities directly within your Langchain agents and chains.
|
|
@@ -20,7 +20,7 @@ content_core/content/identification/__init__.py,sha256=x4n8JIjDwmPvAopEEEcmZjloz
|
|
|
20
20
|
content_core/content/summary/__init__.py,sha256=ReKCZWKfDtqlInKeh87Y1DEfiNzVWabGybEz3hS2FrI,114
|
|
21
21
|
content_core/content/summary/core.py,sha256=kEabpETljzUb-yf0NcVWTOuCtayESo74gGBVDX7YTFs,550
|
|
22
22
|
content_core/mcp/__init__.py,sha256=KNZYH4F9AoW1Orw1BtO3n92Cn-127hI7iF9gnGadueU,95
|
|
23
|
-
content_core/mcp/server.py,sha256=
|
|
23
|
+
content_core/mcp/server.py,sha256=ql0uXHkIbZlHQUhUQ4CaRnj19xT6t8ErydWntFgmtUg,7021
|
|
24
24
|
content_core/notebooks/run.ipynb,sha256=WPBNcQUNXR5MldNMghVcU4vE4ibrVmlANa80baQn8TA,371078
|
|
25
25
|
content_core/processors/audio.py,sha256=Mie20g_2Akhw6BHBVo3sHMpDRYUkqBI72lEDakscx3s,5729
|
|
26
26
|
content_core/processors/docling.py,sha256=dkXehsQdfyWXfrK1K_6Pye50ABM7DxMk6TMguabM9Pc,2151
|
|
@@ -34,8 +34,8 @@ content_core/tools/__init__.py,sha256=DuJmd7fE-NpDvLP8IW1XY5MUkAQcdks52rn2jk4N8j
|
|
|
34
34
|
content_core/tools/cleanup.py,sha256=5IdKedsFyRQMdYzgFSKtsfyxJldbroXQXHesHICNENI,523
|
|
35
35
|
content_core/tools/extract.py,sha256=-r2_jsuMMXyXxGVqWhh1ilNPo_UMYAbw3Pkp1FzPy5g,577
|
|
36
36
|
content_core/tools/summarize.py,sha256=DPfeglLWB08q8SvHrsKpOKZ35XjduUDs2J02ISwjdj0,596
|
|
37
|
-
content_core-1.1.
|
|
38
|
-
content_core-1.1.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
|
39
|
-
content_core-1.1.0.dist-info/entry_points.txt,sha256=ifbBxw37b7gAxZXoduS15KtqHuMHuU58STRkEmgM2zA,147
|
|
40
|
-
content_core-1.1.0.dist-info/licenses/LICENSE,sha256=myj0z2T4qIkenCgLsRfx7Wk6UqCQNj5c7O14Qx4zpGg,1066
|
|
41
|
-
content_core-1.1.0.dist-info/RECORD,,
|
|
37
|
+
content_core-1.1.2.dist-info/METADATA,sha256=_0Rg4yeU-05hDB_91dvcMXYKMaKcMcU5C8SpkYhtiRs,15072
|
|
38
|
+
content_core-1.1.2.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
|
39
|
+
content_core-1.1.2.dist-info/entry_points.txt,sha256=ifbBxw37b7gAxZXoduS15KtqHuMHuU58STRkEmgM2zA,147
|
|
40
|
+
content_core-1.1.2.dist-info/licenses/LICENSE,sha256=myj0z2T4qIkenCgLsRfx7Wk6UqCQNj5c7O14Qx4zpGg,1066
|
|
41
|
+
content_core-1.1.2.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|