content-core 1.1.0__tar.gz → 1.1.2__tar.gz

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of content-core might be problematic. Click here for more details.

Files changed (67) hide show
  1. {content_core-1.1.0 → content_core-1.1.2}/PKG-INFO +58 -2
  2. {content_core-1.1.0 → content_core-1.1.2}/README.md +56 -0
  3. content_core-1.1.2/docs/macos.md +287 -0
  4. {content_core-1.1.0 → content_core-1.1.2}/docs/mcp.md +61 -12
  5. {content_core-1.1.0 → content_core-1.1.2}/pyproject.toml +2 -2
  6. {content_core-1.1.0 → content_core-1.1.2}/src/content_core/mcp/server.py +37 -34
  7. {content_core-1.1.0 → content_core-1.1.2}/uv.lock +1 -1
  8. {content_core-1.1.0 → content_core-1.1.2}/.github/PULL_REQUEST_TEMPLATE.md +0 -0
  9. {content_core-1.1.0 → content_core-1.1.2}/.github/workflows/publish.yml +0 -0
  10. {content_core-1.1.0 → content_core-1.1.2}/.gitignore +0 -0
  11. {content_core-1.1.0 → content_core-1.1.2}/.python-version +0 -0
  12. {content_core-1.1.0 → content_core-1.1.2}/CONTRIBUTING.md +0 -0
  13. {content_core-1.1.0 → content_core-1.1.2}/LICENSE +0 -0
  14. {content_core-1.1.0 → content_core-1.1.2}/Makefile +0 -0
  15. {content_core-1.1.0 → content_core-1.1.2}/docs/processors.md +0 -0
  16. {content_core-1.1.0 → content_core-1.1.2}/docs/usage.md +0 -0
  17. {content_core-1.1.0 → content_core-1.1.2}/prompts/content/cleanup.jinja +0 -0
  18. {content_core-1.1.0 → content_core-1.1.2}/prompts/content/summarize.jinja +0 -0
  19. {content_core-1.1.0 → content_core-1.1.2}/src/content_core/__init__.py +0 -0
  20. {content_core-1.1.0 → content_core-1.1.2}/src/content_core/cc_config.yaml +0 -0
  21. {content_core-1.1.0 → content_core-1.1.2}/src/content_core/common/__init__.py +0 -0
  22. {content_core-1.1.0 → content_core-1.1.2}/src/content_core/common/exceptions.py +0 -0
  23. {content_core-1.1.0 → content_core-1.1.2}/src/content_core/common/state.py +0 -0
  24. {content_core-1.1.0 → content_core-1.1.2}/src/content_core/common/types.py +0 -0
  25. {content_core-1.1.0 → content_core-1.1.2}/src/content_core/common/utils.py +0 -0
  26. {content_core-1.1.0 → content_core-1.1.2}/src/content_core/config.py +0 -0
  27. {content_core-1.1.0 → content_core-1.1.2}/src/content_core/content/__init__.py +0 -0
  28. {content_core-1.1.0 → content_core-1.1.2}/src/content_core/content/cleanup/__init__.py +0 -0
  29. {content_core-1.1.0 → content_core-1.1.2}/src/content_core/content/cleanup/core.py +0 -0
  30. {content_core-1.1.0 → content_core-1.1.2}/src/content_core/content/extraction/__init__.py +0 -0
  31. {content_core-1.1.0 → content_core-1.1.2}/src/content_core/content/extraction/graph.py +0 -0
  32. {content_core-1.1.0 → content_core-1.1.2}/src/content_core/content/identification/__init__.py +0 -0
  33. {content_core-1.1.0 → content_core-1.1.2}/src/content_core/content/summary/__init__.py +0 -0
  34. {content_core-1.1.0 → content_core-1.1.2}/src/content_core/content/summary/core.py +0 -0
  35. {content_core-1.1.0 → content_core-1.1.2}/src/content_core/logging.py +0 -0
  36. {content_core-1.1.0 → content_core-1.1.2}/src/content_core/mcp/__init__.py +0 -0
  37. {content_core-1.1.0 → content_core-1.1.2}/src/content_core/models.py +0 -0
  38. {content_core-1.1.0 → content_core-1.1.2}/src/content_core/models_config.yaml +0 -0
  39. {content_core-1.1.0 → content_core-1.1.2}/src/content_core/notebooks/run.ipynb +0 -0
  40. {content_core-1.1.0 → content_core-1.1.2}/src/content_core/processors/audio.py +0 -0
  41. {content_core-1.1.0 → content_core-1.1.2}/src/content_core/processors/docling.py +0 -0
  42. {content_core-1.1.0 → content_core-1.1.2}/src/content_core/processors/office.py +0 -0
  43. {content_core-1.1.0 → content_core-1.1.2}/src/content_core/processors/pdf.py +0 -0
  44. {content_core-1.1.0 → content_core-1.1.2}/src/content_core/processors/text.py +0 -0
  45. {content_core-1.1.0 → content_core-1.1.2}/src/content_core/processors/url.py +0 -0
  46. {content_core-1.1.0 → content_core-1.1.2}/src/content_core/processors/video.py +0 -0
  47. {content_core-1.1.0 → content_core-1.1.2}/src/content_core/processors/youtube.py +0 -0
  48. {content_core-1.1.0 → content_core-1.1.2}/src/content_core/py.typed +0 -0
  49. {content_core-1.1.0 → content_core-1.1.2}/src/content_core/templated_message.py +0 -0
  50. {content_core-1.1.0 → content_core-1.1.2}/src/content_core/tools/__init__.py +0 -0
  51. {content_core-1.1.0 → content_core-1.1.2}/src/content_core/tools/cleanup.py +0 -0
  52. {content_core-1.1.0 → content_core-1.1.2}/src/content_core/tools/extract.py +0 -0
  53. {content_core-1.1.0 → content_core-1.1.2}/src/content_core/tools/summarize.py +0 -0
  54. {content_core-1.1.0 → content_core-1.1.2}/tests/input_content/file.docx +0 -0
  55. {content_core-1.1.0 → content_core-1.1.2}/tests/input_content/file.epub +0 -0
  56. {content_core-1.1.0 → content_core-1.1.2}/tests/input_content/file.md +0 -0
  57. {content_core-1.1.0 → content_core-1.1.2}/tests/input_content/file.mp3 +0 -0
  58. {content_core-1.1.0 → content_core-1.1.2}/tests/input_content/file.mp4 +0 -0
  59. {content_core-1.1.0 → content_core-1.1.2}/tests/input_content/file.pdf +0 -0
  60. {content_core-1.1.0 → content_core-1.1.2}/tests/input_content/file.pptx +0 -0
  61. {content_core-1.1.0 → content_core-1.1.2}/tests/input_content/file.txt +0 -0
  62. {content_core-1.1.0 → content_core-1.1.2}/tests/input_content/file.xlsx +0 -0
  63. {content_core-1.1.0 → content_core-1.1.2}/tests/input_content/file_audio.mp3 +0 -0
  64. {content_core-1.1.0 → content_core-1.1.2}/tests/integration/test_cli.py +0 -0
  65. {content_core-1.1.0 → content_core-1.1.2}/tests/integration/test_extraction.py +0 -0
  66. {content_core-1.1.0 → content_core-1.1.2}/tests/unit/test_docling.py +0 -0
  67. {content_core-1.1.0 → content_core-1.1.2}/tests/unit/test_mcp_server.py +0 -0
@@ -1,7 +1,7 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: content-core
3
- Version: 1.1.0
4
- Summary: Extract what matters from any media source
3
+ Version: 1.1.2
4
+ Summary: Extract what matters from any media source. Available as Python Library, macOS Service, CLI and MCP Server
5
5
  Author-email: LUIS NOVO <lfnovo@gmail.com>
6
6
  License-File: LICENSE
7
7
  Requires-Python: >=3.10
@@ -60,6 +60,7 @@ The primary goal of Content Core is to simplify the process of ingesting content
60
60
  * You can override this by specifying an engine, but `'auto'` is recommended for most users.
61
61
  * **Content Cleaning (Optional):** Integrates with LLMs (via `prompter.py` and Jinja templates) to refine and clean the extracted content.
62
62
  * **MCP Server:** Includes a Model Context Protocol (MCP) server for seamless integration with Claude Desktop and other MCP-compatible applications.
63
+ * **macOS Services:** Right-click context menu integration for Finder (extract and summarize files directly).
63
64
  * **Asynchronous:** Built with `asyncio` for efficient I/O operations.
64
65
 
65
66
  ## Getting Started
@@ -92,6 +93,18 @@ uv sync
92
93
  Content Core provides three CLI commands for extracting, cleaning, and summarizing content:
93
94
  ccore, cclean, and csum. These commands support input from text, URLs, files, or piped data (e.g., via cat file | command).
94
95
 
96
+ **Zero-install usage with uvx:**
97
+ ```bash
98
+ # Extract content
99
+ uvx --from "content-core" ccore https://example.com
100
+
101
+ # Clean content
102
+ uvx --from "content-core" cclean "messy content"
103
+
104
+ # Summarize content
105
+ uvx --from "content-core" csum "long text" --context "bullet points"
106
+ ```
107
+
95
108
  #### ccore - Extract Content
96
109
 
97
110
  Extracts content from text, URLs, or files, with optional formatting.
@@ -232,6 +245,49 @@ Add to your `claude_desktop_config.json`:
232
245
 
233
246
  For detailed setup instructions, configuration options, and usage examples, see our [MCP Documentation](docs/mcp.md).
234
247
 
248
+ ## macOS Services Integration
249
+
250
+ Content Core provides powerful right-click integration with macOS Finder, allowing you to extract and summarize content from any supported file without installing Content Core itself (only `uv` is required). Choose between clipboard or TextEdit output for maximum flexibility.
251
+
252
+ ### Available Services
253
+
254
+ Create **4 convenient services** for different workflows:
255
+
256
+ - **Extract Content → Clipboard** - Quick copy for immediate pasting
257
+ - **Extract Content → TextEdit** - Review before using
258
+ - **Summarize Content → Clipboard** - Quick summary copying
259
+ - **Summarize Content → TextEdit** - Formatted summary with headers
260
+
261
+ ### Quick Setup
262
+
263
+ 1. **Install uv** (if not already installed):
264
+ ```bash
265
+ curl -LsSf https://astral.sh/uv/install.sh | sh
266
+ ```
267
+
268
+ 2. **Create services manually** using Automator (about a 5-minute setup)
269
+
270
+ ### Usage
271
+
272
+ **Right-click any supported file** in Finder → **Services** → Choose your option:
273
+
274
+ - **PDFs, Word docs** - Instant text extraction
275
+ - **Videos, audio files** - Automatic transcription
276
+ - **Images** - OCR text recognition
277
+ - **Web content** - Clean text extraction
278
+ - **Multiple files** - Batch processing support
279
+
280
+ ### Features
281
+
282
+ - **Zero-install processing**: Uses `uvx` for isolated execution
283
+ - **Multiple output options**: Clipboard or TextEdit display
284
+ - **System notifications**: Visual feedback on completion
285
+ - **Wide format support**: 20+ file types supported
286
+ - **Batch processing**: Handle multiple files at once
287
+ - **Keyboard shortcuts**: Assignable hotkeys for power users
288
+
289
+ For complete setup instructions with copy-paste scripts, see [macOS Services Documentation](docs/macos.md).
290
+
235
291
  ## Using with Langchain
236
292
 
237
293
  For users integrating with the [Langchain](https://python.langchain.com/) framework, `content-core` exposes a set of compatible tools. These tools, located in the `src/content_core/tools` directory, allow you to leverage `content-core` extraction, cleaning, and summarization capabilities directly within your Langchain agents and chains.
@@ -23,6 +23,7 @@ The primary goal of Content Core is to simplify the process of ingesting content
23
23
  * You can override this by specifying an engine, but `'auto'` is recommended for most users.
24
24
  * **Content Cleaning (Optional):** Integrates with LLMs (via `prompter.py` and Jinja templates) to refine and clean the extracted content.
25
25
  * **MCP Server:** Includes a Model Context Protocol (MCP) server for seamless integration with Claude Desktop and other MCP-compatible applications.
26
+ * **macOS Services:** Right-click context menu integration for Finder (extract and summarize files directly).
26
27
  * **Asynchronous:** Built with `asyncio` for efficient I/O operations.
27
28
 
28
29
  ## Getting Started
@@ -55,6 +56,18 @@ uv sync
55
56
  Content Core provides three CLI commands for extracting, cleaning, and summarizing content:
56
57
  ccore, cclean, and csum. These commands support input from text, URLs, files, or piped data (e.g., via cat file | command).
57
58
 
59
+ **Zero-install usage with uvx:**
60
+ ```bash
61
+ # Extract content
62
+ uvx --from "content-core" ccore https://example.com
63
+
64
+ # Clean content
65
+ uvx --from "content-core" cclean "messy content"
66
+
67
+ # Summarize content
68
+ uvx --from "content-core" csum "long text" --context "bullet points"
69
+ ```
70
+
58
71
  #### ccore - Extract Content
59
72
 
60
73
  Extracts content from text, URLs, or files, with optional formatting.
@@ -195,6 +208,49 @@ Add to your `claude_desktop_config.json`:
195
208
 
196
209
  For detailed setup instructions, configuration options, and usage examples, see our [MCP Documentation](docs/mcp.md).
197
210
 
211
+ ## macOS Services Integration
212
+
213
+ Content Core provides powerful right-click integration with macOS Finder, allowing you to extract and summarize content from any supported file without installing Content Core itself (only `uv` is required). Choose between clipboard or TextEdit output for maximum flexibility.
214
+
215
+ ### Available Services
216
+
217
+ Create **4 convenient services** for different workflows:
218
+
219
+ - **Extract Content → Clipboard** - Quick copy for immediate pasting
220
+ - **Extract Content → TextEdit** - Review before using
221
+ - **Summarize Content → Clipboard** - Quick summary copying
222
+ - **Summarize Content → TextEdit** - Formatted summary with headers
223
+
224
+ ### Quick Setup
225
+
226
+ 1. **Install uv** (if not already installed):
227
+ ```bash
228
+ curl -LsSf https://astral.sh/uv/install.sh | sh
229
+ ```
230
+
231
+ 2. **Create services manually** using Automator (about a 5-minute setup)
232
+
233
+ ### Usage
234
+
235
+ **Right-click any supported file** in Finder → **Services** → Choose your option:
236
+
237
+ - **PDFs, Word docs** - Instant text extraction
238
+ - **Videos, audio files** - Automatic transcription
239
+ - **Images** - OCR text recognition
240
+ - **Web content** - Clean text extraction
241
+ - **Multiple files** - Batch processing support
242
+
243
+ ### Features
244
+
245
+ - **Zero-install processing**: Uses `uvx` for isolated execution
246
+ - **Multiple output options**: Clipboard or TextEdit display
247
+ - **System notifications**: Visual feedback on completion
248
+ - **Wide format support**: 20+ file types supported
249
+ - **Batch processing**: Handle multiple files at once
250
+ - **Keyboard shortcuts**: Assignable hotkeys for power users
251
+
252
+ For complete setup instructions with copy-paste scripts, see [macOS Services Documentation](docs/macos.md).
253
+
198
254
  ## Using with Langchain
199
255
 
200
256
  For users integrating with the [Langchain](https://python.langchain.com/) framework, `content-core` exposes a set of compatible tools. These tools, located in the `src/content_core/tools` directory, allow you to leverage `content-core` extraction, cleaning, and summarization capabilities directly within your Langchain agents and chains.
@@ -0,0 +1,287 @@
1
+ # macOS Services Integration
2
+
3
+ Content Core can be integrated into macOS Finder as right-click context menu services, allowing you to extract and summarize content directly from files without installing Content Core itself (only `uv` is required; `uvx` fetches and runs the package on demand).
4
+
5
+ ## Features
6
+
7
+ - **Right-click integration**: Extract or summarize any file directly from Finder
8
+ - **Zero-install processing**: Uses `uvx` for isolated execution
9
+ - **Multiple output options**: Clipboard or TextEdit display
10
+ - **System notifications**: Get notified when processing completes
11
+ - **Wide format support**: PDFs, Word docs, videos, audio files, images, and more
12
+
13
+ ## Quick Setup
14
+
15
+ ### Prerequisites
16
+
17
+ 1. **Install uv** (if not already installed):
18
+ ```bash
19
+ curl -LsSf https://astral.sh/uv/install.sh | sh
20
+ ```
21
+
22
+ 2. **Restart your terminal** after installation
23
+
24
+ ### Create the Services
25
+
26
+ We'll create **4 different services** for different use cases:
27
+
28
+ 1. **Extract Content → Clipboard**
29
+ 2. **Extract Content → TextEdit**
30
+ 3. **Summarize Content → Clipboard**
31
+ 4. **Summarize Content → TextEdit**
32
+
33
+ ## Service Scripts
34
+
35
+ ### 1. Extract Content → Clipboard
36
+
37
+ **Service Name:** `Extract Content (Clipboard)`
38
+
39
+ ```bash
40
+ export PATH="/opt/homebrew/bin:/usr/local/bin:$PATH"
41
+
42
+ for file in "$@"; do
43
+ echo "Extracting content from: $(basename "$file")"
44
+ uvx --from "content-core" ccore "$file" | pbcopy
45
+ osascript -e 'display notification "Content extracted and copied to clipboard" with title "Content Core"'
46
+ done
47
+ ```
48
+
49
+ ### 2. Extract Content → TextEdit
50
+
51
+ **Service Name:** `Extract Content (TextEdit)`
52
+
53
+ ```bash
54
+ export PATH="/opt/homebrew/bin:/usr/local/bin:$PATH"
55
+
56
+ for file in "$@"; do
57
+ filename=$(basename "$file")
58
+ echo "Extracting content from: $filename"
59
+
60
+ # Create temporary file for the extracted content
61
+ temp_file="/tmp/extracted_$(date +%s)_$filename.txt"
62
+
63
+ # Extract content and save to temp file
64
+ uvx --from "content-core" ccore "$file" > "$temp_file"
65
+
66
+ # Open in TextEdit
67
+ open -a "TextEdit" "$temp_file"
68
+
69
+ osascript -e 'display notification "Content extracted and opened in TextEdit" with title "Content Core"'
70
+ done
71
+ ```
72
+
73
+ ### 3. Summarize Content → Clipboard
74
+
75
+ **Service Name:** `Summarize Content (Clipboard)`
76
+
77
+ ```bash
78
+ export PATH="/opt/homebrew/bin:/usr/local/bin:$PATH"
79
+
80
+ for file in "$@"; do
81
+ echo "Summarizing content from: $(basename "$file")"
82
+ uvx --from "content-core" csum "$file" 2>/dev/null | pbcopy
83
+ osascript -e 'display notification "Summary copied to clipboard" with title "Content Core"'
84
+ done
85
+ ```
86
+
87
+ ### 4. Summarize Content → TextEdit
88
+
89
+ **Service Name:** `Summarize Content (TextEdit)`
90
+
91
+ ```bash
92
+ export PATH="/opt/homebrew/bin:/usr/local/bin:$PATH"
93
+
94
+ for file in "$@"; do
95
+ filename=$(basename "$file")
96
+ echo "Summarizing content from: $filename"
97
+
98
+ # Create temporary file for the summary
99
+ temp_file="/tmp/summary_$(date +%s)_$filename.txt"
100
+
101
+ # Add header to the summary
102
+ echo "=== SUMMARY OF: $filename ===" > "$temp_file"
103
+ echo "Generated on: $(date)" >> "$temp_file"
104
+ echo "" >> "$temp_file"
105
+
106
+ # Generate summary and append to temp file
107
+ uvx --from "content-core" csum "$file" 2>/dev/null >> "$temp_file"
108
+
109
+ # Open in TextEdit
110
+ open -a "TextEdit" "$temp_file"
111
+
112
+ osascript -e 'display notification "Summary opened in TextEdit" with title "Content Core"'
113
+ done
114
+ ```
115
+
116
+ ## Step-by-Step Installation
117
+
118
+ ### For Each Service:
119
+
120
+ 1. **Open Automator** (Cmd+Space → type "Automator")
121
+ 2. Choose **"Quick Action"**
122
+ 3. Configure the service:
123
+ - Set **"Workflow receives current"** → **"files or folders"**
124
+ - Set **"in"** → **"Finder"**
125
+ 4. **Drag "Run Shell Script"** from Actions to the workflow area
126
+ 5. **Configure the shell script**:
127
+ - Make sure **"Pass input: as arguments"** is selected
128
+ - **Paste the appropriate script** from above
129
+ 6. **Save** with the service name (e.g., "Extract Content (Clipboard)")
130
+ 7. **Repeat** for all 4 services
131
+
132
+ ## Usage
133
+
134
+ After installation, **right-click any supported file** in Finder:
135
+
136
+ ### In the Services submenu, you'll see:
137
+ - **Extract Content (Clipboard)** - Content copied to clipboard
138
+ - **Extract Content (TextEdit)** - Content opens in TextEdit
139
+ - **Summarize Content (Clipboard)** - Summary copied to clipboard
140
+ - **Summarize Content (TextEdit)** - Summary opens in TextEdit
141
+
142
+ ### Supported File Types
143
+
144
+ Content Core services work with:
145
+
146
+ #### Documents
147
+ - **PDFs** - Text extraction with layout preservation
148
+ - **Word Documents** (.docx) - Full text and formatting
149
+ - **PowerPoint** (.pptx) - Slide content and speaker notes
150
+ - **Excel** (.xlsx) - Cell data and formulas
151
+ - **Text files** (.txt, .md, .csv)
152
+
153
+ #### Web & Markup
154
+ - **HTML files** - Clean text extraction
155
+ - **Markdown files** - Formatted content
156
+ - **EPUB books** - Chapter text
157
+
158
+ #### Media Files
159
+ - **Videos** (.mp4, .avi, .mov, .mkv) - Automatic transcription
160
+ - **Audio** (.mp3, .wav, .m4a, .flac) - Speech-to-text conversion
161
+ - **Images** (.jpg, .png, .tiff) - OCR text extraction
162
+
163
+ #### Archives
164
+ - **ZIP files** - Extract text from contained files
165
+ - **Compressed formats** - Automatic extraction and processing
166
+
167
+ ## Customization
168
+
169
+ ### Custom Output Locations
170
+
171
+ Want to save to a specific folder instead of temp files? Modify the TextEdit scripts:
172
+
173
+ ```bash
174
+ # Save to Desktop
175
+ output_file="$HOME/Desktop/extracted_$filename.txt"
176
+
177
+ # Save to Documents folder
178
+ output_file="$HOME/Documents/ContentCore/extracted_$filename.txt"
179
+ ```
180
+
181
+ ### Custom Summary Context
182
+
183
+ Add context to your summaries by modifying the csum command:
184
+
185
+ ```bash
186
+ # Summarize as bullet points
187
+ uvx --from "content-core" csum "$file" --context "bullet points"
188
+
189
+ # Summarize for a specific audience
190
+ uvx --from "content-core" csum "$file" --context "explain to a child"
191
+
192
+ # Executive summary
193
+ uvx --from "content-core" csum "$file" --context "executive summary"
194
+ ```
195
+
196
+ ### JSON Output
197
+
198
+ For structured data, use JSON format:
199
+
200
+ ```bash
201
+ uvx --from "content-core" ccore "$file" --format json
202
+ ```
203
+
204
+ ## Troubleshooting
205
+
206
+ ### Services Not Appearing?
207
+
208
+ 1. **Restart Finder**: `killall Finder`
209
+ 2. **Check Services settings**:
210
+ - Go to **System Preferences** (**System Settings** on macOS 13 and later) → **Keyboard** → **Shortcuts** → **Services**
211
+ - Look in **"Files and Folders"** section
212
+ - **Enable** the Content Core services
213
+
214
+ ### Permission Issues?
215
+
216
+ - macOS might ask for permission to run scripts
217
+ - **Grant access** when prompted
218
+ - Check **Security & Privacy** settings if needed
219
+
220
+ ### uvx Not Found Error?
221
+
222
+ - Make sure the **PATH export line** is at the top of each script
223
+ - Verify uvx location: `which uvx`
224
+ - Update the PATH if uvx is in a different location
225
+
226
+ ### Services Work But No Content?
227
+
228
+ - Check if you have the required API keys for certain content types:
229
+ - **OPENAI_API_KEY** - Required for audio/video transcription
230
+ - **FIRECRAWL_API_KEY** - Optional, for better web content extraction
231
+
232
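+ Add API keys to your shell profile (if a service still cannot see them, Automator may not be loading `~/.zshrc`; in that case export the keys at the top of the service script, next to the PATH line):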
233
+ ```bash
234
+ echo 'export OPENAI_API_KEY="your-key-here"' >> ~/.zshrc
235
+ source ~/.zshrc
236
+ ```
237
+
238
+ ## Advanced Usage
239
+
240
+ ### Keyboard Shortcuts
241
+
242
+ Assign keyboard shortcuts to your services:
243
+
244
+ 1. **System Preferences** (**System Settings** on macOS 13 and later) → **Keyboard** → **Shortcuts** → **Services**
245
+ 2. Find your Content Core services
246
+ 3. **Click** next to the service name
247
+ 4. **Press** your desired key combination
248
+
249
+ ### Batch Processing
250
+
251
+ Services work with **multiple selected files**:
252
+
253
+ 1. **Select multiple files** in Finder (Cmd+click)
254
+ 2. **Right-click** → **Services** → Choose your service
255
+ 3. **All files** will be processed sequentially
256
+
257
+ ### Integration with Other Apps
258
+
259
+ The extracted content works great with:
260
+
261
+ - **Note-taking apps** (Obsidian, Notion, Bear)
262
+ - **Research tools** (DEVONthink, Zotero)
263
+ - **Writing apps** (Ulysses, Scrivener)
264
+ - **Code editors** (VS Code, Sublime Text)
265
+
266
+ Simply use the clipboard versions and paste into your preferred app!
267
+
268
+ ## Tips & Best Practices
269
+
270
+ 1. **Use descriptive file names** - They appear in notifications
271
+ 2. **Process one large file at a time** - Videos/audio take time to transcribe
272
+ 3. **Check clipboard** after extraction - Some content might be very long
273
+ 4. **Use TextEdit version** for long documents to review before copying
274
+ 5. **Set up API keys** for full functionality with media files
275
+
276
+ ## Uninstalling
277
+
278
+ To remove the services:
279
+
280
+ ```bash
281
+ rm -rf ~/Library/Services/"Extract Content (Clipboard).workflow"
282
+ rm -rf ~/Library/Services/"Extract Content (TextEdit).workflow"
283
+ rm -rf ~/Library/Services/"Summarize Content (Clipboard).workflow"
284
+ rm -rf ~/Library/Services/"Summarize Content (TextEdit).workflow"
285
+ ```
286
+
287
+ Then restart Finder: `killall Finder`
@@ -13,7 +13,7 @@ The [Model Context Protocol (MCP)](https://modelcontextprotocol.io/) is an open
13
13
  - **Rich metadata**: Returns detailed information about extraction process and content
14
14
  - **Structured JSON responses**: Consistent format with success/error handling
15
15
  - **Wide format support**: Handles web pages, PDFs, Word docs, videos, audio files, and more
16
- - **Zero-install option**: Run with `uvx` without local installation
16
+ - **Zero-install option**: Run MCP server and CLI tools with `uvx` without local installation
17
17
 
18
18
  ## Installation
19
19
 
@@ -30,8 +30,13 @@ content-core-mcp
30
30
  ### Option 2: Use with uvx (Recommended for production)
31
31
 
32
32
  ```bash
33
- # Run directly without installation
33
+ # Run MCP server directly without installation
34
34
  uvx --from "content-core[mcp]" content-core-mcp
35
+
36
+ # Also works for CLI tools
37
+ uvx --from "content-core" ccore https://example.com
38
+ uvx --from "content-core" cclean "messy text"
39
+ uvx --from "content-core" csum "long content" --context "bullet points"
35
40
  ```
36
41
 
37
42
  ## Claude Desktop Setup
@@ -81,7 +86,14 @@ Add Content Core to your Claude Desktop configuration file:
81
86
 
82
87
  ### With Environment Variables
83
88
 
84
- If you need to pass API keys or other configuration:
89
+ For optimal functionality, you'll need to configure API keys. Here's what each key does:
90
+
91
+ **Required:**
92
+ - `OPENAI_API_KEY` - **Required for audio/video transcription and content cleaning**
93
+
94
+ **Optional (but recommended):**
95
+ - `FIRECRAWL_API_KEY` - **Improved web crawling and content extraction from URLs**
96
+ - `JINA_API_KEY` - **Alternative web crawling service (fallback when Firecrawl unavailable)**
85
97
 
86
98
  ```json
87
99
  {
@@ -94,15 +106,21 @@ If you need to pass API keys or other configuration:
94
106
  "content-core-mcp"
95
107
  ],
96
108
  "env": {
97
- "OPENAI_API_KEY": "your-openai-key",
98
- "FIRECRAWL_API_KEY": "your-firecrawl-key",
99
- "JINA_API_KEY": "your-jina-key"
109
+ "OPENAI_API_KEY": "sk-your-openai-key-here",
110
+ "FIRECRAWL_API_KEY": "fc-your-firecrawl-key-here",
111
+ "JINA_API_KEY": "jina-your-jina-key-here"
100
112
  }
101
113
  }
102
114
  }
103
115
  }
104
116
  ```
105
117
 
118
+ **Note:** Without `OPENAI_API_KEY`, you won't be able to:
119
+ - Transcribe audio or video files
120
+ - Use AI-powered content cleaning and summarization features
121
+
122
+ Without the web crawling API keys, Content Core will fall back to basic BeautifulSoup extraction for URLs, which may be less reliable for complex websites.
123
+
106
124
  ## Usage
107
125
 
108
126
  After setting up the MCP server, you can use it directly in Claude Desktop conversations. The server provides one main tool:
@@ -241,18 +259,39 @@ Content Core's MCP server uses the 'auto' engine by default, which automatically
241
259
 
242
260
  ### API Keys
243
261
 
244
- To get the best extraction results, configure these optional API keys:
262
+ To get the best extraction results, configure these API keys:
263
+
264
+ **Required for Audio/Video Processing:**
265
+ ```bash
266
+ # Essential for transcribing audio and video files
267
+ export OPENAI_API_KEY="sk-your-openai-key-here"
268
+ ```
245
269
 
270
+ **Optional but Recommended for Web Extraction:**
246
271
  ```bash
247
- # For enhanced web extraction
248
- export FIRECRAWL_API_KEY="your-firecrawl-key"
249
- export JINA_API_KEY="your-jina-key"
272
+ # For enhanced web crawling (recommended)
273
+ export FIRECRAWL_API_KEY="fc-your-firecrawl-key-here"
250
274
 
251
- # For AI-powered content cleaning and summarization
252
- export OPENAI_API_KEY="your-openai-key"
275
+ # Alternative web crawling service (fallback)
276
+ export JINA_API_KEY="jina-your-jina-key-here"
277
+ ```
278
+
279
+ **Additional AI Models (Optional):**
280
+ ```bash
281
+ # For alternative AI models
253
282
  export GOOGLE_API_KEY="your-google-key"
254
283
  ```
255
284
 
285
+ **What happens without these keys:**
286
+ - **No OPENAI_API_KEY**: Audio/video transcription will fail
287
+ - **No web crawling keys**: URLs will use basic BeautifulSoup extraction (less reliable)
288
+ - **No AI model keys**: Content cleaning/summarization features won't work
289
+
290
+ **Getting API Keys:**
291
+ - **OpenAI**: Visit [OpenAI API Keys](https://platform.openai.com/api-keys)
292
+ - **Firecrawl**: Visit [Firecrawl](https://www.firecrawl.dev/) for enhanced web scraping
293
+ - **Jina**: Visit [Jina AI](https://jina.ai/) for alternative web extraction
294
+
256
295
  ### Custom Prompts
257
296
 
258
297
  You can customize Content Core's behavior by setting a custom prompt path:
@@ -284,6 +323,16 @@ uvx --from "content-core[mcp]" content-core-mcp
284
323
  pip install --force-reinstall content-core[mcp]
285
324
  ```
286
325
 
326
+ **Audio/video extraction failing:**
327
+ - Make sure `OPENAI_API_KEY` is set in your environment variables
328
+ - Check that your OpenAI API key has sufficient credits
329
+ - Audio/video files require OpenAI's Whisper API for transcription
330
+
331
+ **Poor web extraction quality:**
332
+ - Add `FIRECRAWL_API_KEY` for better web scraping results
333
+ - Add `JINA_API_KEY` as a fallback option
334
+ - Without these keys, basic BeautifulSoup extraction is used (limited functionality)
335
+
287
336
  ### Debug Mode
288
337
 
289
338
  For development and debugging, you can run the server with additional logging:
@@ -1,7 +1,7 @@
1
1
  [project]
2
2
  name = "content-core"
3
- version = "1.1.0"
4
- description = "Extract what matters from any media source"
3
+ version = "1.1.2"
4
+ description = "Extract what matters from any media source. Available as Python Library, macOS Service, CLI and MCP Server"
5
5
  readme = "README.md"
6
6
  homepage = "https://github.com/lfnovo/content-core"
7
7
  authors = [
@@ -30,6 +30,7 @@ def suppress_stdout():
30
30
  finally:
31
31
  sys.stdout = original_stdout
32
32
 
33
+
33
34
  # Add parent directory to path to import content_core
34
35
  sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.dirname(__file__))))
35
36
 
@@ -38,38 +39,40 @@ import content_core as cc
38
39
  # Initialize MCP server
39
40
  mcp = FastMCP("Content Core MCP Server")
40
41
 
42
+
41
43
  async def _extract_content_impl(
42
- url: Optional[str] = None,
43
- file_path: Optional[str] = None
44
+ url: Optional[str] = None, file_path: Optional[str] = None
44
45
  ) -> Dict[str, Any]:
45
46
  """
46
- Extract content from a URL or file using Content Core's auto engine.
47
-
47
+ Extract content from a URL or file using Content Core's auto engine. This is useful for processing Youtube transcripts, website content, PDFs, ePUB, Office files, etc. You can also use it to extract transcripts from audio or video files.
48
+
48
49
  Args:
49
50
  url: Optional URL to extract content from
50
51
  file_path: Optional file path to extract content from
51
-
52
+
52
53
  Returns:
53
54
  JSON object containing extracted content and metadata
54
-
55
+
55
56
  Raises:
56
57
  ValueError: If neither or both url and file_path are provided
57
58
  """
58
59
  # Validate input - exactly one must be provided
59
- if (url is None and file_path is None) or (url is not None and file_path is not None):
60
+ if (url is None and file_path is None) or (
61
+ url is not None and file_path is not None
62
+ ):
60
63
  return {
61
64
  "success": False,
62
65
  "error": "Exactly one of 'url' or 'file_path' must be provided",
63
66
  "source_type": None,
64
67
  "source": None,
65
68
  "content": None,
66
- "metadata": None
69
+ "metadata": None,
67
70
  }
68
-
71
+
69
72
  # Determine source type and validate
70
73
  source_type = "url" if url else "file"
71
74
  source = url if url else file_path
72
-
75
+
73
76
  # Additional validation for file paths
74
77
  if file_path:
75
78
  path = Path(file_path)
@@ -80,9 +83,9 @@ async def _extract_content_impl(
80
83
  "source_type": source_type,
81
84
  "source": source,
82
85
  "content": None,
83
- "metadata": None
86
+ "metadata": None,
84
87
  }
85
-
88
+
86
89
  # Security check - ensure no directory traversal
87
90
  try:
88
91
  # Resolve to absolute path and ensure it's not trying to access sensitive areas
@@ -95,30 +98,30 @@ async def _extract_content_impl(
95
98
  "source_type": source_type,
96
99
  "source": source,
97
100
  "content": None,
98
- "metadata": None
101
+ "metadata": None,
99
102
  }
100
-
103
+
101
104
  # Build extraction request
102
105
  extraction_request = {}
103
106
  if url:
104
107
  extraction_request["url"] = url
105
108
  else:
106
109
  extraction_request["file_path"] = str(Path(file_path).resolve())
107
-
110
+
108
111
  # Track start time
109
112
  start_time = datetime.utcnow()
110
-
113
+
111
114
  try:
112
115
  # Use Content Core's extract_content with auto engine
113
116
  logger.info(f"Extracting content from {source_type}: {source}")
114
-
117
+
115
118
  # Suppress stdout to prevent MoviePy and other libraries from interfering with MCP protocol
116
119
  with suppress_stdout():
117
120
  result = await cc.extract_content(extraction_request)
118
-
121
+
119
122
  # Calculate extraction time
120
123
  extraction_time = (datetime.utcnow() - start_time).total_seconds()
121
-
124
+
122
125
  # Build response - result is a ProcessSourceOutput object
123
126
  response = {
124
127
  "success": True,
@@ -132,13 +135,13 @@ async def _extract_content_impl(
132
135
  "content_length": len(result.content or ""),
133
136
  "identified_type": result.identified_type or "unknown",
134
137
  "identified_provider": result.identified_provider or "",
135
- }
138
+ },
136
139
  }
137
-
140
+
138
141
  # Add metadata from the result
139
142
  if result.metadata:
140
143
  response["metadata"].update(result.metadata)
141
-
144
+
142
145
  # Add specific metadata based on source type
143
146
  if source_type == "url":
144
147
  if result.title:
@@ -152,10 +155,10 @@ async def _extract_content_impl(
152
155
  response["metadata"]["file_path"] = result.file_path
153
156
  response["metadata"]["file_size"] = Path(file_path).stat().st_size
154
157
  response["metadata"]["file_extension"] = Path(file_path).suffix
155
-
158
+
156
159
  logger.info(f"Successfully extracted content from {source_type}: {source}")
157
160
  return response
158
-
161
+
159
162
  except Exception as e:
160
163
  logger.error(f"Error extracting content from {source_type} {source}: {str(e)}")
161
164
  return {
@@ -166,26 +169,25 @@ async def _extract_content_impl(
166
169
  "content": None,
167
170
  "metadata": {
168
171
  "extraction_timestamp": start_time.isoformat() + "Z",
169
- "error_type": type(e).__name__
170
- }
172
+ "error_type": type(e).__name__,
173
+ },
171
174
  }
172
175
 
173
176
 
174
177
  @mcp.tool
175
178
  async def extract_content(
176
- url: Optional[str] = None,
177
- file_path: Optional[str] = None
179
+ url: Optional[str] = None, file_path: Optional[str] = None
178
180
  ) -> Dict[str, Any]:
179
181
  """
180
182
  Extract content from a URL or file using Content Core's auto engine.
181
-
183
+
182
184
  Args:
183
185
  url: Optional URL to extract content from
184
186
  file_path: Optional file path to extract content from
185
-
187
+
186
188
  Returns:
187
189
  JSON object containing extracted content and metadata
188
-
190
+
189
191
  Raises:
190
192
  ValueError: If neither or both url and file_path are provided
191
193
  """
@@ -197,15 +199,16 @@ def main():
197
199
  # Additional MoviePy configuration to suppress all output
198
200
  try:
199
201
  import moviepy.config as mp_config
202
+
200
203
  mp_config.check_and_download_cmd("ffmpeg") # Pre-download to avoid logs later
201
204
  except Exception:
202
205
  pass # Ignore if MoviePy isn't available or configured
203
-
206
+
204
207
  logger.info("Starting Content Core MCP Server")
205
-
208
+
206
209
  # Run with STDIO transport for MCP compatibility
207
210
  mcp.run()
208
211
 
209
212
 
210
213
  if __name__ == "__main__":
211
- main()
214
+ main()
@@ -422,7 +422,7 @@ wheels = [
422
422
 
423
423
  [[package]]
424
424
  name = "content-core"
425
- version = "1.1.0"
425
+ version = "1.1.2"
426
426
  source = { editable = "." }
427
427
  dependencies = [
428
428
  { name = "ai-prompter" },
File without changes
File without changes
File without changes
File without changes