content-core 1.1.0__py3-none-any.whl → 1.1.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of content-core might be problematic. Click here for more details.

@@ -30,6 +30,7 @@ def suppress_stdout():
30
30
  finally:
31
31
  sys.stdout = original_stdout
32
32
 
33
+
33
34
  # Add parent directory to path to import content_core
34
35
  sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.dirname(__file__))))
35
36
 
@@ -38,38 +39,40 @@ import content_core as cc
38
39
  # Initialize MCP server
39
40
  mcp = FastMCP("Content Core MCP Server")
40
41
 
42
+
41
43
  async def _extract_content_impl(
42
- url: Optional[str] = None,
43
- file_path: Optional[str] = None
44
+ url: Optional[str] = None, file_path: Optional[str] = None
44
45
  ) -> Dict[str, Any]:
45
46
  """
46
- Extract content from a URL or file using Content Core's auto engine.
47
-
47
+ Extract content from a URL or file using Content Core's auto engine. This is useful for processing YouTube transcripts, website content, PDFs, EPUB, Office files, etc. You can also use it to extract transcripts from audio or video files.
48
+
48
49
  Args:
49
50
  url: Optional URL to extract content from
50
51
  file_path: Optional file path to extract content from
51
-
52
+
52
53
  Returns:
53
54
  JSON object containing extracted content and metadata
54
-
55
+
55
56
  Raises:
56
57
  ValueError: If neither or both url and file_path are provided
57
58
  """
58
59
  # Validate input - exactly one must be provided
59
- if (url is None and file_path is None) or (url is not None and file_path is not None):
60
+ if (url is None and file_path is None) or (
61
+ url is not None and file_path is not None
62
+ ):
60
63
  return {
61
64
  "success": False,
62
65
  "error": "Exactly one of 'url' or 'file_path' must be provided",
63
66
  "source_type": None,
64
67
  "source": None,
65
68
  "content": None,
66
- "metadata": None
69
+ "metadata": None,
67
70
  }
68
-
71
+
69
72
  # Determine source type and validate
70
73
  source_type = "url" if url else "file"
71
74
  source = url if url else file_path
72
-
75
+
73
76
  # Additional validation for file paths
74
77
  if file_path:
75
78
  path = Path(file_path)
@@ -80,9 +83,9 @@ async def _extract_content_impl(
80
83
  "source_type": source_type,
81
84
  "source": source,
82
85
  "content": None,
83
- "metadata": None
86
+ "metadata": None,
84
87
  }
85
-
88
+
86
89
  # Security check - ensure no directory traversal
87
90
  try:
88
91
  # Resolve to absolute path and ensure it's not trying to access sensitive areas
@@ -95,30 +98,30 @@ async def _extract_content_impl(
95
98
  "source_type": source_type,
96
99
  "source": source,
97
100
  "content": None,
98
- "metadata": None
101
+ "metadata": None,
99
102
  }
100
-
103
+
101
104
  # Build extraction request
102
105
  extraction_request = {}
103
106
  if url:
104
107
  extraction_request["url"] = url
105
108
  else:
106
109
  extraction_request["file_path"] = str(Path(file_path).resolve())
107
-
110
+
108
111
  # Track start time
109
112
  start_time = datetime.utcnow()
110
-
113
+
111
114
  try:
112
115
  # Use Content Core's extract_content with auto engine
113
116
  logger.info(f"Extracting content from {source_type}: {source}")
114
-
117
+
115
118
  # Suppress stdout to prevent MoviePy and other libraries from interfering with MCP protocol
116
119
  with suppress_stdout():
117
120
  result = await cc.extract_content(extraction_request)
118
-
121
+
119
122
  # Calculate extraction time
120
123
  extraction_time = (datetime.utcnow() - start_time).total_seconds()
121
-
124
+
122
125
  # Build response - result is a ProcessSourceOutput object
123
126
  response = {
124
127
  "success": True,
@@ -132,13 +135,13 @@ async def _extract_content_impl(
132
135
  "content_length": len(result.content or ""),
133
136
  "identified_type": result.identified_type or "unknown",
134
137
  "identified_provider": result.identified_provider or "",
135
- }
138
+ },
136
139
  }
137
-
140
+
138
141
  # Add metadata from the result
139
142
  if result.metadata:
140
143
  response["metadata"].update(result.metadata)
141
-
144
+
142
145
  # Add specific metadata based on source type
143
146
  if source_type == "url":
144
147
  if result.title:
@@ -152,10 +155,10 @@ async def _extract_content_impl(
152
155
  response["metadata"]["file_path"] = result.file_path
153
156
  response["metadata"]["file_size"] = Path(file_path).stat().st_size
154
157
  response["metadata"]["file_extension"] = Path(file_path).suffix
155
-
158
+
156
159
  logger.info(f"Successfully extracted content from {source_type}: {source}")
157
160
  return response
158
-
161
+
159
162
  except Exception as e:
160
163
  logger.error(f"Error extracting content from {source_type} {source}: {str(e)}")
161
164
  return {
@@ -166,26 +169,25 @@ async def _extract_content_impl(
166
169
  "content": None,
167
170
  "metadata": {
168
171
  "extraction_timestamp": start_time.isoformat() + "Z",
169
- "error_type": type(e).__name__
170
- }
172
+ "error_type": type(e).__name__,
173
+ },
171
174
  }
172
175
 
173
176
 
174
177
  @mcp.tool
175
178
  async def extract_content(
176
- url: Optional[str] = None,
177
- file_path: Optional[str] = None
179
+ url: Optional[str] = None, file_path: Optional[str] = None
178
180
  ) -> Dict[str, Any]:
179
181
  """
180
182
  Extract content from a URL or file using Content Core's auto engine.
181
-
183
+
182
184
  Args:
183
185
  url: Optional URL to extract content from
184
186
  file_path: Optional file path to extract content from
185
-
187
+
186
188
  Returns:
187
189
  JSON object containing extracted content and metadata
188
-
190
+
189
191
  Raises:
190
192
  ValueError: If neither or both url and file_path are provided
191
193
  """
@@ -197,15 +199,16 @@ def main():
197
199
  # Additional MoviePy configuration to suppress all output
198
200
  try:
199
201
  import moviepy.config as mp_config
202
+
200
203
  mp_config.check_and_download_cmd("ffmpeg") # Pre-download to avoid logs later
201
204
  except Exception:
202
205
  pass # Ignore if MoviePy isn't available or configured
203
-
206
+
204
207
  logger.info("Starting Content Core MCP Server")
205
-
208
+
206
209
  # Run with STDIO transport for MCP compatibility
207
210
  mcp.run()
208
211
 
209
212
 
210
213
  if __name__ == "__main__":
211
- main()
214
+ main()
@@ -1,7 +1,7 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: content-core
3
- Version: 1.1.0
4
- Summary: Extract what matters from any media source
3
+ Version: 1.1.2
4
+ Summary: Extract what matters from any media source. Available as Python Library, macOS Service, CLI and MCP Server
5
5
  Author-email: LUIS NOVO <lfnovo@gmail.com>
6
6
  License-File: LICENSE
7
7
  Requires-Python: >=3.10
@@ -60,6 +60,7 @@ The primary goal of Content Core is to simplify the process of ingesting content
60
60
  * You can override this by specifying an engine, but `'auto'` is recommended for most users.
61
61
  * **Content Cleaning (Optional):** Likely integrates with LLMs (via `prompter.py` and Jinja templates) to refine and clean the extracted content.
62
62
  * **MCP Server:** Includes a Model Context Protocol (MCP) server for seamless integration with Claude Desktop and other MCP-compatible applications.
63
+ * **macOS Services:** Right-click context menu integration for Finder (extract and summarize files directly).
63
64
  * **Asynchronous:** Built with `asyncio` for efficient I/O operations.
64
65
 
65
66
  ## Getting Started
@@ -92,6 +93,18 @@ uv sync
92
93
  Content Core provides three CLI commands for extracting, cleaning, and summarizing content:
93
94
  ccore, cclean, and csum. These commands support input from text, URLs, files, or piped data (e.g., via cat file | command).
94
95
 
96
+ **Zero-install usage with uvx:**
97
+ ```bash
98
+ # Extract content
99
+ uvx --from "content-core" ccore https://example.com
100
+
101
+ # Clean content
102
+ uvx --from "content-core" cclean "messy content"
103
+
104
+ # Summarize content
105
+ uvx --from "content-core" csum "long text" --context "bullet points"
106
+ ```
107
+
95
108
  #### ccore - Extract Content
96
109
 
97
110
  Extracts content from text, URLs, or files, with optional formatting.
@@ -232,6 +245,49 @@ Add to your `claude_desktop_config.json`:
232
245
 
233
246
  For detailed setup instructions, configuration options, and usage examples, see our [MCP Documentation](docs/mcp.md).
234
247
 
248
+ ## macOS Services Integration
249
+
250
+ Content Core provides powerful right-click integration with macOS Finder, allowing you to extract and summarize content from any file without installation. Choose between clipboard or TextEdit output for maximum flexibility.
251
+
252
+ ### Available Services
253
+
254
+ Create **4 convenient services** for different workflows:
255
+
256
+ - **Extract Content → Clipboard** - Quick copy for immediate pasting
257
+ - **Extract Content → TextEdit** - Review before using
258
+ - **Summarize Content → Clipboard** - Quick summary copying
259
+ - **Summarize Content → TextEdit** - Formatted summary with headers
260
+
261
+ ### Quick Setup
262
+
263
+ 1. **Install uv** (if not already installed):
264
+ ```bash
265
+ curl -LsSf https://astral.sh/uv/install.sh | sh
266
+ ```
267
+
268
+ 2. **Create services manually** using Automator (5 minutes setup)
269
+
270
+ ### Usage
271
+
272
+ **Right-click any supported file** in Finder → **Services** → Choose your option:
273
+
274
+ - **PDFs, Word docs** - Instant text extraction
275
+ - **Videos, audio files** - Automatic transcription
276
+ - **Images** - OCR text recognition
277
+ - **Web content** - Clean text extraction
278
+ - **Multiple files** - Batch processing support
279
+
280
+ ### Features
281
+
282
+ - **Zero-install processing**: Uses `uvx` for isolated execution
283
+ - **Multiple output options**: Clipboard or TextEdit display
284
+ - **System notifications**: Visual feedback on completion
285
+ - **Wide format support**: 20+ file types supported
286
+ - **Batch processing**: Handle multiple files at once
287
+ - **Keyboard shortcuts**: Assignable hotkeys for power users
288
+
289
+ For complete setup instructions with copy-paste scripts, see [macOS Services Documentation](docs/macos.md).
290
+
235
291
  ## Using with Langchain
236
292
 
237
293
  For users integrating with the [Langchain](https://python.langchain.com/) framework, `content-core` exposes a set of compatible tools. These tools, located in the `src/content_core/tools` directory, allow you to leverage `content-core` extraction, cleaning, and summarization capabilities directly within your Langchain agents and chains.
@@ -20,7 +20,7 @@ content_core/content/identification/__init__.py,sha256=x4n8JIjDwmPvAopEEEcmZjloz
20
20
  content_core/content/summary/__init__.py,sha256=ReKCZWKfDtqlInKeh87Y1DEfiNzVWabGybEz3hS2FrI,114
21
21
  content_core/content/summary/core.py,sha256=kEabpETljzUb-yf0NcVWTOuCtayESo74gGBVDX7YTFs,550
22
22
  content_core/mcp/__init__.py,sha256=KNZYH4F9AoW1Orw1BtO3n92Cn-127hI7iF9gnGadueU,95
23
- content_core/mcp/server.py,sha256=m2A63Qle3nJ_Lw46uWkwVvYERtEw84hd7NHAn1rwdAQ,6968
23
+ content_core/mcp/server.py,sha256=ql0uXHkIbZlHQUhUQ4CaRnj19xT6t8ErydWntFgmtUg,7021
24
24
  content_core/notebooks/run.ipynb,sha256=WPBNcQUNXR5MldNMghVcU4vE4ibrVmlANa80baQn8TA,371078
25
25
  content_core/processors/audio.py,sha256=Mie20g_2Akhw6BHBVo3sHMpDRYUkqBI72lEDakscx3s,5729
26
26
  content_core/processors/docling.py,sha256=dkXehsQdfyWXfrK1K_6Pye50ABM7DxMk6TMguabM9Pc,2151
@@ -34,8 +34,8 @@ content_core/tools/__init__.py,sha256=DuJmd7fE-NpDvLP8IW1XY5MUkAQcdks52rn2jk4N8j
34
34
  content_core/tools/cleanup.py,sha256=5IdKedsFyRQMdYzgFSKtsfyxJldbroXQXHesHICNENI,523
35
35
  content_core/tools/extract.py,sha256=-r2_jsuMMXyXxGVqWhh1ilNPo_UMYAbw3Pkp1FzPy5g,577
36
36
  content_core/tools/summarize.py,sha256=DPfeglLWB08q8SvHrsKpOKZ35XjduUDs2J02ISwjdj0,596
37
- content_core-1.1.0.dist-info/METADATA,sha256=9-ppXQ7o-s8BCb2lH5xBiaiYBHmOFmXFrWntHuo9G_o,13017
38
- content_core-1.1.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
39
- content_core-1.1.0.dist-info/entry_points.txt,sha256=ifbBxw37b7gAxZXoduS15KtqHuMHuU58STRkEmgM2zA,147
40
- content_core-1.1.0.dist-info/licenses/LICENSE,sha256=myj0z2T4qIkenCgLsRfx7Wk6UqCQNj5c7O14Qx4zpGg,1066
41
- content_core-1.1.0.dist-info/RECORD,,
37
+ content_core-1.1.2.dist-info/METADATA,sha256=_0Rg4yeU-05hDB_91dvcMXYKMaKcMcU5C8SpkYhtiRs,15072
38
+ content_core-1.1.2.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
39
+ content_core-1.1.2.dist-info/entry_points.txt,sha256=ifbBxw37b7gAxZXoduS15KtqHuMHuU58STRkEmgM2zA,147
40
+ content_core-1.1.2.dist-info/licenses/LICENSE,sha256=myj0z2T4qIkenCgLsRfx7Wk6UqCQNj5c7O14Qx4zpGg,1066
41
+ content_core-1.1.2.dist-info/RECORD,,