content-core 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of content-core might be problematic. Click here for more details.

Files changed (51) hide show
  1. content_core-0.1.0/.github/PULL_REQUEST_TEMPLATE.md +22 -0
  2. content_core-0.1.0/.github/workflows/publish.yml +38 -0
  3. content_core-0.1.0/.gitignore +21 -0
  4. content_core-0.1.0/.python-version +1 -0
  5. content_core-0.1.0/.windsurfrules +1 -0
  6. content_core-0.1.0/Makefile +8 -0
  7. content_core-0.1.0/PKG-INFO +250 -0
  8. content_core-0.1.0/README.md +222 -0
  9. content_core-0.1.0/prompts/content/cleanup.jinja +16 -0
  10. content_core-0.1.0/prompts/content/summarize.jinja +25 -0
  11. content_core-0.1.0/pyproject.toml +60 -0
  12. content_core-0.1.0/src/content_core/__init__.py +214 -0
  13. content_core-0.1.0/src/content_core/common/__init__.py +21 -0
  14. content_core-0.1.0/src/content_core/common/exceptions.py +70 -0
  15. content_core-0.1.0/src/content_core/common/state.py +30 -0
  16. content_core-0.1.0/src/content_core/common/utils.py +31 -0
  17. content_core-0.1.0/src/content_core/config.py +37 -0
  18. content_core-0.1.0/src/content_core/content/__init__.py +0 -0
  19. content_core-0.1.0/src/content_core/content/cleanup/__init__.py +5 -0
  20. content_core-0.1.0/src/content_core/content/cleanup/core.py +15 -0
  21. content_core-0.1.0/src/content_core/content/extraction/__init__.py +13 -0
  22. content_core-0.1.0/src/content_core/content/extraction/graph.py +148 -0
  23. content_core-0.1.0/src/content_core/content/summary/__init__.py +5 -0
  24. content_core-0.1.0/src/content_core/content/summary/core.py +15 -0
  25. content_core-0.1.0/src/content_core/notebooks/run.ipynb +558 -0
  26. content_core-0.1.0/src/content_core/processors/audio.py +106 -0
  27. content_core-0.1.0/src/content_core/processors/office.py +331 -0
  28. content_core-0.1.0/src/content_core/processors/pdf.py +170 -0
  29. content_core-0.1.0/src/content_core/processors/text.py +37 -0
  30. content_core-0.1.0/src/content_core/processors/url.py +191 -0
  31. content_core-0.1.0/src/content_core/processors/video.py +167 -0
  32. content_core-0.1.0/src/content_core/processors/youtube.py +159 -0
  33. content_core-0.1.0/src/content_core/prompter.py +115 -0
  34. content_core-0.1.0/src/content_core/py.typed +2 -0
  35. content_core-0.1.0/src/content_core/templated_message.py +57 -0
  36. content_core-0.1.0/src/content_core/tools/__init__.py +9 -0
  37. content_core-0.1.0/src/content_core/tools/cleanup.py +15 -0
  38. content_core-0.1.0/src/content_core/tools/extract.py +21 -0
  39. content_core-0.1.0/src/content_core/tools/summarize.py +17 -0
  40. content_core-0.1.0/tests/input_content/file.docx +0 -0
  41. content_core-0.1.0/tests/input_content/file.epub +0 -0
  42. content_core-0.1.0/tests/input_content/file.md +73 -0
  43. content_core-0.1.0/tests/input_content/file.mp3 +0 -0
  44. content_core-0.1.0/tests/input_content/file.mp4 +0 -0
  45. content_core-0.1.0/tests/input_content/file.pdf +0 -0
  46. content_core-0.1.0/tests/input_content/file.pptx +0 -0
  47. content_core-0.1.0/tests/input_content/file.txt +73 -0
  48. content_core-0.1.0/tests/input_content/file.xlsx +0 -0
  49. content_core-0.1.0/tests/input_content/file_audio.mp3 +0 -0
  50. content_core-0.1.0/tests/integration/test_extraction.py +202 -0
  51. content_core-0.1.0/uv.lock +2714 -0
@@ -0,0 +1,22 @@
1
+ <!--
2
+ Thanks for contributing a pull request! Please ensure you have taken a look at CONTRIBUTING.md
3
+ -->
4
+
5
+ #### Reference Issues/PRs
6
+ <!--
7
+ Example: Fixes #1234. See also #3456.
8
+ Please use keywords (e.g., Fixes) to create link to the issues or pull requests
9
+ you resolved, so that they will automatically be closed when your pull request
10
+ is merged. See https://github.com/blog/1506-closing-issues-via-pull-requests
11
+ -->
12
+
13
+
14
+ #### What does this implement/fix? Explain your changes.
15
+
16
+
17
+ #### Any other comments?
18
+
19
+
20
+ <!--
21
+ Thanks for contributing!
22
+ -->
@@ -0,0 +1,38 @@
1
+ name: Publish
2
+
3
+ on:
4
+ push:
5
+ tags:
6
+ - 'v*'
7
+ - '[0-9]*.[0-9]*.[0-9]*'
8
+
9
+ jobs:
10
+ publish:
11
+ runs-on: ubuntu-latest
12
+
13
+ steps:
14
+ - uses: actions/checkout@v4
15
+
16
+ - name: Set up Python
17
+ uses: actions/setup-python@v5
18
+ with:
19
+ python-version: "3.10"
20
+
21
+ - name: Install uv
22
+ run: |
23
+ curl -LsSf https://astral.sh/uv/install.sh | sh
24
+ echo "$HOME/.cargo/bin" >> $GITHUB_PATH
25
+
26
+ - name: Install dependencies and build tools
27
+ run: |
28
+ uv venv
29
+ rm -rf dist
30
+ uv sync
31
+
32
+ - name: Build package
33
+ run: uv build
34
+
35
+ - name: Publish to PyPI
36
+ env:
37
+ PYPI_TOKEN: ${{ secrets.PYPI_TOKEN }}
38
+ run: uv publish --token "$PYPI_TOKEN"
@@ -0,0 +1,21 @@
1
+ # Python-generated files
2
+ __pycache__/
3
+ *.py[oc]
4
+ build/
5
+ dist/
6
+ wheels/
7
+ *.egg-info
8
+
9
+ # Virtual environments
10
+ .venv
11
+ .env
12
+
13
+ **/tmp
14
+
15
+ **/*.DS_Store
16
+ .vscode/
17
+
18
+ ai_docs/
19
+
20
+ todo.md
21
+ WIP/
@@ -0,0 +1 @@
1
+ 3.10
@@ -0,0 +1 @@
1
+ Also use uv as the package manager: uv run, uv sync, uv add.
@@ -0,0 +1,8 @@
1
+ test:
2
+ uv run pytest -v
3
+
4
+ build-docs:
5
+ repomix . --include "**/*.py,**/*.yaml" --compress --style xml -o ai_docs/core.txt
6
+
7
+ ruff:
8
+ ruff check . --fix
@@ -0,0 +1,250 @@
1
+ Metadata-Version: 2.4
2
+ Name: content-core
3
+ Version: 0.1.0
4
+ Summary: Extract what matters from any media source
5
+ Author-email: LUIS NOVO <lfnovo@gmail.com>
6
+ Requires-Python: >=3.10
7
+ Requires-Dist: aiohttp>=3.11.16
8
+ Requires-Dist: bs4>=0.0.2
9
+ Requires-Dist: dicttoxml>=1.7.16
10
+ Requires-Dist: esperanto>=1.2.0
11
+ Requires-Dist: google-genai>=1.10.0
12
+ Requires-Dist: jinja2>=3.1.6
13
+ Requires-Dist: langdetect>=1.0.9
14
+ Requires-Dist: langgraph>=0.3.29
15
+ Requires-Dist: loguru>=0.7.3
16
+ Requires-Dist: openai>=1.73.0
17
+ Requires-Dist: openpyxl>=3.1.5
18
+ Requires-Dist: pandas>=2.2.3
19
+ Requires-Dist: pydub>=0.25.1
20
+ Requires-Dist: pymupdf>=1.25.5
21
+ Requires-Dist: python-docx>=1.1.2
22
+ Requires-Dist: python-dotenv>=1.1.0
23
+ Requires-Dist: python-magic>=0.4.27
24
+ Requires-Dist: python-pptx>=1.0.2
25
+ Requires-Dist: validators>=0.34.0
26
+ Requires-Dist: youtube-transcript-api>=1.0.3
27
+ Description-Content-Type: text/markdown
28
+
29
+ # Content Core
30
+
31
+ [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
32
+
33
+ **Content Core** is a versatile Python library designed to extract and process content from various sources, providing a unified interface for handling text, web pages, and local files.
34
+
35
+ ## Overview
36
+
37
+ The primary goal of Content Core is to simplify the process of ingesting content from diverse origins. Whether you have raw text, a URL pointing to an article, or a local file like a video or markdown document, Content Core aims to extract the meaningful content for further use.
38
+
39
+ ## Key Features
40
+
41
+ * **Multi-Source Extraction:** Handles content from:
42
+ * Direct text strings.
43
+ * Web URLs (using robust extraction methods).
44
+ * Local files (including automatic transcription for video/audio files and parsing for text-based formats).
45
+ * **Intelligent Processing:** Applies appropriate extraction techniques based on the source type.
46
+ * **Content Cleaning (Optional):** Integrates with LLMs (via `prompter.py` and Jinja templates) to refine and clean the extracted content.
47
+ * **Asynchronous:** Built with `asyncio` for efficient I/O operations.
48
+
49
+ ## Getting Started
50
+
51
+ ### Installation
52
+
53
+ Install Content Core using `pip`:
54
+
55
+ ```bash
56
+ # Install the package
57
+ pip install content-core
58
+ ```
59
+
60
+ Alternatively, if you’re developing locally:
61
+
62
+ ```bash
63
+ # Clone the repository
64
+ git clone https://github.com/lfnovo/content-core
65
+ cd content-core
66
+
67
+ # Install with uv
68
+ uv sync
69
+ ```
70
+
71
+ ### Command-Line Interface
72
+
73
+ Content Core provides three CLI commands for extracting, cleaning, and summarizing content:
74
+ ccore, cclean, and csum. These commands support input from text, URLs, files, or piped data (e.g., via cat file | command).
75
+
76
+ #### ccore - Extract Content
77
+
78
+ Extracts content from text, URLs, or files, with optional formatting.
79
+ Usage:
80
+ ```bash
81
+ ccore [-f|--format xml|json|text] [-d|--debug] [content]
82
+ ```
83
+ Options:
84
+ - `-f`, `--format`: Output format (xml, json, or text). Default: text.
85
+ - `-d`, `--debug`: Enable debug logging.
86
+ - `content`: Input content (text, URL, or file path). If omitted, reads from stdin.
87
+
88
+ Examples:
89
+
90
+ ```bash
91
+ # Extract from a URL as text
92
+ ccore https://example.com
93
+
94
+ # Extract from a file as JSON
95
+ ccore -f json document.pdf
96
+
97
+ # Extract from piped text as XML
98
+ echo "Sample text" | ccore --format xml
99
+ ```
100
+
101
+ #### cclean - Clean Content
102
+ Cleans content by removing unnecessary formatting, spaces, or artifacts. Accepts text, JSON, XML input, URLs, or file paths.
103
+ Usage:
104
+
105
+ ```bash
106
+ cclean [-d|--debug] [content]
107
+ ```
108
+
109
+ Options:
110
+ - `-d`, `--debug`: Enable debug logging.
111
+ - `content`: Input content to clean (text, URL, file path, JSON, or XML). If omitted, reads from stdin.
112
+
113
+ Examples:
114
+
115
+ ```bash
116
+ # Clean a text string
117
+ cclean " messy text "
118
+
119
+ # Clean piped JSON
120
+ echo '{"content": " messy text "}' | cclean
121
+
122
+ # Clean content from a URL
123
+ cclean https://example.com
124
+
125
+ # Clean a file’s content
126
+ cclean document.txt
127
+ ```
128
+
129
+ #### csum - Summarize Content
130
+
131
+ Summarizes content with an optional context to guide the summary style. Accepts text, JSON, XML input, URLs, or file paths.
132
+
133
+ Usage:
134
+
135
+ ```bash
136
+ csum [--context "context text"] [-d|--debug] [content]
137
+ ```
138
+
139
+ Options:
140
+ - `--context`: Context for summarization (e.g., "explain to a child"). Default: none.
141
+ - `-d`, `--debug`: Enable debug logging.
142
+ - `content`: Input content to summarize (text, URL, file path, JSON, or XML). If omitted, reads from stdin.
143
+
144
+ Examples:
145
+
146
+ ```bash
147
+ # Summarize text
148
+ csum "AI is transforming industries."
149
+
150
+ # Summarize with context
151
+ csum --context "in bullet points" "AI is transforming industries."
152
+
153
+ # Summarize piped content
154
+ cat article.txt | csum --context "one sentence"
155
+
156
+ # Summarize content from URL
157
+ csum https://example.com
158
+
159
+ # Summarize a file's content
160
+ csum document.txt
161
+ ```
162
+
163
+ ## Using with Langchain
164
+
165
+ For users integrating with the [Langchain](https://python.langchain.com/) framework, `content-core` exposes a set of compatible tools. These tools, located in the `src/content_core/tools` directory, allow you to leverage `content-core` extraction, cleaning, and summarization capabilities directly within your Langchain agents and chains.
166
+
167
+ You can import and use these tools like any other Langchain tool. For example:
168
+
169
+ ```python
170
+ from content_core.tools import extract_content_tool, cleanup_content_tool, summarize_content_tool
171
+ from langchain.agents import initialize_agent, AgentType
172
+
173
+ tools = [extract_content_tool, cleanup_content_tool, summarize_content_tool]
174
+ agent = initialize_agent(tools, llm, agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION, verbose=True)
175
+ agent.run("Extract the content from https://example.com and then summarize it.")
176
+ ```
177
+
178
+ Refer to the source code in `src/content_core/tools` for specific tool implementations and usage details.
179
+
180
+ ## Basic Usage
181
+
182
+ The core functionality revolves around the extract_content function.
183
+
184
+ ```python
185
+ import asyncio
186
+ from content_core.extraction import extract_content
187
+
188
+ async def main():
189
+ # Extract from raw text
190
+ text_data = await extract_content({"content": "This is my sample text content."})
191
+ print(text_data)
192
+
193
+ # Extract from a URL
194
+ url_data = await extract_content({"url": "https://www.example.com"})
195
+ print(url_data)
196
+
197
+ # Extract from a local video file (gets transcript)
198
+ video_data = await extract_content({"file_path": "path/to/your/video.mp4"})
199
+ print(video_data)
200
+
201
+ # Extract from a local markdown file
202
+ md_data = await extract_content({"file_path": "path/to/your/document.md"})
203
+ print(md_data)
204
+
205
+ if __name__ == "__main__":
206
+ asyncio.run(main())
207
+ ```
208
+
209
+ (See `src/content_core/notebooks/run.ipynb` for more detailed examples.)
210
+
211
+ ## Configuration
212
+
213
+ Configuration settings (like API keys for external services, logging levels) can be managed through environment variables or `.env` files, loaded automatically via `python-dotenv`.
214
+
215
+ Example `.env`:
216
+
217
+ ```plaintext
218
+ OPENAI_API_KEY=your-key-here
219
+ GOOGLE_API_KEY=your-key-here
220
+ ```
221
+
222
+ ## Development
223
+
224
+ To set up a development environment:
225
+
226
+ ```bash
227
+ # Clone the repository
228
+ git clone https://github.com/lfnovo/content-core
229
+ cd content-core
230
+
231
+ # Create virtual environment and install dependencies
232
+ uv venv
233
+ source .venv/bin/activate
234
+ uv sync --group dev
235
+
236
+ # Run tests
237
+ make test
238
+
239
+ # Lint code
240
+ make ruff
241
+
242
+ # See all commands
243
+ make help
244
+ ```
245
+
246
+ ## Contributing
247
+ Contributions are welcome! Please follow standard practices (fork, feature branch, pull request).
248
+
249
+ ## License
250
+ This project is licensed under the MIT License - see the LICENSE file for details.
@@ -0,0 +1,222 @@
1
+ # Content Core
2
+
3
+ [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
4
+
5
+ **Content Core** is a versatile Python library designed to extract and process content from various sources, providing a unified interface for handling text, web pages, and local files.
6
+
7
+ ## Overview
8
+
9
+ The primary goal of Content Core is to simplify the process of ingesting content from diverse origins. Whether you have raw text, a URL pointing to an article, or a local file like a video or markdown document, Content Core aims to extract the meaningful content for further use.
10
+
11
+ ## Key Features
12
+
13
+ * **Multi-Source Extraction:** Handles content from:
14
+ * Direct text strings.
15
+ * Web URLs (using robust extraction methods).
16
+ * Local files (including automatic transcription for video/audio files and parsing for text-based formats).
17
+ * **Intelligent Processing:** Applies appropriate extraction techniques based on the source type.
18
+ * **Content Cleaning (Optional):** Integrates with LLMs (via `prompter.py` and Jinja templates) to refine and clean the extracted content.
19
+ * **Asynchronous:** Built with `asyncio` for efficient I/O operations.
20
+
21
+ ## Getting Started
22
+
23
+ ### Installation
24
+
25
+ Install Content Core using `pip`:
26
+
27
+ ```bash
28
+ # Install the package
29
+ pip install content-core
30
+ ```
31
+
32
+ Alternatively, if you’re developing locally:
33
+
34
+ ```bash
35
+ # Clone the repository
36
+ git clone https://github.com/lfnovo/content-core
37
+ cd content-core
38
+
39
+ # Install with uv
40
+ uv sync
41
+ ```
42
+
43
+ ### Command-Line Interface
44
+
45
+ Content Core provides three CLI commands for extracting, cleaning, and summarizing content:
46
+ ccore, cclean, and csum. These commands support input from text, URLs, files, or piped data (e.g., via cat file | command).
47
+
48
+ #### ccore - Extract Content
49
+
50
+ Extracts content from text, URLs, or files, with optional formatting.
51
+ Usage:
52
+ ```bash
53
+ ccore [-f|--format xml|json|text] [-d|--debug] [content]
54
+ ```
55
+ Options:
56
+ - `-f`, `--format`: Output format (xml, json, or text). Default: text.
57
+ - `-d`, `--debug`: Enable debug logging.
58
+ - `content`: Input content (text, URL, or file path). If omitted, reads from stdin.
59
+
60
+ Examples:
61
+
62
+ ```bash
63
+ # Extract from a URL as text
64
+ ccore https://example.com
65
+
66
+ # Extract from a file as JSON
67
+ ccore -f json document.pdf
68
+
69
+ # Extract from piped text as XML
70
+ echo "Sample text" | ccore --format xml
71
+ ```
72
+
73
+ #### cclean - Clean Content
74
+ Cleans content by removing unnecessary formatting, spaces, or artifacts. Accepts text, JSON, XML input, URLs, or file paths.
75
+ Usage:
76
+
77
+ ```bash
78
+ cclean [-d|--debug] [content]
79
+ ```
80
+
81
+ Options:
82
+ - `-d`, `--debug`: Enable debug logging.
83
+ - `content`: Input content to clean (text, URL, file path, JSON, or XML). If omitted, reads from stdin.
84
+
85
+ Examples:
86
+
87
+ ```bash
88
+ # Clean a text string
89
+ cclean " messy text "
90
+
91
+ # Clean piped JSON
92
+ echo '{"content": " messy text "}' | cclean
93
+
94
+ # Clean content from a URL
95
+ cclean https://example.com
96
+
97
+ # Clean a file’s content
98
+ cclean document.txt
99
+ ```
100
+
101
+ #### csum - Summarize Content
102
+
103
+ Summarizes content with an optional context to guide the summary style. Accepts text, JSON, XML input, URLs, or file paths.
104
+
105
+ Usage:
106
+
107
+ ```bash
108
+ csum [--context "context text"] [-d|--debug] [content]
109
+ ```
110
+
111
+ Options:
112
+ - `--context`: Context for summarization (e.g., "explain to a child"). Default: none.
113
+ - `-d`, `--debug`: Enable debug logging.
114
+ - `content`: Input content to summarize (text, URL, file path, JSON, or XML). If omitted, reads from stdin.
115
+
116
+ Examples:
117
+
118
+ ```bash
119
+ # Summarize text
120
+ csum "AI is transforming industries."
121
+
122
+ # Summarize with context
123
+ csum --context "in bullet points" "AI is transforming industries."
124
+
125
+ # Summarize piped content
126
+ cat article.txt | csum --context "one sentence"
127
+
128
+ # Summarize content from URL
129
+ csum https://example.com
130
+
131
+ # Summarize a file's content
132
+ csum document.txt
133
+ ```
134
+
135
+ ## Using with Langchain
136
+
137
+ For users integrating with the [Langchain](https://python.langchain.com/) framework, `content-core` exposes a set of compatible tools. These tools, located in the `src/content_core/tools` directory, allow you to leverage `content-core` extraction, cleaning, and summarization capabilities directly within your Langchain agents and chains.
138
+
139
+ You can import and use these tools like any other Langchain tool. For example:
140
+
141
+ ```python
142
+ from content_core.tools import extract_content_tool, cleanup_content_tool, summarize_content_tool
143
+ from langchain.agents import initialize_agent, AgentType
144
+
145
+ tools = [extract_content_tool, cleanup_content_tool, summarize_content_tool]
146
+ agent = initialize_agent(tools, llm, agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION, verbose=True)
147
+ agent.run("Extract the content from https://example.com and then summarize it.")
148
+ ```
149
+
150
+ Refer to the source code in `src/content_core/tools` for specific tool implementations and usage details.
151
+
152
+ ## Basic Usage
153
+
154
+ The core functionality revolves around the extract_content function.
155
+
156
+ ```python
157
+ import asyncio
158
+ from content_core.extraction import extract_content
159
+
160
+ async def main():
161
+ # Extract from raw text
162
+ text_data = await extract_content({"content": "This is my sample text content."})
163
+ print(text_data)
164
+
165
+ # Extract from a URL
166
+ url_data = await extract_content({"url": "https://www.example.com"})
167
+ print(url_data)
168
+
169
+ # Extract from a local video file (gets transcript)
170
+ video_data = await extract_content({"file_path": "path/to/your/video.mp4"})
171
+ print(video_data)
172
+
173
+ # Extract from a local markdown file
174
+ md_data = await extract_content({"file_path": "path/to/your/document.md"})
175
+ print(md_data)
176
+
177
+ if __name__ == "__main__":
178
+ asyncio.run(main())
179
+ ```
180
+
181
+ (See `src/content_core/notebooks/run.ipynb` for more detailed examples.)
182
+
183
+ ## Configuration
184
+
185
+ Configuration settings (like API keys for external services, logging levels) can be managed through environment variables or `.env` files, loaded automatically via `python-dotenv`.
186
+
187
+ Example `.env`:
188
+
189
+ ```plaintext
190
+ OPENAI_API_KEY=your-key-here
191
+ GOOGLE_API_KEY=your-key-here
192
+ ```
193
+
194
+ ## Development
195
+
196
+ To set up a development environment:
197
+
198
+ ```bash
199
+ # Clone the repository
200
+ git clone https://github.com/lfnovo/content-core
201
+ cd content-core
202
+
203
+ # Create virtual environment and install dependencies
204
+ uv venv
205
+ source .venv/bin/activate
206
+ uv sync --group dev
207
+
208
+ # Run tests
209
+ make test
210
+
211
+ # Lint code
212
+ make ruff
213
+
214
+ # See all commands
215
+ make help
216
+ ```
217
+
218
+ ## Contributing
219
+ Contributions are welcome! Please follow standard practices (fork, feature branch, pull request).
220
+
221
+ ## License
222
+ This project is licensed under the MIT License - see the LICENSE file for details.
@@ -0,0 +1,16 @@
1
+ # GOAL
2
+
3
+ Adjust the content below to make it clean and readable:
4
+ Remove repeated strings that do not add value to the text.
5
+
6
+ Remove any content unrelated to the text itself (e.g., metadata, artifacts, or extraction errors).
7
+
8
+ Format the output as unstructured but clear text.
9
+
10
+ Do not add extra text, introductions, conclusions, or commentary—only rewrite the provided content as it is.
11
+
12
+ Do not interpret, analyze, or alter the meaning, intent, or narrative of the text—just reformat it for clarity and readability.
13
+
14
+ Do not change the text structure, do not write conclusions about it. Your only job is to make it readable.
15
+
16
+ Keep the text in its original language, regardless of what it is.
@@ -0,0 +1,25 @@
1
+ You are an AI assistant for a personal study platform.
2
+
3
+ In this platform, your user collects various articles and content from the Internet for reference and study.
4
+
5
+ Your role is to summarize the selected content as densely as possible, helping the reader extract maximum value from it without reading the full text.
6
+ Focus solely on the content's value, avoiding unnecessary comments or messages.
7
+
8
+ The summary should be dense, rich in characters, and designed to create a powerful vector representation.
9
+ If the user provided additional context, follow its instructions. Otherwise, summarize the whole content.
10
+
11
+ Do not return any acknowledgments or greetings—only the summary.
12
+
13
+ CONTENT:
14
+
15
+ {{ content }}
16
+
17
+ {% if context %}
18
+ CONTEXT:
19
+
20
+ The user has provided the following additional context for your task:
21
+ {{context}}
22
+ {% endif%}
23
+
24
+
25
+ SUMMARY:
@@ -0,0 +1,60 @@
1
+ [project]
2
+ name = "content-core"
3
+ version = "0.1.0"
4
+ description = "Extract what matters from any media source"
5
+ readme = "README.md"
6
+ authors = [
7
+ { name = "LUIS NOVO", email = "lfnovo@gmail.com" }
8
+ ]
9
+ requires-python = ">=3.10"
10
+ dependencies = [
11
+ "aiohttp>=3.11.16",
12
+ "bs4>=0.0.2",
13
+ "esperanto>=1.2.0",
14
+ "google-genai>=1.10.0",
15
+ "jinja2>=3.1.6",
16
+ "langdetect>=1.0.9",
17
+ "loguru>=0.7.3",
18
+ "openai>=1.73.0",
19
+ "openpyxl>=3.1.5",
20
+ "pandas>=2.2.3",
21
+ "pydub>=0.25.1",
22
+ "pymupdf>=1.25.5",
23
+ "python-docx>=1.1.2",
24
+ "python-dotenv>=1.1.0",
25
+ "python-magic>=0.4.27",
26
+ "python-pptx>=1.0.2",
27
+ "youtube-transcript-api>=1.0.3",
28
+ "langgraph>=0.3.29",
29
+ "dicttoxml>=1.7.16",
30
+ "validators>=0.34.0",
31
+ ]
32
+
33
+ [project.scripts]
34
+ ccore = "content_core:ccore"
35
+ cclean = "content_core:cclean"
36
+ csum = "content_core:csum"
37
+
38
+ [tool.hatch.metadata]
39
+ allow-direct-references = true
40
+
41
+ [build-system]
42
+ requires = ["hatchling", "pip"]
43
+ build-backend = "hatchling.build"
44
+
45
+ [tool.setuptools]
46
+ package-dir = {"content_core" = "src/content_core"}
47
+
48
+ [dependency-groups]
49
+ dev = [
50
+ "ipykernel>=4.0.1",
51
+ "ipywidgets>=4.0.0",
52
+ "pyperclip>=1.9.0",
53
+ "pytest>=7.2.0",
54
+ "pytest-asyncio>=0.21.0",
55
+ ]
56
+
57
+ [tool.pytest.ini_options]
58
+ pythonpath = ["src"]
59
+ asyncio_mode = "auto"
60
+ asyncio_default_fixture_loop_scope = "function"