content-core 0.8.0__tar.gz → 0.8.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of content-core might be problematic. Click here for more details.

Files changed (60) hide show
  1. {content_core-0.8.0 → content_core-0.8.1}/PKG-INFO +6 -15
  2. {content_core-0.8.0 → content_core-0.8.1}/README.md +2 -8
  3. {content_core-0.8.0 → content_core-0.8.1}/pyproject.toml +4 -5
  4. {content_core-0.8.0 → content_core-0.8.1}/uv.lock +7 -32
  5. {content_core-0.8.0 → content_core-0.8.1}/.github/PULL_REQUEST_TEMPLATE.md +0 -0
  6. {content_core-0.8.0 → content_core-0.8.1}/.github/workflows/publish.yml +0 -0
  7. {content_core-0.8.0 → content_core-0.8.1}/.gitignore +0 -0
  8. {content_core-0.8.0 → content_core-0.8.1}/.python-version +0 -0
  9. {content_core-0.8.0 → content_core-0.8.1}/CONTRIBUTING.md +0 -0
  10. {content_core-0.8.0 → content_core-0.8.1}/LICENSE +0 -0
  11. {content_core-0.8.0 → content_core-0.8.1}/Makefile +0 -0
  12. {content_core-0.8.0 → content_core-0.8.1}/docs/processors.md +0 -0
  13. {content_core-0.8.0 → content_core-0.8.1}/docs/usage.md +0 -0
  14. {content_core-0.8.0 → content_core-0.8.1}/prompts/content/cleanup.jinja +0 -0
  15. {content_core-0.8.0 → content_core-0.8.1}/prompts/content/summarize.jinja +0 -0
  16. {content_core-0.8.0 → content_core-0.8.1}/src/content_core/__init__.py +0 -0
  17. {content_core-0.8.0 → content_core-0.8.1}/src/content_core/cc_config.yaml +0 -0
  18. {content_core-0.8.0 → content_core-0.8.1}/src/content_core/common/__init__.py +0 -0
  19. {content_core-0.8.0 → content_core-0.8.1}/src/content_core/common/exceptions.py +0 -0
  20. {content_core-0.8.0 → content_core-0.8.1}/src/content_core/common/state.py +0 -0
  21. {content_core-0.8.0 → content_core-0.8.1}/src/content_core/common/types.py +0 -0
  22. {content_core-0.8.0 → content_core-0.8.1}/src/content_core/common/utils.py +0 -0
  23. {content_core-0.8.0 → content_core-0.8.1}/src/content_core/config.py +0 -0
  24. {content_core-0.8.0 → content_core-0.8.1}/src/content_core/content/__init__.py +0 -0
  25. {content_core-0.8.0 → content_core-0.8.1}/src/content_core/content/cleanup/__init__.py +0 -0
  26. {content_core-0.8.0 → content_core-0.8.1}/src/content_core/content/cleanup/core.py +0 -0
  27. {content_core-0.8.0 → content_core-0.8.1}/src/content_core/content/extraction/__init__.py +0 -0
  28. {content_core-0.8.0 → content_core-0.8.1}/src/content_core/content/extraction/graph.py +0 -0
  29. {content_core-0.8.0 → content_core-0.8.1}/src/content_core/content/summary/__init__.py +0 -0
  30. {content_core-0.8.0 → content_core-0.8.1}/src/content_core/content/summary/core.py +0 -0
  31. {content_core-0.8.0 → content_core-0.8.1}/src/content_core/logging.py +0 -0
  32. {content_core-0.8.0 → content_core-0.8.1}/src/content_core/models.py +0 -0
  33. {content_core-0.8.0 → content_core-0.8.1}/src/content_core/models_config.yaml +0 -0
  34. {content_core-0.8.0 → content_core-0.8.1}/src/content_core/notebooks/run.ipynb +0 -0
  35. {content_core-0.8.0 → content_core-0.8.1}/src/content_core/processors/audio.py +0 -0
  36. {content_core-0.8.0 → content_core-0.8.1}/src/content_core/processors/docling.py +0 -0
  37. {content_core-0.8.0 → content_core-0.8.1}/src/content_core/processors/office.py +0 -0
  38. {content_core-0.8.0 → content_core-0.8.1}/src/content_core/processors/pdf.py +0 -0
  39. {content_core-0.8.0 → content_core-0.8.1}/src/content_core/processors/text.py +0 -0
  40. {content_core-0.8.0 → content_core-0.8.1}/src/content_core/processors/url.py +0 -0
  41. {content_core-0.8.0 → content_core-0.8.1}/src/content_core/processors/video.py +0 -0
  42. {content_core-0.8.0 → content_core-0.8.1}/src/content_core/processors/youtube.py +0 -0
  43. {content_core-0.8.0 → content_core-0.8.1}/src/content_core/py.typed +0 -0
  44. {content_core-0.8.0 → content_core-0.8.1}/src/content_core/templated_message.py +0 -0
  45. {content_core-0.8.0 → content_core-0.8.1}/src/content_core/tools/__init__.py +0 -0
  46. {content_core-0.8.0 → content_core-0.8.1}/src/content_core/tools/cleanup.py +0 -0
  47. {content_core-0.8.0 → content_core-0.8.1}/src/content_core/tools/extract.py +0 -0
  48. {content_core-0.8.0 → content_core-0.8.1}/src/content_core/tools/summarize.py +0 -0
  49. {content_core-0.8.0 → content_core-0.8.1}/tests/input_content/file.docx +0 -0
  50. {content_core-0.8.0 → content_core-0.8.1}/tests/input_content/file.epub +0 -0
  51. {content_core-0.8.0 → content_core-0.8.1}/tests/input_content/file.md +0 -0
  52. {content_core-0.8.0 → content_core-0.8.1}/tests/input_content/file.mp3 +0 -0
  53. {content_core-0.8.0 → content_core-0.8.1}/tests/input_content/file.mp4 +0 -0
  54. {content_core-0.8.0 → content_core-0.8.1}/tests/input_content/file.pdf +0 -0
  55. {content_core-0.8.0 → content_core-0.8.1}/tests/input_content/file.pptx +0 -0
  56. {content_core-0.8.0 → content_core-0.8.1}/tests/input_content/file.txt +0 -0
  57. {content_core-0.8.0 → content_core-0.8.1}/tests/input_content/file.xlsx +0 -0
  58. {content_core-0.8.0 → content_core-0.8.1}/tests/input_content/file_audio.mp3 +0 -0
  59. {content_core-0.8.0 → content_core-0.8.1}/tests/integration/test_extraction.py +0 -0
  60. {content_core-0.8.0 → content_core-0.8.1}/tests/unit/test_docling.py +0 -0
@@ -1,17 +1,18 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: content-core
3
- Version: 0.8.0
3
+ Version: 0.8.1
4
4
  Summary: Extract what matters from any media source
5
5
  Author-email: LUIS NOVO <lfnovo@gmail.com>
6
6
  License-File: LICENSE
7
7
  Requires-Python: >=3.10
8
8
  Requires-Dist: ai-prompter>=0.2.3
9
9
  Requires-Dist: aiohttp>=3.11
10
+ Requires-Dist: asciidoc>=10.2.1
10
11
  Requires-Dist: bs4>=0.0.2
11
12
  Requires-Dist: dicttoxml>=1.7.16
13
+ Requires-Dist: docling>=2.34.0
12
14
  Requires-Dist: esperanto[openai]>=1.2.0
13
15
  Requires-Dist: firecrawl-py>=2.7.0
14
- Requires-Dist: firecrawl>=2.7.0
15
16
  Requires-Dist: jinja2>=3.1.6
16
17
  Requires-Dist: langdetect>=1.0.9
17
18
  Requires-Dist: langgraph>=0.3.29
@@ -19,6 +20,7 @@ Requires-Dist: loguru>=0.7.3
19
20
  Requires-Dist: moviepy>=2.1.2
20
21
  Requires-Dist: openpyxl>=3.1.5
21
22
  Requires-Dist: pandas>=2.2.3
23
+ Requires-Dist: pillow>=10.4.0
22
24
  Requires-Dist: pymupdf>=1.25.5
23
25
  Requires-Dist: python-docx>=1.1.2
24
26
  Requires-Dist: python-dotenv>=1.1.0
@@ -27,11 +29,6 @@ Requires-Dist: python-pptx>=1.0.2
27
29
  Requires-Dist: readability-lxml>=0.8.4.1
28
30
  Requires-Dist: validators>=0.34.0
29
31
  Requires-Dist: youtube-transcript-api>=1.0.3
30
- Provides-Extra: docling
31
- Requires-Dist: asciidoc; extra == 'docling'
32
- Requires-Dist: docling; extra == 'docling'
33
- Requires-Dist: pandas; extra == 'docling'
34
- Requires-Dist: pillow; extra == 'docling'
35
32
  Description-Content-Type: text/markdown
36
33
 
37
34
  # Content Core
@@ -69,8 +66,6 @@ Install Content Core using `pip`:
69
66
  ```bash
70
67
  # Install the package (without Docling)
71
68
  pip install content-core
72
- # Install with Docling support
73
- pip install content-core[docling]
74
69
  ```
75
70
 
76
71
  Alternatively, if you’re developing locally:
@@ -257,15 +252,11 @@ if __name__ == "__main__":
257
252
 
258
253
  Content Core supports an optional Docling-based extraction engine for rich document formats (PDF, DOCX, PPTX, XLSX, Markdown, AsciiDoc, HTML, CSV, Images).
259
254
 
260
- ### Installation
261
-
262
- ```bash
263
- # Install with Docling support
264
- pip install content-core[docling]
265
- ```
266
255
 
267
256
  ### Enabling Docling
268
257
 
258
+ Docling is not the default engine when parsing documents, so it must be enabled explicitly as described below. Setting the engine to "simple" selects the non-Docling parser.
259
+
269
260
  #### Via configuration file
270
261
 
271
262
  In your `cc_config.yaml` or custom config, set:
@@ -33,8 +33,6 @@ Install Content Core using `pip`:
33
33
  ```bash
34
34
  # Install the package (without Docling)
35
35
  pip install content-core
36
- # Install with Docling support
37
- pip install content-core[docling]
38
36
  ```
39
37
 
40
38
  Alternatively, if you’re developing locally:
@@ -221,15 +219,11 @@ if __name__ == "__main__":
221
219
 
222
220
  Content Core supports an optional Docling-based extraction engine for rich document formats (PDF, DOCX, PPTX, XLSX, Markdown, AsciiDoc, HTML, CSV, Images).
223
221
 
224
- ### Installation
225
-
226
- ```bash
227
- # Install with Docling support
228
- pip install content-core[docling]
229
- ```
230
222
 
231
223
  ### Enabling Docling
232
224
 
225
+ Docling is not the default engine when parsing documents, so it must be enabled explicitly as described below. Setting the engine to "simple" selects the non-Docling parser.
226
+
233
227
  #### Via configuration file
234
228
 
235
229
  In your `cc_config.yaml` or custom config, set:
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "content-core"
3
- version = "0.8.0"
3
+ version = "0.8.1"
4
4
  description = "Extract what matters from any media source"
5
5
  readme = "README.md"
6
6
  homepage = "https://github.com/lfnovo/content-core"
@@ -29,13 +29,12 @@ dependencies = [
29
29
  "ai-prompter>=0.2.3",
30
30
  "moviepy>=2.1.2",
31
31
  "readability-lxml>=0.8.4.1",
32
- "firecrawl>=2.7.0",
33
32
  "firecrawl-py>=2.7.0",
33
+ "docling>=2.34.0",
34
+ "pillow>=10.4.0",
35
+ "asciidoc>=10.2.1",
34
36
  ]
35
37
 
36
- [project.optional-dependencies]
37
- docling = ["docling", "Pillow", "pandas", "asciidoc"]
38
-
39
38
  [project.scripts]
40
39
  ccore = "content_core:ccore"
41
40
  cclean = "content_core:cclean"
@@ -410,15 +410,16 @@ wheels = [
410
410
 
411
411
  [[package]]
412
412
  name = "content-core"
413
- version = "0.8.0"
413
+ version = "0.8.1"
414
414
  source = { editable = "." }
415
415
  dependencies = [
416
416
  { name = "ai-prompter" },
417
417
  { name = "aiohttp" },
418
+ { name = "asciidoc" },
418
419
  { name = "bs4" },
419
420
  { name = "dicttoxml" },
421
+ { name = "docling" },
420
422
  { name = "esperanto", extra = ["openai"] },
421
- { name = "firecrawl" },
422
423
  { name = "firecrawl-py" },
423
424
  { name = "jinja2" },
424
425
  { name = "langdetect" },
@@ -427,6 +428,7 @@ dependencies = [
427
428
  { name = "moviepy" },
428
429
  { name = "openpyxl" },
429
430
  { name = "pandas" },
431
+ { name = "pillow" },
430
432
  { name = "pymupdf" },
431
433
  { name = "python-docx" },
432
434
  { name = "python-dotenv" },
@@ -437,14 +439,6 @@ dependencies = [
437
439
  { name = "youtube-transcript-api" },
438
440
  ]
439
441
 
440
- [package.optional-dependencies]
441
- docling = [
442
- { name = "asciidoc" },
443
- { name = "docling" },
444
- { name = "pandas" },
445
- { name = "pillow" },
446
- ]
447
-
448
442
  [package.dev-dependencies]
449
443
  dev = [
450
444
  { name = "ipykernel" },
@@ -459,12 +453,11 @@ dev = [
459
453
  requires-dist = [
460
454
  { name = "ai-prompter", specifier = ">=0.2.3" },
461
455
  { name = "aiohttp", specifier = ">=3.11" },
462
- { name = "asciidoc", marker = "extra == 'docling'" },
456
+ { name = "asciidoc", specifier = ">=10.2.1" },
463
457
  { name = "bs4", specifier = ">=0.0.2" },
464
458
  { name = "dicttoxml", specifier = ">=1.7.16" },
465
- { name = "docling", marker = "extra == 'docling'" },
459
+ { name = "docling", specifier = ">=2.34.0" },
466
460
  { name = "esperanto", extras = ["openai"], specifier = ">=1.2.0" },
467
- { name = "firecrawl", specifier = ">=2.7.0" },
468
461
  { name = "firecrawl-py", specifier = ">=2.7.0" },
469
462
  { name = "jinja2", specifier = ">=3.1.6" },
470
463
  { name = "langdetect", specifier = ">=1.0.9" },
@@ -473,8 +466,7 @@ requires-dist = [
473
466
  { name = "moviepy", specifier = ">=2.1.2" },
474
467
  { name = "openpyxl", specifier = ">=3.1.5" },
475
468
  { name = "pandas", specifier = ">=2.2.3" },
476
- { name = "pandas", marker = "extra == 'docling'" },
477
- { name = "pillow", marker = "extra == 'docling'" },
469
+ { name = "pillow", specifier = ">=10.4.0" },
478
470
  { name = "pymupdf", specifier = ">=1.25.5" },
479
471
  { name = "python-docx", specifier = ">=1.1.2" },
480
472
  { name = "python-dotenv", specifier = ">=1.1.0" },
@@ -790,23 +782,6 @@ wheels = [
790
782
  { url = "https://files.pythonhosted.org/packages/18/79/1b8fa1bb3568781e84c9200f951c735f3f157429f44be0495da55894d620/filetype-1.2.0-py2.py3-none-any.whl", hash = "sha256:7ce71b6880181241cf7ac8697a2f1eb6a8bd9b429f7ad6d27b8db9ba5f1c2d25", size = 19970 },
791
783
  ]
792
784
 
793
- [[package]]
794
- name = "firecrawl"
795
- version = "2.7.0"
796
- source = { registry = "https://pypi.org/simple" }
797
- dependencies = [
798
- { name = "aiohttp" },
799
- { name = "nest-asyncio" },
800
- { name = "pydantic" },
801
- { name = "python-dotenv" },
802
- { name = "requests" },
803
- { name = "websockets" },
804
- ]
805
- sdist = { url = "https://files.pythonhosted.org/packages/f9/61/291b74aa0e6e8641779b6c606077e9c90a2446513ce4be480ebdaf22a521/firecrawl-2.7.0.tar.gz", hash = "sha256:c612c38ceec056d3ef058fcdda1bea1d3f6bb556e859dd9c2b690a7c1514a57b", size = 37821 }
806
- wheels = [
807
- { url = "https://files.pythonhosted.org/packages/48/38/12115a9c793c15ec88ee8512d748ad15ed48490dbc00169f6bec15f444f8/firecrawl-2.7.0-py3-none-any.whl", hash = "sha256:cf01d629eb4513756218fd4170e5a34d1a5994cb73f6fa7fa6828e7f6ba95fb7", size = 37674 },
808
- ]
809
-
810
785
  [[package]]
811
786
  name = "firecrawl-py"
812
787
  version = "2.7.0"
File without changes
File without changes
File without changes
File without changes