content-core 1.0.3__tar.gz → 1.0.4__tar.gz

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of content-core might be problematic. Click here for more details.

Files changed (62)
  1. {content_core-1.0.3 → content_core-1.0.4}/PKG-INFO +1 -1
  2. {content_core-1.0.3 → content_core-1.0.4}/pyproject.toml +1 -1
  3. {content_core-1.0.3 → content_core-1.0.4}/src/content_core/content/summary/core.py +1 -1
  4. {content_core-1.0.3 → content_core-1.0.4}/src/content_core/processors/youtube.py +17 -10
  5. {content_core-1.0.3 → content_core-1.0.4}/uv.lock +1 -1
  6. {content_core-1.0.3 → content_core-1.0.4}/.github/PULL_REQUEST_TEMPLATE.md +0 -0
  7. {content_core-1.0.3 → content_core-1.0.4}/.github/workflows/publish.yml +0 -0
  8. {content_core-1.0.3 → content_core-1.0.4}/.gitignore +0 -0
  9. {content_core-1.0.3 → content_core-1.0.4}/.python-version +0 -0
  10. {content_core-1.0.3 → content_core-1.0.4}/CONTRIBUTING.md +0 -0
  11. {content_core-1.0.3 → content_core-1.0.4}/LICENSE +0 -0
  12. {content_core-1.0.3 → content_core-1.0.4}/Makefile +0 -0
  13. {content_core-1.0.3 → content_core-1.0.4}/README.md +0 -0
  14. {content_core-1.0.3 → content_core-1.0.4}/docs/processors.md +0 -0
  15. {content_core-1.0.3 → content_core-1.0.4}/docs/usage.md +0 -0
  16. {content_core-1.0.3 → content_core-1.0.4}/prompts/content/cleanup.jinja +0 -0
  17. {content_core-1.0.3 → content_core-1.0.4}/prompts/content/summarize.jinja +0 -0
  18. {content_core-1.0.3 → content_core-1.0.4}/src/content_core/__init__.py +0 -0
  19. {content_core-1.0.3 → content_core-1.0.4}/src/content_core/cc_config.yaml +0 -0
  20. {content_core-1.0.3 → content_core-1.0.4}/src/content_core/common/__init__.py +0 -0
  21. {content_core-1.0.3 → content_core-1.0.4}/src/content_core/common/exceptions.py +0 -0
  22. {content_core-1.0.3 → content_core-1.0.4}/src/content_core/common/state.py +0 -0
  23. {content_core-1.0.3 → content_core-1.0.4}/src/content_core/common/types.py +0 -0
  24. {content_core-1.0.3 → content_core-1.0.4}/src/content_core/common/utils.py +0 -0
  25. {content_core-1.0.3 → content_core-1.0.4}/src/content_core/config.py +0 -0
  26. {content_core-1.0.3 → content_core-1.0.4}/src/content_core/content/__init__.py +0 -0
  27. {content_core-1.0.3 → content_core-1.0.4}/src/content_core/content/cleanup/__init__.py +0 -0
  28. {content_core-1.0.3 → content_core-1.0.4}/src/content_core/content/cleanup/core.py +0 -0
  29. {content_core-1.0.3 → content_core-1.0.4}/src/content_core/content/extraction/__init__.py +0 -0
  30. {content_core-1.0.3 → content_core-1.0.4}/src/content_core/content/extraction/graph.py +0 -0
  31. {content_core-1.0.3 → content_core-1.0.4}/src/content_core/content/identification/__init__.py +0 -0
  32. {content_core-1.0.3 → content_core-1.0.4}/src/content_core/content/summary/__init__.py +0 -0
  33. {content_core-1.0.3 → content_core-1.0.4}/src/content_core/logging.py +0 -0
  34. {content_core-1.0.3 → content_core-1.0.4}/src/content_core/models.py +0 -0
  35. {content_core-1.0.3 → content_core-1.0.4}/src/content_core/models_config.yaml +0 -0
  36. {content_core-1.0.3 → content_core-1.0.4}/src/content_core/notebooks/run.ipynb +0 -0
  37. {content_core-1.0.3 → content_core-1.0.4}/src/content_core/processors/audio.py +0 -0
  38. {content_core-1.0.3 → content_core-1.0.4}/src/content_core/processors/docling.py +0 -0
  39. {content_core-1.0.3 → content_core-1.0.4}/src/content_core/processors/office.py +0 -0
  40. {content_core-1.0.3 → content_core-1.0.4}/src/content_core/processors/pdf.py +0 -0
  41. {content_core-1.0.3 → content_core-1.0.4}/src/content_core/processors/text.py +0 -0
  42. {content_core-1.0.3 → content_core-1.0.4}/src/content_core/processors/url.py +0 -0
  43. {content_core-1.0.3 → content_core-1.0.4}/src/content_core/processors/video.py +0 -0
  44. {content_core-1.0.3 → content_core-1.0.4}/src/content_core/py.typed +0 -0
  45. {content_core-1.0.3 → content_core-1.0.4}/src/content_core/templated_message.py +0 -0
  46. {content_core-1.0.3 → content_core-1.0.4}/src/content_core/tools/__init__.py +0 -0
  47. {content_core-1.0.3 → content_core-1.0.4}/src/content_core/tools/cleanup.py +0 -0
  48. {content_core-1.0.3 → content_core-1.0.4}/src/content_core/tools/extract.py +0 -0
  49. {content_core-1.0.3 → content_core-1.0.4}/src/content_core/tools/summarize.py +0 -0
  50. {content_core-1.0.3 → content_core-1.0.4}/tests/input_content/file.docx +0 -0
  51. {content_core-1.0.3 → content_core-1.0.4}/tests/input_content/file.epub +0 -0
  52. {content_core-1.0.3 → content_core-1.0.4}/tests/input_content/file.md +0 -0
  53. {content_core-1.0.3 → content_core-1.0.4}/tests/input_content/file.mp3 +0 -0
  54. {content_core-1.0.3 → content_core-1.0.4}/tests/input_content/file.mp4 +0 -0
  55. {content_core-1.0.3 → content_core-1.0.4}/tests/input_content/file.pdf +0 -0
  56. {content_core-1.0.3 → content_core-1.0.4}/tests/input_content/file.pptx +0 -0
  57. {content_core-1.0.3 → content_core-1.0.4}/tests/input_content/file.txt +0 -0
  58. {content_core-1.0.3 → content_core-1.0.4}/tests/input_content/file.xlsx +0 -0
  59. {content_core-1.0.3 → content_core-1.0.4}/tests/input_content/file_audio.mp3 +0 -0
  60. {content_core-1.0.3 → content_core-1.0.4}/tests/integration/test_cli.py +0 -0
  61. {content_core-1.0.3 → content_core-1.0.4}/tests/integration/test_extraction.py +0 -0
  62. {content_core-1.0.3 → content_core-1.0.4}/tests/unit/test_docling.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: content-core
3
- Version: 1.0.3
3
+ Version: 1.0.4
4
4
  Summary: Extract what matters from any media source
5
5
  Author-email: LUIS NOVO <lfnovo@gmail.com>
6
6
  License-File: LICENSE
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "content-core"
3
- version = "1.0.3"
3
+ version = "1.0.4"
4
4
  description = "Extract what matters from any media source"
5
5
  readme = "README.md"
6
6
  homepage = "https://github.com/lfnovo/content-core"
@@ -8,7 +8,7 @@ async def summarize(content: str, context: str) -> str:
8
8
  templated_message_fn = partial(templated_message, model=ModelFactory.get_model('summary_model'))
9
9
  response = await templated_message_fn(
10
10
  TemplatedMessageInput(
11
- user_prompt_template="content/summarize",
11
+ user_prompt_template="prompts/content/summarize",
12
12
  data={"content": content, "context": context},
13
13
  )
14
14
  )
@@ -3,13 +3,12 @@ import ssl
3
3
 
4
4
  import aiohttp
5
5
  from bs4 import BeautifulSoup
6
- from youtube_transcript_api import YouTubeTranscriptApi # type: ignore
7
- from youtube_transcript_api.formatters import TextFormatter # type: ignore
8
-
9
6
  from content_core.common import ProcessSourceState
10
7
  from content_core.common.exceptions import NoTranscriptFound
11
8
  from content_core.config import CONFIG
12
9
  from content_core.logging import logger
10
+ from youtube_transcript_api import YouTubeTranscriptApi # type: ignore
11
+ from youtube_transcript_api.formatters import TextFormatter # type: ignore
13
12
 
14
13
  ssl._create_default_https_context = ssl._create_unverified_context
15
14
 
@@ -137,7 +136,7 @@ def extract_transcript_pytubefix(url, languages=["en", "es", "pt"]):
137
136
  from pytubefix import YouTube
138
137
 
139
138
  yt = YouTube(url)
140
- print(yt.captions)
139
+ logger.debug(f"Captions: {yt.captions}")
141
140
 
142
141
  # Try to get captions in the preferred languages
143
142
  if yt.captions:
@@ -149,12 +148,20 @@ def extract_transcript_pytubefix(url, languages=["en", "es", "pt"]):
149
148
  caption = yt.captions[f"a.{lang}"]
150
149
  break
151
150
  else: # No preferred language found, use the first available
152
- caption_key = next(iter(yt.captions))
153
- caption = yt.captions[caption_key]
154
-
155
- srt_captions = caption.generate_srt_captions()
156
- txt_captions = caption.generate_txt_captions()
157
- return txt_captions, srt_captions
151
+ caption_key = list(yt.captions.keys())[0]
152
+ caption = yt.captions[caption_key.code]
153
+ try:
154
+ srt_captions = caption.generate_srt_captions()
155
+ txt_captions = caption.generate_txt_captions()
156
+ return txt_captions, srt_captions
157
+ except KeyError as e:
158
+ logger.error(f"KeyError while generating captions for {caption}: {e}")
159
+ return None, None
160
+ except Exception as e:
161
+ logger.error(
162
+ f"Unexpected error while generating captions for {caption}: {e}"
163
+ )
164
+ return None, None
158
165
 
159
166
  return None, None
160
167
 
@@ -410,7 +410,7 @@ wheels = [
410
410
 
411
411
  [[package]]
412
412
  name = "content-core"
413
- version = "1.0.3"
413
+ version = "1.0.4"
414
414
  source = { editable = "." }
415
415
  dependencies = [
416
416
  { name = "ai-prompter" },
File without changes
File without changes
File without changes
File without changes
File without changes