content-core 1.0.3__py3-none-any.whl → 1.0.4__py3-none-any.whl

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the contents of those package versions as they appear in their respective public registries.

Potentially problematic release.


This version of content-core might be problematic. Click here for more details.

@@ -8,7 +8,7 @@ async def summarize(content: str, context: str) -> str:
8
8
  templated_message_fn = partial(templated_message, model=ModelFactory.get_model('summary_model'))
9
9
  response = await templated_message_fn(
10
10
  TemplatedMessageInput(
11
- user_prompt_template="content/summarize",
11
+ user_prompt_template="prompts/content/summarize",
12
12
  data={"content": content, "context": context},
13
13
  )
14
14
  )
@@ -3,13 +3,12 @@ import ssl
3
3
 
4
4
  import aiohttp
5
5
  from bs4 import BeautifulSoup
6
- from youtube_transcript_api import YouTubeTranscriptApi # type: ignore
7
- from youtube_transcript_api.formatters import TextFormatter # type: ignore
8
-
9
6
  from content_core.common import ProcessSourceState
10
7
  from content_core.common.exceptions import NoTranscriptFound
11
8
  from content_core.config import CONFIG
12
9
  from content_core.logging import logger
10
+ from youtube_transcript_api import YouTubeTranscriptApi # type: ignore
11
+ from youtube_transcript_api.formatters import TextFormatter # type: ignore
13
12
 
14
13
  ssl._create_default_https_context = ssl._create_unverified_context
15
14
 
@@ -137,7 +136,7 @@ def extract_transcript_pytubefix(url, languages=["en", "es", "pt"]):
137
136
  from pytubefix import YouTube
138
137
 
139
138
  yt = YouTube(url)
140
- print(yt.captions)
139
+ logger.debug(f"Captions: {yt.captions}")
141
140
 
142
141
  # Try to get captions in the preferred languages
143
142
  if yt.captions:
@@ -149,12 +148,20 @@ def extract_transcript_pytubefix(url, languages=["en", "es", "pt"]):
149
148
  caption = yt.captions[f"a.{lang}"]
150
149
  break
151
150
  else: # No preferred language found, use the first available
152
- caption_key = next(iter(yt.captions))
153
- caption = yt.captions[caption_key]
154
-
155
- srt_captions = caption.generate_srt_captions()
156
- txt_captions = caption.generate_txt_captions()
157
- return txt_captions, srt_captions
151
+ caption_key = list(yt.captions.keys())[0]
152
+ caption = yt.captions[caption_key.code]
153
+ try:
154
+ srt_captions = caption.generate_srt_captions()
155
+ txt_captions = caption.generate_txt_captions()
156
+ return txt_captions, srt_captions
157
+ except KeyError as e:
158
+ logger.error(f"KeyError while generating captions for {caption}: {e}")
159
+ return None, None
160
+ except Exception as e:
161
+ logger.error(
162
+ f"Unexpected error while generating captions for {caption}: {e}"
163
+ )
164
+ return None, None
158
165
 
159
166
  return None, None
160
167
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: content-core
3
- Version: 1.0.3
3
+ Version: 1.0.4
4
4
  Summary: Extract what matters from any media source
5
5
  Author-email: LUIS NOVO <lfnovo@gmail.com>
6
6
  License-File: LICENSE
@@ -18,7 +18,7 @@ content_core/content/extraction/__init__.py,sha256=TaYw6CAcG62GZfsJxeZ6VJDLP85BU
18
18
  content_core/content/extraction/graph.py,sha256=Nn2iaQc6YJ4Qt8WKTolwUQUNNqUlwpV8YnijESGvnD0,7605
19
19
  content_core/content/identification/__init__.py,sha256=x4n8JIjDwmPvAopEEEcmZjlozg-zGbMq_s9VYdBjzYU,169
20
20
  content_core/content/summary/__init__.py,sha256=ReKCZWKfDtqlInKeh87Y1DEfiNzVWabGybEz3hS2FrI,114
21
- content_core/content/summary/core.py,sha256=LejUbPxnRD0sbO6MupiIb-IHLxEUGU5beBZwmIiBncc,542
21
+ content_core/content/summary/core.py,sha256=kEabpETljzUb-yf0NcVWTOuCtayESo74gGBVDX7YTFs,550
22
22
  content_core/notebooks/run.ipynb,sha256=WPBNcQUNXR5MldNMghVcU4vE4ibrVmlANa80baQn8TA,371078
23
23
  content_core/processors/audio.py,sha256=Mie20g_2Akhw6BHBVo3sHMpDRYUkqBI72lEDakscx3s,5729
24
24
  content_core/processors/docling.py,sha256=dkXehsQdfyWXfrK1K_6Pye50ABM7DxMk6TMguabM9Pc,2151
@@ -27,13 +27,13 @@ content_core/processors/pdf.py,sha256=9jf-eROAqw6yQwdlbsxPXsaJXY26hVG7nSTPH9n4af
27
27
  content_core/processors/text.py,sha256=kKHA60-NYjLmCTYUnk8TdJxQQ0Shkg-K61Ezqaelz7k,1158
28
28
  content_core/processors/url.py,sha256=6WT8Sw2VHiKyhgWXi_jZjKjwnT_QPSPcH4P99RKbjgU,7521
29
29
  content_core/processors/video.py,sha256=3WnZwTswvTLm8PtQhKwoqJ2BH6YZi62dMUjALwJiebo,5196
30
- content_core/processors/youtube.py,sha256=y-1KH_1uYyO6l6QpjBtzP1KSBjclYeGlj6zDrloAcPc,7490
30
+ content_core/processors/youtube.py,sha256=MOeZboVfM9_C87L5mnUVvsbQeKoznwJoYn1wP1_hA_U,7869
31
31
  content_core/tools/__init__.py,sha256=DuJmd7fE-NpDvLP8IW1XY5MUkAQcdks52rn2jk4N8jQ,231
32
32
  content_core/tools/cleanup.py,sha256=5IdKedsFyRQMdYzgFSKtsfyxJldbroXQXHesHICNENI,523
33
33
  content_core/tools/extract.py,sha256=-r2_jsuMMXyXxGVqWhh1ilNPo_UMYAbw3Pkp1FzPy5g,577
34
34
  content_core/tools/summarize.py,sha256=DPfeglLWB08q8SvHrsKpOKZ35XjduUDs2J02ISwjdj0,596
35
- content_core-1.0.3.dist-info/METADATA,sha256=R6woFztrB88fGVSOVMkrBFAgqwPAPrIw5MClIUKq1Xo,11908
36
- content_core-1.0.3.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
37
- content_core-1.0.3.dist-info/entry_points.txt,sha256=9fGQUk6bxBVXj9PRwfWVPn54ClSEJV7J-KBLXtjOhQw,99
38
- content_core-1.0.3.dist-info/licenses/LICENSE,sha256=myj0z2T4qIkenCgLsRfx7Wk6UqCQNj5c7O14Qx4zpGg,1066
39
- content_core-1.0.3.dist-info/RECORD,,
35
+ content_core-1.0.4.dist-info/METADATA,sha256=SdXexgOV0tc4ArCYWjxrZog4esHJxW0zh8pdnZFqLi8,11908
36
+ content_core-1.0.4.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
37
+ content_core-1.0.4.dist-info/entry_points.txt,sha256=9fGQUk6bxBVXj9PRwfWVPn54ClSEJV7J-KBLXtjOhQw,99
38
+ content_core-1.0.4.dist-info/licenses/LICENSE,sha256=myj0z2T4qIkenCgLsRfx7Wk6UqCQNj5c7O14Qx4zpGg,1066
39
+ content_core-1.0.4.dist-info/RECORD,,