content-core 1.2.1__py3-none-any.whl → 1.2.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of content-core might be problematic. Click here for more details.
- content_core/processors/url.py +2 -2
- content_core/processors/youtube.py +4 -3
- {content_core-1.2.1.dist-info → content_core-1.2.2.dist-info}/METADATA +1 -1
- {content_core-1.2.1.dist-info → content_core-1.2.2.dist-info}/RECORD +7 -7
- {content_core-1.2.1.dist-info → content_core-1.2.2.dist-info}/WHEEL +0 -0
- {content_core-1.2.1.dist-info → content_core-1.2.2.dist-info}/entry_points.txt +0 -0
- {content_core-1.2.1.dist-info → content_core-1.2.2.dist-info}/licenses/LICENSE +0 -0
content_core/processors/url.py
CHANGED
|
@@ -38,10 +38,10 @@ async def url_provider(state: ProcessSourceState):
|
|
|
38
38
|
or mime in SUPPORTED_FITZ_TYPES
|
|
39
39
|
or mime in SUPPORTED_OFFICE_TYPES
|
|
40
40
|
):
|
|
41
|
-
logger.
|
|
41
|
+
logger.debug(f"Identified type for {url}: {mime}")
|
|
42
42
|
return_dict["identified_type"] = mime
|
|
43
43
|
else:
|
|
44
|
-
logger.
|
|
44
|
+
logger.debug(f"Identified type for {url}: article")
|
|
45
45
|
return_dict["identified_type"] = "article"
|
|
46
46
|
return return_dict
|
|
47
47
|
|
|
content_core/processors/youtube.py
CHANGED
|
@@ -3,12 +3,13 @@ import ssl
|
|
|
3
3
|
|
|
4
4
|
import aiohttp
|
|
5
5
|
from bs4 import BeautifulSoup
|
|
6
|
+
from youtube_transcript_api import YouTubeTranscriptApi # type: ignore
|
|
7
|
+
from youtube_transcript_api.formatters import TextFormatter # type: ignore
|
|
8
|
+
|
|
6
9
|
from content_core.common import ProcessSourceState
|
|
7
10
|
from content_core.common.exceptions import NoTranscriptFound
|
|
8
11
|
from content_core.config import CONFIG
|
|
9
12
|
from content_core.logging import logger
|
|
10
|
-
from youtube_transcript_api import YouTubeTranscriptApi # type: ignore
|
|
11
|
-
from youtube_transcript_api.formatters import TextFormatter # type: ignore
|
|
12
13
|
|
|
13
14
|
ssl._create_default_https_context = ssl._create_unverified_context
|
|
14
15
|
|
|
@@ -172,7 +173,7 @@ async def extract_youtube_transcript(state: ProcessSourceState):
|
|
|
172
173
|
"""
|
|
173
174
|
|
|
174
175
|
assert state.url, "No URL provided"
|
|
175
|
-
logger.
|
|
176
|
+
logger.debug(f"Extracting transcript from URL: {state.url}")
|
|
176
177
|
languages = CONFIG.get("youtube_transcripts", {}).get(
|
|
177
178
|
"preferred_languages", ["en", "es", "pt"]
|
|
178
179
|
)
|
|
{content_core-1.2.1.dist-info → content_core-1.2.2.dist-info}/RECORD
CHANGED
|
@@ -28,15 +28,15 @@ content_core/processors/docling.py,sha256=lf_NHh255gn4d2EymJYqyH2QiAgQDiJCY3t6Ne
|
|
|
28
28
|
content_core/processors/office.py,sha256=DXkfmjqUhmhP6rJaO5Z5Y9sv-iK0zaPZ3waynFIPtsk,12153
|
|
29
29
|
content_core/processors/pdf.py,sha256=TTDhfV2INtXumFDjLJFNMRfpbJ_tqwIcSBDzuThKxJI,10617
|
|
30
30
|
content_core/processors/text.py,sha256=kKHA60-NYjLmCTYUnk8TdJxQQ0Shkg-K61Ezqaelz7k,1158
|
|
31
|
-
content_core/processors/url.py,sha256=
|
|
31
|
+
content_core/processors/url.py,sha256=To0LTtMVNN3M83CdodQaZFuU7-IMM5w9QOHRKNV8PVI,7532
|
|
32
32
|
content_core/processors/video.py,sha256=3WnZwTswvTLm8PtQhKwoqJ2BH6YZi62dMUjALwJiebo,5196
|
|
33
|
-
content_core/processors/youtube.py,sha256=
|
|
33
|
+
content_core/processors/youtube.py,sha256=_qvxI9qTdxu3l1fKLuJARFt8KtZVFJ3JJBLkq1hAAXo,7868
|
|
34
34
|
content_core/tools/__init__.py,sha256=DuJmd7fE-NpDvLP8IW1XY5MUkAQcdks52rn2jk4N8jQ,231
|
|
35
35
|
content_core/tools/cleanup.py,sha256=5IdKedsFyRQMdYzgFSKtsfyxJldbroXQXHesHICNENI,523
|
|
36
36
|
content_core/tools/extract.py,sha256=-r2_jsuMMXyXxGVqWhh1ilNPo_UMYAbw3Pkp1FzPy5g,577
|
|
37
37
|
content_core/tools/summarize.py,sha256=DPfeglLWB08q8SvHrsKpOKZ35XjduUDs2J02ISwjdj0,596
|
|
38
|
-
content_core-1.2.
|
|
39
|
-
content_core-1.2.
|
|
40
|
-
content_core-1.2.
|
|
41
|
-
content_core-1.2.
|
|
42
|
-
content_core-1.2.
|
|
38
|
+
content_core-1.2.2.dist-info/METADATA,sha256=876oFuYDvAzsfOBRRe9t04SprQZne0Xg4BIGhOnzshs,19676
|
|
39
|
+
content_core-1.2.2.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
|
40
|
+
content_core-1.2.2.dist-info/entry_points.txt,sha256=ifbBxw37b7gAxZXoduS15KtqHuMHuU58STRkEmgM2zA,147
|
|
41
|
+
content_core-1.2.2.dist-info/licenses/LICENSE,sha256=myj0z2T4qIkenCgLsRfx7Wk6UqCQNj5c7O14Qx4zpGg,1066
|
|
42
|
+
content_core-1.2.2.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|