kabigon 0.5.3__py3-none-any.whl → 0.6.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- kabigon/__init__.py +1 -0
- kabigon/compose.py +1 -1
- kabigon/errors.py +23 -0
- kabigon/firecrawl.py +33 -0
- kabigon/loader.py +0 -4
- kabigon/pdf.py +1 -6
- kabigon/reel.py +1 -6
- {kabigon-0.5.3.dist-info → kabigon-0.6.0.dist-info}/METADATA +2 -1
- kabigon-0.6.0.dist-info/RECORD +20 -0
- kabigon-0.5.3.dist-info/RECORD +0 -18
- {kabigon-0.5.3.dist-info → kabigon-0.6.0.dist-info}/WHEEL +0 -0
- {kabigon-0.5.3.dist-info → kabigon-0.6.0.dist-info}/entry_points.txt +0 -0
- {kabigon-0.5.3.dist-info → kabigon-0.6.0.dist-info}/licenses/LICENSE +0 -0
kabigon/__init__.py
CHANGED
kabigon/compose.py
CHANGED
kabigon/errors.py
ADDED
@@ -0,0 +1,23 @@
|
|
1
|
+
class LoaderError(Exception):
    """Base class for the loader errors defined in this module."""


class FirecrawlKeyError(LoaderError):
    """Error whose message reports that ``FIRECRAWL_API_KEY`` is not set."""

    def __init__(self) -> None:
        super().__init__("FIRECRAWL_API_KEY is not set.")

    def __reduce__(self) -> tuple:
        # The default reduction re-calls the class with ``self.args`` (the
        # message), which this zero-argument constructor rejects; rebuild
        # with no arguments so pickle/copy round-trips work.
        return (type(self), (), self.__dict__)


class FirecrawlError(LoaderError):
    """Error reporting that a URL could not be loaded.

    Attributes:
        url: The URL that was passed to the constructor.
        error: The error description that was passed to the constructor.
    """

    def __init__(self, url: str, error: str) -> None:
        self.url = url
        self.error = error
        msg = f"Failed to load URL: {url}, got: {error}"
        super().__init__(msg)

    def __reduce__(self) -> tuple:
        # ``self.args`` only holds the formatted message, so the default
        # reduction would call ``FirecrawlError(msg)`` and fail with a
        # missing-argument TypeError. Rebuild from the original inputs.
        return (type(self), (self.url, self.error), self.__dict__)


class NotPDFError(LoaderError):
    """Error reporting that a URL is not a PDF.

    Attributes:
        url: The URL that was passed to the constructor.
    """

    def __init__(self, url: str) -> None:
        self.url = url
        super().__init__(f"URL is not a PDF: {url}")

    def __reduce__(self) -> tuple:
        # Rebuilding from ``self.args`` would feed the formatted message back
        # in as the URL and duplicate the prefix; use the raw URL instead.
        return (type(self), (self.url,), self.__dict__)


class NotReelURLError(LoaderError):
    """Error reporting that a URL is not an Instagram Reel.

    Attributes:
        url: The URL that was passed to the constructor.
    """

    def __init__(self, url: str) -> None:
        self.url = url
        super().__init__(f"URL is not an Instagram Reel: {url}")

    def __reduce__(self) -> tuple:
        # Rebuilding from ``self.args`` would feed the formatted message back
        # in as the URL and duplicate the prefix; use the raw URL instead.
        return (type(self), (self.url,), self.__dict__)
|
kabigon/firecrawl.py
ADDED
@@ -0,0 +1,33 @@
|
|
1
|
+
import os
|
2
|
+
|
3
|
+
from firecrawl import FirecrawlApp
|
4
|
+
|
5
|
+
from .errors import FirecrawlError
|
6
|
+
from .errors import FirecrawlKeyError
|
7
|
+
from .loader import Loader
|
8
|
+
|
9
|
+
|
10
|
+
class FirecrawlLoader(Loader):
    """Loader that scrapes a URL through ``FirecrawlApp`` and returns Markdown."""

    def __init__(self, timeout: int | None = None) -> None:
        """Create the Firecrawl client from the environment.

        Args:
            timeout: Forwarded unchanged as the ``timeout`` argument of every
                ``scrape_url`` call (``None`` is passed through as-is).

        Raises:
            FirecrawlKeyError: If ``FIRECRAWL_API_KEY`` is unset or empty.
        """
        self.timeout = timeout

        api_key = os.getenv("FIRECRAWL_API_KEY")
        if not api_key:
            raise FirecrawlKeyError()

        self.app = FirecrawlApp(api_key=api_key)

    def load(self, url: str) -> str:
        """Scrape ``url`` and return the ``markdown`` field of the result.

        Raises:
            FirecrawlError: If the scrape result reports ``success`` as falsy;
                the result's ``error`` is included in the message.
        """
        result = self.app.scrape_url(
            url,
            formats=["markdown"],
            timeout=self.timeout,
        )

        if not result.success:
            raise FirecrawlError(url, result.error)

        # NOTE(review): presumably ``markdown`` is always populated on a
        # successful scrape — confirm it cannot be None here.
        return result.markdown

    async def async_load(self, url: str) -> str:
        """Asynchronous variant of :meth:`load` with the same result and errors."""
        # Imported locally so this class does not depend on edits to the
        # module's import block.
        import asyncio

        # ``load`` performs a blocking call; run it in a worker thread so the
        # event loop keeps serving other tasks while the scrape is in flight.
        return await asyncio.to_thread(self.load, url)
|
kabigon/loader.py
CHANGED
kabigon/pdf.py
CHANGED
@@ -6,8 +6,8 @@ from typing import Any
|
|
6
6
|
import httpx
|
7
7
|
from pypdf import PdfReader
|
8
8
|
|
9
|
+
from .errors import NotPDFError
|
9
10
|
from .loader import Loader
|
10
|
-
from .loader import LoaderError
|
11
11
|
|
12
12
|
DEFAULT_HEADERS = {
|
13
13
|
"Accept-Language": "zh-TW,zh;q=0.9,ja;q=0.8,en-US;q=0.7,en;q=0.6",
|
@@ -15,11 +15,6 @@ DEFAULT_HEADERS = {
|
|
15
15
|
}
|
16
16
|
|
17
17
|
|
18
|
-
class NotPDFError(LoaderError):
|
19
|
-
def __init__(self, url: str) -> None:
|
20
|
-
super().__init__(f"URL is not a PDF: {url}")
|
21
|
-
|
22
|
-
|
23
18
|
class PDFLoader(Loader):
|
24
19
|
def load(self, url_or_file: str) -> str:
|
25
20
|
if not url_or_file.startswith("http"):
|
kabigon/reel.py
CHANGED
@@ -1,6 +1,6 @@
|
|
1
|
+
from .errors import NotReelURLError
|
1
2
|
from .httpx import HttpxLoader
|
2
3
|
from .loader import Loader
|
3
|
-
from .loader import LoaderError
|
4
4
|
from .ytdlp import YtdlpLoader
|
5
5
|
|
6
6
|
|
@@ -8,11 +8,6 @@ def is_reel_url(url: str) -> bool:
|
|
8
8
|
return url.startswith("https://www.instagram.com/reel")
|
9
9
|
|
10
10
|
|
11
|
-
class NotReelURLError(LoaderError):
|
12
|
-
def __init__(self, url: str):
|
13
|
-
super().__init__(f"URL is not an Instagram Reel: {url}")
|
14
|
-
|
15
|
-
|
16
11
|
class ReelLoader(Loader):
|
17
12
|
def __init__(self) -> None:
|
18
13
|
self.httpx_loader = HttpxLoader()
|
@@ -1,11 +1,12 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: kabigon
|
3
|
-
Version: 0.5.3
|
3
|
+
Version: 0.6.0
|
4
4
|
Author-email: narumi <toucans-cutouts0f@icloud.com>
|
5
5
|
License-File: LICENSE
|
6
6
|
Requires-Python: >=3.10
|
7
7
|
Requires-Dist: aioytt>=0.2.4
|
8
8
|
Requires-Dist: click>=8.1.8
|
9
|
+
Requires-Dist: firecrawl-py>=2.4.1
|
9
10
|
Requires-Dist: httpx>=0.28.1
|
10
11
|
Requires-Dist: loguru>=0.7.3
|
11
12
|
Requires-Dist: markdownify>=0.14.1
|
@@ -0,0 +1,20 @@
|
|
1
|
+
kabigon/__init__.py,sha256=L64FtFCQQxkqXXHDkW2rniWGAnufmC9WG6KBwutXU4w,539
|
2
|
+
kabigon/cli.py,sha256=XSTyD1RFqq2Qok_52kSjJlBLUXl6t-K9QtsxCfB15o4,611
|
3
|
+
kabigon/compose.py,sha256=Yhn9fe0mylVLpK01sT9OWWItIY1gD8TZLQ4qY8hAeSQ,2110
|
4
|
+
kabigon/errors.py,sha256=tYokuDQ1-U25zq3hWu-GsYJBDpDq3VuObUijnd7Nc7M,613
|
5
|
+
kabigon/firecrawl.py,sha256=Xnrlhco_R58x5kwGy9ZCKTnVqS4Pp-D3G0u-qnuGEsU,800
|
6
|
+
kabigon/httpx.py,sha256=uDdLks6zVzirY7-mnsJkypX86kAI5XmUVfK-lFifdJA,895
|
7
|
+
kabigon/loader.py,sha256=KhOJvlzLDM0o05o4VqMRgiyRVjofkzuGEcihQagj-8M,452
|
8
|
+
kabigon/pdf.py,sha256=yJcgkdMMF52baFsFxJp9Jn89KsTKBboLTLwD3gs6U5U,1775
|
9
|
+
kabigon/playwright.py,sha256=MZ-r0Ej2wWAOJkDLwYRvO77wcDvh38KXz2wgDsCTgm0,1358
|
10
|
+
kabigon/py.typed,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
|
11
|
+
kabigon/reel.py,sha256=J2QOxGMYi_HaEscQPIipPEoHGN_iksGxR6pV_XvryME,929
|
12
|
+
kabigon/singlefile.py,sha256=CeTT2WPYm0vb1xWPNdyBN4uHRw9hRqfZm68D-nEcUA8,1800
|
13
|
+
kabigon/utils.py,sha256=eNTLtHLSB2erDac2HH3jWemgfr8Ou_ozwVb8h9BD-4g,922
|
14
|
+
kabigon/youtube.py,sha256=HoiFNq0ookPL7_rO_wloBaY8yTIX6xP8A77F7y02q64,1166
|
15
|
+
kabigon/ytdlp.py,sha256=kG1fXqU650otOWespjOSkGK_-jk1wO-sWiR60_UPJxY,3125
|
16
|
+
kabigon-0.6.0.dist-info/METADATA,sha256=zsjAoTye8jH1NtI_ZylcCI8Vt-S0CqQWcN-ca39cbLY,1078
|
17
|
+
kabigon-0.6.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
18
|
+
kabigon-0.6.0.dist-info/entry_points.txt,sha256=O3FYAO9w-NQvlGMJrBvtrnGHSK2QkUnQBTa30YXRbVE,45
|
19
|
+
kabigon-0.6.0.dist-info/licenses/LICENSE,sha256=H2T3_RTgmcngMeC7p_SXT3GwBLkd2DaNgAZuxulcfiA,1066
|
20
|
+
kabigon-0.6.0.dist-info/RECORD,,
|
kabigon-0.5.3.dist-info/RECORD
DELETED
@@ -1,18 +0,0 @@
|
|
1
|
-
kabigon/__init__.py,sha256=7ll3ePlHNbZq-CmrGMrQouLCUSmuRsZ9yAj2JOzr7HY,500
|
2
|
-
kabigon/cli.py,sha256=XSTyD1RFqq2Qok_52kSjJlBLUXl6t-K9QtsxCfB15o4,611
|
3
|
-
kabigon/compose.py,sha256=DO0hOJgEAX7ZLOS53dcE6V9zi7Tr9oGNW8koPHsx9eM,2110
|
4
|
-
kabigon/httpx.py,sha256=uDdLks6zVzirY7-mnsJkypX86kAI5XmUVfK-lFifdJA,895
|
5
|
-
kabigon/loader.py,sha256=D5xUPJb3uAygmBaN_sX56ZpGcGsVz-ueHOXC7gSGaxM,493
|
6
|
-
kabigon/pdf.py,sha256=Q9XuBdKDrDQJ8BNvY7Lgt6dpGeA_ylGGHWOE3euiI_8,1904
|
7
|
-
kabigon/playwright.py,sha256=MZ-r0Ej2wWAOJkDLwYRvO77wcDvh38KXz2wgDsCTgm0,1358
|
8
|
-
kabigon/py.typed,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
|
9
|
-
kabigon/reel.py,sha256=TP_oKYXABXYja2A9damTBWR3MVYA7aZyxbIvCuTcq40,1062
|
10
|
-
kabigon/singlefile.py,sha256=CeTT2WPYm0vb1xWPNdyBN4uHRw9hRqfZm68D-nEcUA8,1800
|
11
|
-
kabigon/utils.py,sha256=eNTLtHLSB2erDac2HH3jWemgfr8Ou_ozwVb8h9BD-4g,922
|
12
|
-
kabigon/youtube.py,sha256=HoiFNq0ookPL7_rO_wloBaY8yTIX6xP8A77F7y02q64,1166
|
13
|
-
kabigon/ytdlp.py,sha256=kG1fXqU650otOWespjOSkGK_-jk1wO-sWiR60_UPJxY,3125
|
14
|
-
kabigon-0.5.3.dist-info/METADATA,sha256=gzcVy_2l4kAo_heevYpN-NsHY9f7RMFlFGk6rkSCVQU,1043
|
15
|
-
kabigon-0.5.3.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
16
|
-
kabigon-0.5.3.dist-info/entry_points.txt,sha256=O3FYAO9w-NQvlGMJrBvtrnGHSK2QkUnQBTa30YXRbVE,45
|
17
|
-
kabigon-0.5.3.dist-info/licenses/LICENSE,sha256=H2T3_RTgmcngMeC7p_SXT3GwBLkd2DaNgAZuxulcfiA,1066
|
18
|
-
kabigon-0.5.3.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|