kabigon 0.5.2__py3-none-any.whl → 0.6.0__py3-none-any.whl

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the contents of those package versions as they appear in their respective public registries.
kabigon/__init__.py CHANGED
@@ -5,6 +5,7 @@ from typing import Final
5
5
  from loguru import logger
6
6
 
7
7
  from .compose import Compose
8
+ from .firecrawl import FirecrawlLoader
8
9
  from .httpx import HttpxLoader
9
10
  from .loader import Loader
10
11
  from .pdf import PDFLoader
kabigon/compose.py CHANGED
@@ -3,8 +3,8 @@ from urllib.parse import urlunparse
3
3
 
4
4
  from loguru import logger
5
5
 
6
+ from .errors import LoaderError
6
7
  from .loader import Loader
7
- from .loader import LoaderError
8
8
 
9
9
  REPLACEMENTS = {
10
10
  "api.fxtwitter.com": [
kabigon/errors.py ADDED
@@ -0,0 +1,23 @@
1
+ class LoaderError(Exception):
2
+ pass
3
+
4
+
5
+ class FirecrawlKeyError(LoaderError):
6
+ def __init__(self) -> None:
7
+ super().__init__("FIRECRAWL_API_KEY is not set.")
8
+
9
+
10
+ class FirecrawlError(LoaderError):
11
+ def __init__(self, url: str, error: str) -> None:
12
+ msg = f"Failed to load URL: {url}, got: {error}"
13
+ super().__init__(msg)
14
+
15
+
16
+ class NotPDFError(LoaderError):
17
+ def __init__(self, url: str) -> None:
18
+ super().__init__(f"URL is not a PDF: {url}")
19
+
20
+
21
+ class NotReelURLError(LoaderError):
22
+ def __init__(self, url: str):
23
+ super().__init__(f"URL is not an Instagram Reel: {url}")
kabigon/firecrawl.py ADDED
@@ -0,0 +1,33 @@
1
+ import os
2
+
3
+ from firecrawl import FirecrawlApp
4
+
5
+ from .errors import FirecrawlError
6
+ from .errors import FirecrawlKeyError
7
+ from .loader import Loader
8
+
9
+
10
+ class FirecrawlLoader(Loader):
11
+ def __init__(self, timeout: int | None = None) -> None:
12
+ self.timeout = timeout
13
+
14
+ api_key = os.getenv("FIRECRAWL_API_KEY")
15
+ if not api_key:
16
+ raise FirecrawlKeyError()
17
+
18
+ self.app = FirecrawlApp(api_key=api_key)
19
+
20
+ def load(self, url: str) -> str:
21
+ result = self.app.scrape_url(
22
+ url,
23
+ formats=["markdown"],
24
+ timeout=self.timeout,
25
+ )
26
+
27
+ if not result.success:
28
+ raise FirecrawlError(url, result.error)
29
+
30
+ return result.markdown
31
+
32
+ async def async_load(self, url: str) -> str:
33
+ return self.load(url)
kabigon/loader.py CHANGED
@@ -14,7 +14,3 @@ class Loader:
14
14
  with concurrent.futures.ProcessPoolExecutor() as executor:
15
15
  result = await loop.run_in_executor(executor, self.load, url)
16
16
  return result
17
-
18
-
19
- class LoaderError(Exception):
20
- pass
kabigon/pdf.py CHANGED
@@ -6,8 +6,8 @@ from typing import Any
6
6
  import httpx
7
7
  from pypdf import PdfReader
8
8
 
9
+ from .errors import NotPDFError
9
10
  from .loader import Loader
10
- from .loader import LoaderError
11
11
 
12
12
  DEFAULT_HEADERS = {
13
13
  "Accept-Language": "zh-TW,zh;q=0.9,ja;q=0.8,en-US;q=0.7,en;q=0.6",
@@ -15,11 +15,6 @@ DEFAULT_HEADERS = {
15
15
  }
16
16
 
17
17
 
18
- class NotPDFError(LoaderError):
19
- def __init__(self, url: str) -> None:
20
- super().__init__(f"URL is not a PDF: {url}")
21
-
22
-
23
18
  class PDFLoader(Loader):
24
19
  def load(self, url_or_file: str) -> str:
25
20
  if not url_or_file.startswith("http"):
kabigon/reel.py CHANGED
@@ -1,6 +1,6 @@
1
+ from .errors import NotReelURLError
1
2
  from .httpx import HttpxLoader
2
3
  from .loader import Loader
3
- from .loader import LoaderError
4
4
  from .ytdlp import YtdlpLoader
5
5
 
6
6
 
@@ -8,11 +8,6 @@ def is_reel_url(url: str) -> bool:
8
8
  return url.startswith("https://www.instagram.com/reel")
9
9
 
10
10
 
11
- class NotReelURLError(LoaderError):
12
- def __init__(self, url: str):
13
- super().__init__(f"URL is not an Instagram Reel: {url}")
14
-
15
-
16
11
  class ReelLoader(Loader):
17
12
  def __init__(self) -> None:
18
13
  self.httpx_loader = HttpxLoader()
@@ -1,11 +1,12 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: kabigon
3
- Version: 0.5.2
3
+ Version: 0.6.0
4
4
  Author-email: narumi <toucans-cutouts0f@icloud.com>
5
5
  License-File: LICENSE
6
6
  Requires-Python: >=3.10
7
7
  Requires-Dist: aioytt>=0.2.4
8
8
  Requires-Dist: click>=8.1.8
9
+ Requires-Dist: firecrawl-py>=2.4.1
9
10
  Requires-Dist: httpx>=0.28.1
10
11
  Requires-Dist: loguru>=0.7.3
11
12
  Requires-Dist: markdownify>=0.14.1
@@ -0,0 +1,20 @@
1
+ kabigon/__init__.py,sha256=L64FtFCQQxkqXXHDkW2rniWGAnufmC9WG6KBwutXU4w,539
2
+ kabigon/cli.py,sha256=XSTyD1RFqq2Qok_52kSjJlBLUXl6t-K9QtsxCfB15o4,611
3
+ kabigon/compose.py,sha256=Yhn9fe0mylVLpK01sT9OWWItIY1gD8TZLQ4qY8hAeSQ,2110
4
+ kabigon/errors.py,sha256=tYokuDQ1-U25zq3hWu-GsYJBDpDq3VuObUijnd7Nc7M,613
5
+ kabigon/firecrawl.py,sha256=Xnrlhco_R58x5kwGy9ZCKTnVqS4Pp-D3G0u-qnuGEsU,800
6
+ kabigon/httpx.py,sha256=uDdLks6zVzirY7-mnsJkypX86kAI5XmUVfK-lFifdJA,895
7
+ kabigon/loader.py,sha256=KhOJvlzLDM0o05o4VqMRgiyRVjofkzuGEcihQagj-8M,452
8
+ kabigon/pdf.py,sha256=yJcgkdMMF52baFsFxJp9Jn89KsTKBboLTLwD3gs6U5U,1775
9
+ kabigon/playwright.py,sha256=MZ-r0Ej2wWAOJkDLwYRvO77wcDvh38KXz2wgDsCTgm0,1358
10
+ kabigon/py.typed,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
11
+ kabigon/reel.py,sha256=J2QOxGMYi_HaEscQPIipPEoHGN_iksGxR6pV_XvryME,929
12
+ kabigon/singlefile.py,sha256=CeTT2WPYm0vb1xWPNdyBN4uHRw9hRqfZm68D-nEcUA8,1800
13
+ kabigon/utils.py,sha256=eNTLtHLSB2erDac2HH3jWemgfr8Ou_ozwVb8h9BD-4g,922
14
+ kabigon/youtube.py,sha256=HoiFNq0ookPL7_rO_wloBaY8yTIX6xP8A77F7y02q64,1166
15
+ kabigon/ytdlp.py,sha256=kG1fXqU650otOWespjOSkGK_-jk1wO-sWiR60_UPJxY,3125
16
+ kabigon-0.6.0.dist-info/METADATA,sha256=zsjAoTye8jH1NtI_ZylcCI8Vt-S0CqQWcN-ca39cbLY,1078
17
+ kabigon-0.6.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
18
+ kabigon-0.6.0.dist-info/entry_points.txt,sha256=O3FYAO9w-NQvlGMJrBvtrnGHSK2QkUnQBTa30YXRbVE,45
19
+ kabigon-0.6.0.dist-info/licenses/LICENSE,sha256=H2T3_RTgmcngMeC7p_SXT3GwBLkd2DaNgAZuxulcfiA,1066
20
+ kabigon-0.6.0.dist-info/RECORD,,
@@ -1,18 +0,0 @@
1
- kabigon/__init__.py,sha256=7ll3ePlHNbZq-CmrGMrQouLCUSmuRsZ9yAj2JOzr7HY,500
2
- kabigon/cli.py,sha256=XSTyD1RFqq2Qok_52kSjJlBLUXl6t-K9QtsxCfB15o4,611
3
- kabigon/compose.py,sha256=DO0hOJgEAX7ZLOS53dcE6V9zi7Tr9oGNW8koPHsx9eM,2110
4
- kabigon/httpx.py,sha256=uDdLks6zVzirY7-mnsJkypX86kAI5XmUVfK-lFifdJA,895
5
- kabigon/loader.py,sha256=D5xUPJb3uAygmBaN_sX56ZpGcGsVz-ueHOXC7gSGaxM,493
6
- kabigon/pdf.py,sha256=Q9XuBdKDrDQJ8BNvY7Lgt6dpGeA_ylGGHWOE3euiI_8,1904
7
- kabigon/playwright.py,sha256=MZ-r0Ej2wWAOJkDLwYRvO77wcDvh38KXz2wgDsCTgm0,1358
8
- kabigon/py.typed,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
9
- kabigon/reel.py,sha256=TP_oKYXABXYja2A9damTBWR3MVYA7aZyxbIvCuTcq40,1062
10
- kabigon/singlefile.py,sha256=CeTT2WPYm0vb1xWPNdyBN4uHRw9hRqfZm68D-nEcUA8,1800
11
- kabigon/utils.py,sha256=eNTLtHLSB2erDac2HH3jWemgfr8Ou_ozwVb8h9BD-4g,922
12
- kabigon/youtube.py,sha256=HoiFNq0ookPL7_rO_wloBaY8yTIX6xP8A77F7y02q64,1166
13
- kabigon/ytdlp.py,sha256=kG1fXqU650otOWespjOSkGK_-jk1wO-sWiR60_UPJxY,3125
14
- kabigon-0.5.2.dist-info/METADATA,sha256=rwkyQAYqBeRyoITRCkIojOHlmxCGXTLAr5dr4FOAKjI,1043
15
- kabigon-0.5.2.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
16
- kabigon-0.5.2.dist-info/entry_points.txt,sha256=O3FYAO9w-NQvlGMJrBvtrnGHSK2QkUnQBTa30YXRbVE,45
17
- kabigon-0.5.2.dist-info/licenses/LICENSE,sha256=H2T3_RTgmcngMeC7p_SXT3GwBLkd2DaNgAZuxulcfiA,1066
18
- kabigon-0.5.2.dist-info/RECORD,,