rawfy 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
rawfy-0.1.0/PKG-INFO ADDED
@@ -0,0 +1,98 @@
1
+ Metadata-Version: 2.4
2
+ Name: rawfy
3
+ Version: 0.1.0
4
+ Summary: Python wrapper for the Rawfy AI agent skill — converts any URL into structured, agent-readable content
5
+ Author: rishiiicreates
6
+ License: MIT
7
+ Project-URL: Homepage, https://github.com/rishiiicreates/rawfy
8
+ Project-URL: Repository, https://github.com/rishiiicreates/rawfy
9
+ Project-URL: Issues, https://github.com/rishiiicreates/rawfy/issues
10
+ Keywords: ai,agent,mcp,web,scraper,markdown,rawfy
11
+ Classifier: Development Status :: 4 - Beta
12
+ Classifier: Intended Audience :: Developers
13
+ Classifier: License :: OSI Approved :: MIT License
14
+ Classifier: Programming Language :: Python :: 3
15
+ Classifier: Programming Language :: Python :: 3.10
16
+ Classifier: Programming Language :: Python :: 3.11
17
+ Classifier: Programming Language :: Python :: 3.12
18
+ Classifier: Programming Language :: Python :: 3.13
19
+ Classifier: Topic :: Internet :: WWW/HTTP
20
+ Classifier: Topic :: Software Development :: Libraries :: Python Modules
21
+ Classifier: Topic :: Text Processing :: Markup :: Markdown
22
+ Requires-Python: >=3.10
23
+ Description-Content-Type: text/markdown
24
+
25
+ # Rawfy Python Wrapper
26
+
27
+ Python bindings for the [Rawfy](https://github.com/rishiiicreates/rawfy) AI agent skill.
28
+
29
+ ## Prerequisites
30
+
31
+ - **Node.js >= 18** — Rawfy is a Node.js tool
32
+ - **Rawfy CLI** — `npm install -g rawfy`
33
+
34
+ ## Install
35
+
36
+ ```bash
37
+ pip install rawfy
38
+ ```
39
+
40
+ ## Quick Start
41
+
42
+ ```python
43
+ from rawfy import fetch, metadata
44
+
45
+ # Full page fetch (returns WSM markdown)
46
+ content = fetch("https://example.com")
47
+
48
+ # Get structured JSON data
49
+ import json
50
+ data = fetch("https://example.com", format="json")
51
+ parsed = json.loads(data)
52
+
53
+ # Or use the convenience wrapper
54
+ from rawfy import fetch_json
55
+ data = fetch_json("https://example.com")
56
+ print(data["metadata"]["title"])
57
+
58
+ # Lightweight metadata only
59
+ meta = metadata("https://docs.python.org")
60
+ print(f"Title: {meta['title']}, Words: {meta['word_count']}")
61
+ ```
62
+
63
+ ## API
64
+
65
+ ### `fetch(url, *, format="markdown", vision=False, no_playwright=False, max_tokens=50000, timeout=30)`
66
+
67
+ Fetch and process a URL. Returns the content as a string.
68
+
69
+ ### `fetch_json(url, *, vision=False, no_playwright=False, max_tokens=50000, timeout=30)`
70
+
71
+ Same as `fetch()` with `format="json"`, returns a parsed dict.
72
+
73
+ ### `metadata(url, *, no_playwright=False, timeout=15)`
74
+
75
+ Fetch only page metadata (lightweight). Returns a dict.
76
+
77
+ ### `check_installation()`
78
+
79
+ Check if Rawfy and dependencies are installed. Returns a status dict.
80
+
81
+ ## Error Handling
82
+
83
+ ```python
84
+ from rawfy import fetch, RawfyError
85
+
86
+ try:
87
+ content = fetch("https://example.com")
88
+ except RawfyError as e:
89
+ print(f"Error [{e.code}]: {e}")
90
+ print(f"URL: {e.url}")
91
+ ```
92
+
93
+ ## CLI
94
+
95
+ ```bash
96
+ python -m rawfy https://example.com
97
+ python -m rawfy https://example.com --format json
98
+ ```
rawfy-0.1.0/README.md ADDED
@@ -0,0 +1,74 @@
1
+ # Rawfy Python Wrapper
2
+
3
+ Python bindings for the [Rawfy](https://github.com/rishiiicreates/rawfy) AI agent skill.
4
+
5
+ ## Prerequisites
6
+
7
+ - **Node.js >= 18** — Rawfy is a Node.js tool
8
+ - **Rawfy CLI** — `npm install -g rawfy`
9
+
10
+ ## Install
11
+
12
+ ```bash
13
+ pip install rawfy
14
+ ```
15
+
16
+ ## Quick Start
17
+
18
+ ```python
19
+ from rawfy import fetch, metadata
20
+
21
+ # Full page fetch (returns WSM markdown)
22
+ content = fetch("https://example.com")
23
+
24
+ # Get structured JSON data
25
+ import json
26
+ data = fetch("https://example.com", format="json")
27
+ parsed = json.loads(data)
28
+
29
+ # Or use the convenience wrapper
30
+ from rawfy import fetch_json
31
+ data = fetch_json("https://example.com")
32
+ print(data["metadata"]["title"])
33
+
34
+ # Lightweight metadata only
35
+ meta = metadata("https://docs.python.org")
36
+ print(f"Title: {meta['title']}, Words: {meta['word_count']}")
37
+ ```
38
+
39
+ ## API
40
+
41
+ ### `fetch(url, *, format="markdown", vision=False, no_playwright=False, max_tokens=50000, timeout=30)`
42
+
43
+ Fetch and process a URL. Returns the content as a string.
44
+
45
+ ### `fetch_json(url, *, vision=False, no_playwright=False, max_tokens=50000, timeout=30)`
46
+
47
+ Same as `fetch()` with `format="json"`, returns a parsed dict.
48
+
49
+ ### `metadata(url, *, no_playwright=False, timeout=15)`
50
+
51
+ Fetch only page metadata (lightweight). Returns a dict.
52
+
53
+ ### `check_installation()`
54
+
55
+ Check if Rawfy and dependencies are installed. Returns a status dict.
56
+
57
+ ## Error Handling
58
+
59
+ ```python
60
+ from rawfy import fetch, RawfyError
61
+
62
+ try:
63
+ content = fetch("https://example.com")
64
+ except RawfyError as e:
65
+ print(f"Error [{e.code}]: {e}")
66
+ print(f"URL: {e.url}")
67
+ ```
68
+
69
+ ## CLI
70
+
71
+ ```bash
72
+ python -m rawfy https://example.com
73
+ python -m rawfy https://example.com --format json
74
+ ```
@@ -0,0 +1,38 @@
1
+ [build-system]
2
+ requires = ["setuptools>=68.0", "wheel"]
3
+ build-backend = "setuptools.build_meta"
4
+
5
+ [project]
6
+ name = "rawfy"
7
+ version = "0.1.0"
8
+ description = "Python wrapper for the Rawfy AI agent skill — converts any URL into structured, agent-readable content"
9
+ readme = "README.md"
10
+ license = {text = "MIT"}
11
+ authors = [{name = "rishiiicreates"}]
12
+ requires-python = ">=3.10"
13
+ classifiers = [
14
+ "Development Status :: 4 - Beta",
15
+ "Intended Audience :: Developers",
16
+ "License :: OSI Approved :: MIT License",
17
+ "Programming Language :: Python :: 3",
18
+ "Programming Language :: Python :: 3.10",
19
+ "Programming Language :: Python :: 3.11",
20
+ "Programming Language :: Python :: 3.12",
21
+ "Programming Language :: Python :: 3.13",
22
+ "Topic :: Internet :: WWW/HTTP",
23
+ "Topic :: Software Development :: Libraries :: Python Modules",
24
+ "Topic :: Text Processing :: Markup :: Markdown",
25
+ ]
26
+ keywords = ["ai", "agent", "mcp", "web", "scraper", "markdown", "rawfy"]
27
+
28
+ [project.urls]
29
+ Homepage = "https://github.com/rishiiicreates/rawfy"
30
+ Repository = "https://github.com/rishiiicreates/rawfy"
31
+ Issues = "https://github.com/rishiiicreates/rawfy/issues"
32
+
33
+ [project.scripts]
34
+ rawfy-py = "rawfy.client:main"
35
+
36
+ [tool.setuptools.packages.find]
37
+ where = ["."]
38
+ include = ["rawfy*"]
@@ -0,0 +1,26 @@
1
+ """
2
+ Rawfy — Python wrapper for the Rawfy AI agent skill.
3
+
4
+ Provides a Pythonic interface to the Rawfy CLI for fetching and
5
+ processing web pages into agent-readable content.
6
+
7
+ Usage:
8
+ from rawfy import fetch, metadata
9
+
10
+ # Full page fetch
11
+ result = fetch("https://example.com")
12
+ print(result)
13
+
14
+ # JSON format for structured data
15
+ data = fetch("https://example.com", format="json")
16
+ import json
17
+ parsed = json.loads(data)
18
+
19
+ # Metadata only (lightweight)
20
+ meta = metadata("https://example.com")
21
+ """
22
+
23
# Re-export the whole public client API. The README documents
# `from rawfy import fetch_json` and `check_installation()`, so both must be
# importable from the package root.
from rawfy.client import (
    RawfyError,
    check_installation,
    fetch,
    fetch_json,
    metadata,
)

__version__ = "0.1.0"
__all__ = [
    "fetch",
    "fetch_json",
    "metadata",
    "check_installation",
    "RawfyError",
    "__version__",
]
@@ -0,0 +1,5 @@
1
+ """Allow running as `python -m rawfy`."""
2
+ from rawfy.client import main
3
+
4
+ if __name__ == "__main__":
5
+ main()
@@ -0,0 +1,291 @@
1
+ """
2
+ Rawfy Python client — subprocess wrapper around the Rawfy CLI.
3
+
4
+ This module shells out to the `rawfy` Node.js CLI and parses
5
+ the output. It requires Node.js >= 18 and `rawfy` to be installed
6
+ globally or available via npx.
7
+ """
8
+
9
+ from __future__ import annotations
10
+
11
+ import json
12
+ import shutil
13
+ import subprocess
14
+ import sys
15
+ from typing import Any, Literal
16
+
17
+
18
class RawfyError(Exception):
    """Raised when the Rawfy CLI cannot be run or reports a failure.

    The message is passed to ``Exception`` and is available through
    ``str(exc)`` / ``exc.args[0]``.
    """

    def __init__(self, message: str, code: str | None = None, url: str | None = None):
        """Store the optional error ``code`` and ``url`` alongside the message.

        Args:
            message: Human-readable description of the failure.
            code: Short machine-readable tag (e.g. "TIMEOUT"), or None.
            url: The URL being processed when the error occurred, or None.
        """
        self.code = code
        self.url = url
        super().__init__(message)

    def __repr__(self) -> str:
        """Return a constructor-style representation, omitting unset fields."""
        parts = [f"RawfyError({self.args[0]!r}"]
        # Compare against None rather than relying on truthiness so that an
        # explicitly supplied empty string still shows up in the repr.
        if self.code is not None:
            parts.append(f", code={self.code!r}")
        if self.url is not None:
            parts.append(f", url={self.url!r}")
        parts.append(")")
        return "".join(parts)
34
+
35
+
36
+ OutputFormat = Literal["markdown", "json", "text"]
37
+
38
+
39
def _find_rawfy_cli() -> list[str]:
    """
    Locate the rawfy CLI and return the command prefix used to invoke it.

    Search order:
        1. `rawfy` in PATH (global npm install)
        2. `npx -y rawfy` (npx resolution)
        3. `node node_modules/.bin/rawfy` (project-local, relative to the
           current working directory)

    Executables are returned as the paths reported by ``shutil.which`` so
    that the exact file that was found (including any PATHEXT-resolved
    extension on Windows) is the one that gets executed.

    Returns:
        The command prefix as a list of strings; callers append the rawfy
        sub-command and its arguments.

    Raises:
        RawfyError: With code "CLI_NOT_FOUND" when none of the candidates
            is available.
    """
    import os

    # 1. rawfy directly on PATH
    rawfy_path = shutil.which("rawfy")
    if rawfy_path:
        return [rawfy_path]

    # 2. Let npx resolve the package
    npx_path = shutil.which("npx")
    if npx_path:
        return [npx_path, "-y", "rawfy"]

    # 3. Project-local script run through node. Only offer this when the
    #    script actually exists; otherwise the caller would get an opaque
    #    node failure instead of the install hint below.
    local_cli = "node_modules/.bin/rawfy"
    node_path = shutil.which("node")
    if node_path and os.path.isfile(local_cli):
        return [node_path, local_cli]

    raise RawfyError(
        "Could not find rawfy CLI. Install it with: npm install -g rawfy",
        code="CLI_NOT_FOUND",
    )
66
+
67
+
68
def fetch(
    url: str,
    *,
    format: OutputFormat = "markdown",
    vision: bool = False,
    no_playwright: bool = False,
    max_tokens: int = 50_000,
    timeout: int = 30,
) -> str:
    """
    Fetch a URL and return its content in the specified format.

    Runs ``<rawfy> fetch <url> --format <format> --max-tokens <max_tokens>``
    as a subprocess (without a shell) and returns its standard output.

    Args:
        url: The URL to fetch and process. Must be non-empty and must not
            start with "-", so it cannot be parsed as a CLI option.
        format: Output format — "markdown" (default), "json", or "text".
        vision: Pass ``--vision`` to the CLI.
        no_playwright: Pass ``--no-playwright`` to the CLI.
        max_tokens: Value passed as ``--max-tokens`` (default: 50000).
        timeout: Subprocess timeout in seconds (default: 30).

    Returns:
        The CLI's standard output as a string.

    Raises:
        RawfyError: With code "INVALID_URL" for an empty or option-like URL,
            "CLI_NOT_FOUND" if no rawfy command can be located,
            "NODE_NOT_FOUND" if the located executable cannot be started,
            "TIMEOUT" if the subprocess exceeds ``timeout``, or
            "FETCH_FAILED" if the CLI exits with a non-zero status.
    """
    # The URL is placed directly on the command line; refuse values the CLI
    # would interpret as a flag instead of a positional argument.
    if not url or url.startswith("-"):
        raise RawfyError(f"Invalid URL: {url!r}", code="INVALID_URL", url=url)

    cmd = [
        *_find_rawfy_cli(),
        "fetch",
        url,
        "--format",
        format,
        "--max-tokens",
        str(max_tokens),
    ]
    if vision:
        cmd.append("--vision")
    if no_playwright:
        cmd.append("--no-playwright")

    try:
        result = subprocess.run(
            cmd,
            capture_output=True,
            text=True,
            # Decode explicitly instead of using the locale encoding, which
            # is not UTF-8 on every platform. Assumes the CLI emits UTF-8 —
            # TODO confirm against the rawfy CLI.
            encoding="utf-8",
            errors="replace",
            timeout=timeout,
        )
    except FileNotFoundError as exc:
        raise RawfyError(
            "Node.js not found. Rawfy requires Node.js >= 18.",
            code="NODE_NOT_FOUND",
        ) from exc
    except subprocess.TimeoutExpired as exc:
        raise RawfyError(
            f"Rawfy timed out after {timeout}s fetching {url}",
            code="TIMEOUT",
            url=url,
        ) from exc

    if result.returncode != 0:
        stderr = result.stderr.strip()
        raise RawfyError(
            stderr or f"rawfy fetch failed with exit code {result.returncode}",
            code="FETCH_FAILED",
            url=url,
        )

    return result.stdout
130
+
131
+
132
def fetch_json(
    url: str,
    *,
    vision: bool = False,
    no_playwright: bool = False,
    max_tokens: int = 50_000,
    timeout: int = 30,
) -> dict[str, Any]:
    """
    Fetch a URL and return structured data as a Python dict.

    Convenience wrapper around fetch() with format="json" that also decodes
    the output.

    Args:
        url: The URL to fetch and process.
        vision: Forwarded to fetch().
        no_playwright: Forwarded to fetch().
        max_tokens: Forwarded to fetch() (default: 50000).
        timeout: Subprocess timeout in seconds (default: 30).

    Returns:
        The decoded JSON object. Expected keys, per the upstream CLI, are
        metadata, content, media, interactive_elements, fetch_stats — this
        function does not verify them.

    Raises:
        RawfyError: If the fetch fails, or with code "JSON_PARSE_ERROR" if
            the output is not valid JSON or is not a JSON object.
    """
    raw = fetch(
        url,
        format="json",
        vision=vision,
        no_playwright=no_playwright,
        max_tokens=max_tokens,
        timeout=timeout,
    )

    try:
        payload = json.loads(raw)
    except json.JSONDecodeError as exc:
        raise RawfyError(
            f"Failed to parse rawfy JSON output: {exc}",
            code="JSON_PARSE_ERROR",
            url=url,
        ) from exc

    # Valid JSON can still be a list, string or number; callers (including
    # metadata()) rely on getting a dict, so reject anything else here.
    if not isinstance(payload, dict):
        raise RawfyError(
            "Failed to parse rawfy JSON output: expected a JSON object, "
            f"got {type(payload).__name__}",
            code="JSON_PARSE_ERROR",
            url=url,
        )

    return payload
176
+
177
+
178
def metadata(
    url: str,
    *,
    no_playwright: bool = False,
    timeout: int = 15,
) -> dict[str, Any]:
    """
    Fetch only the metadata for a URL.

    Implemented as a fetch_json() call with a reduced token cap
    (max_tokens=10000) from which only the "metadata" entry is kept; the
    rest of the payload is discarded.

    Args:
        url: The URL to fetch metadata for.
        no_playwright: Skip Playwright, use static fetch only.
        timeout: Subprocess timeout in seconds (default: 15).

    Returns:
        The "metadata" object from the JSON output, or an empty dict when
        that entry is missing or is not an object. Expected keys, per the
        upstream CLI: url, title, description, type, lang, word_count,
        reading_time_minutes, etc.

    Raises:
        RawfyError: If the fetch fails.
    """
    data = fetch_json(
        url,
        no_playwright=no_playwright,
        max_tokens=10_000,
        timeout=timeout,
    )
    # A JSON null (or any non-object value) under "metadata" must not leak
    # out, since the declared return type is a dict.
    meta = data.get("metadata")
    return meta if isinstance(meta, dict) else {}
210
+
211
+
212
def check_installation() -> dict[str, Any]:
    """
    Check if Rawfy and its dependencies are properly installed.

    This is a best-effort probe: failures to launch or time-outs of the
    helper commands are reported as ``False`` rather than raised.

    Returns:
        Dict with installation status:
        {
            "node": True/False,        # `node` found on PATH
            "rawfy_cli": True/False,   # `<rawfy> version` exited with 0
            "playwright": True/False,  # `npx playwright --version` exited with 0
            "version": str or None     # stripped stdout of `<rawfy> version`
        }
    """
    status: dict[str, Any] = {
        "node": False,
        "rawfy_cli": False,
        "playwright": False,
        "version": None,
    }

    # Check Node.js
    status["node"] = shutil.which("node") is not None

    # Check rawfy CLI
    try:
        result = subprocess.run(
            [*_find_rawfy_cli(), "version"],
            capture_output=True,
            text=True,
            encoding="utf-8",
            errors="replace",
            timeout=5,
        )
    except (RawfyError, subprocess.TimeoutExpired, OSError):
        # OSError covers FileNotFoundError as well as e.g. PermissionError;
        # none of them should escape a status probe.
        pass
    else:
        if result.returncode == 0:
            status["rawfy_cli"] = True
            status["version"] = result.stdout.strip()

    # Check Playwright. Use the resolved npx path and skip the probe
    # entirely when npx is not installed.
    npx_path = shutil.which("npx")
    if npx_path:
        try:
            result = subprocess.run(
                [npx_path, "playwright", "--version"],
                capture_output=True,
                text=True,
                encoding="utf-8",
                errors="replace",
                timeout=5,
            )
        except (subprocess.TimeoutExpired, OSError):
            pass
        else:
            status["playwright"] = result.returncode == 0

    return status
265
+
266
+
267
def main() -> None:
    """CLI entry point for `python -m rawfy` and the `rawfy-py` script.

    Usage: python -m rawfy <url> [--format markdown|json|text]

    Prints the fetched content to stdout. Exits with status 1 when no
    arguments are given or when fetch() raises RawfyError; malformed
    arguments (unknown format, missing URL, stray options) are rejected by
    argparse with its usual status 2.
    """
    import argparse

    if len(sys.argv) < 2:
        print(
            "Usage: python -m rawfy <url> [--format markdown|json|text]",
            file=sys.stderr,
        )
        sys.exit(1)

    # argparse lets --format appear before or after the URL and validates
    # its value, instead of blindly treating argv[1] as the URL.
    parser = argparse.ArgumentParser(
        prog="python -m rawfy",
        description="Fetch a URL with the Rawfy CLI and print the result.",
    )
    parser.add_argument("url", help="URL to fetch and process")
    parser.add_argument(
        "--format",
        dest="fmt",
        choices=("markdown", "json", "text"),
        default="markdown",
        help="output format (default: markdown)",
    )
    args = parser.parse_args()

    try:
        output = fetch(args.url, format=args.fmt)
    except RawfyError as e:
        print(f"Error: {e}", file=sys.stderr)
        sys.exit(1)

    print(output)
287
+
288
+
289
+ # Entry point for `python -m rawfy`
290
+ if __name__ == "__main__":
291
+ main()
@@ -0,0 +1,98 @@
1
+ Metadata-Version: 2.4
2
+ Name: rawfy
3
+ Version: 0.1.0
4
+ Summary: Python wrapper for the Rawfy AI agent skill — converts any URL into structured, agent-readable content
5
+ Author: rishiiicreates
6
+ License: MIT
7
+ Project-URL: Homepage, https://github.com/rishiiicreates/rawfy
8
+ Project-URL: Repository, https://github.com/rishiiicreates/rawfy
9
+ Project-URL: Issues, https://github.com/rishiiicreates/rawfy/issues
10
+ Keywords: ai,agent,mcp,web,scraper,markdown,rawfy
11
+ Classifier: Development Status :: 4 - Beta
12
+ Classifier: Intended Audience :: Developers
13
+ Classifier: License :: OSI Approved :: MIT License
14
+ Classifier: Programming Language :: Python :: 3
15
+ Classifier: Programming Language :: Python :: 3.10
16
+ Classifier: Programming Language :: Python :: 3.11
17
+ Classifier: Programming Language :: Python :: 3.12
18
+ Classifier: Programming Language :: Python :: 3.13
19
+ Classifier: Topic :: Internet :: WWW/HTTP
20
+ Classifier: Topic :: Software Development :: Libraries :: Python Modules
21
+ Classifier: Topic :: Text Processing :: Markup :: Markdown
22
+ Requires-Python: >=3.10
23
+ Description-Content-Type: text/markdown
24
+
25
+ # Rawfy Python Wrapper
26
+
27
+ Python bindings for the [Rawfy](https://github.com/rishiiicreates/rawfy) AI agent skill.
28
+
29
+ ## Prerequisites
30
+
31
+ - **Node.js >= 18** — Rawfy is a Node.js tool
32
+ - **Rawfy CLI** — `npm install -g rawfy`
33
+
34
+ ## Install
35
+
36
+ ```bash
37
+ pip install rawfy
38
+ ```
39
+
40
+ ## Quick Start
41
+
42
+ ```python
43
+ from rawfy import fetch, metadata
44
+
45
+ # Full page fetch (returns WSM markdown)
46
+ content = fetch("https://example.com")
47
+
48
+ # Get structured JSON data
49
+ import json
50
+ data = fetch("https://example.com", format="json")
51
+ parsed = json.loads(data)
52
+
53
+ # Or use the convenience wrapper
54
+ from rawfy import fetch_json
55
+ data = fetch_json("https://example.com")
56
+ print(data["metadata"]["title"])
57
+
58
+ # Lightweight metadata only
59
+ meta = metadata("https://docs.python.org")
60
+ print(f"Title: {meta['title']}, Words: {meta['word_count']}")
61
+ ```
62
+
63
+ ## API
64
+
65
+ ### `fetch(url, *, format="markdown", vision=False, no_playwright=False, max_tokens=50000, timeout=30)`
66
+
67
+ Fetch and process a URL. Returns the content as a string.
68
+
69
+ ### `fetch_json(url, *, vision=False, no_playwright=False, max_tokens=50000, timeout=30)`
70
+
71
+ Same as `fetch()` with `format="json"`, returns a parsed dict.
72
+
73
+ ### `metadata(url, *, no_playwright=False, timeout=15)`
74
+
75
+ Fetch only page metadata (lightweight). Returns a dict.
76
+
77
+ ### `check_installation()`
78
+
79
+ Check if Rawfy and dependencies are installed. Returns a status dict.
80
+
81
+ ## Error Handling
82
+
83
+ ```python
84
+ from rawfy import fetch, RawfyError
85
+
86
+ try:
87
+ content = fetch("https://example.com")
88
+ except RawfyError as e:
89
+ print(f"Error [{e.code}]: {e}")
90
+ print(f"URL: {e.url}")
91
+ ```
92
+
93
+ ## CLI
94
+
95
+ ```bash
96
+ python -m rawfy https://example.com
97
+ python -m rawfy https://example.com --format json
98
+ ```
@@ -0,0 +1,10 @@
1
+ README.md
2
+ pyproject.toml
3
+ rawfy/__init__.py
4
+ rawfy/__main__.py
5
+ rawfy/client.py
6
+ rawfy.egg-info/PKG-INFO
7
+ rawfy.egg-info/SOURCES.txt
8
+ rawfy.egg-info/dependency_links.txt
9
+ rawfy.egg-info/entry_points.txt
10
+ rawfy.egg-info/top_level.txt
@@ -0,0 +1,2 @@
1
+ [console_scripts]
2
+ rawfy-py = rawfy.client:main
@@ -0,0 +1 @@
1
+ rawfy
rawfy-0.1.0/setup.cfg ADDED
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+