@seanyao/roll 0.5.0 → 2.602.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (181)
  1. package/CHANGELOG.md +717 -0
  2. package/LICENSE +21 -0
  3. package/README.md +65 -165
  4. package/bin/dream-test-quality-scan +110 -0
  5. package/bin/roll +14897 -815
  6. package/conventions/config.yaml +17 -1
  7. package/conventions/global/AGENTS.md +146 -100
  8. package/conventions/global/CLAUDE.md +1 -21
  9. package/conventions/global/GEMINI.md +8 -22
  10. package/conventions/global/project_rules.md +9 -0
  11. package/conventions/templates/backend-service/AGENTS.md +30 -81
  12. package/conventions/templates/backend-service/GEMINI.md +3 -3
  13. package/conventions/templates/backend-service/project_rules.md +16 -0
  14. package/conventions/templates/cli/AGENTS.md +31 -58
  15. package/conventions/templates/cli/CLAUDE.md +3 -5
  16. package/conventions/templates/cli/GEMINI.md +3 -3
  17. package/conventions/templates/cli/project_rules.md +16 -0
  18. package/conventions/templates/frontend-only/AGENTS.md +29 -64
  19. package/conventions/templates/frontend-only/GEMINI.md +3 -3
  20. package/conventions/templates/frontend-only/project_rules.md +14 -0
  21. package/conventions/templates/fullstack/AGENTS.md +31 -79
  22. package/conventions/templates/fullstack/CLAUDE.md +1 -1
  23. package/conventions/templates/fullstack/GEMINI.md +3 -3
  24. package/conventions/templates/fullstack/project_rules.md +15 -0
  25. package/lib/README.md +42 -0
  26. package/lib/__pycache__/github_sync.cpython-314.pyc +0 -0
  27. package/lib/__pycache__/loop-fmt.cpython-314.pyc +0 -0
  28. package/lib/__pycache__/loop_result_eval.cpython-314.pyc +0 -0
  29. package/lib/__pycache__/loop_unstick.cpython-314.pyc +0 -0
  30. package/lib/__pycache__/model_prices.cpython-314.pyc +0 -0
  31. package/lib/__pycache__/prices_fetcher.cpython-314.pyc +0 -0
  32. package/lib/__pycache__/roll-home.cpython-314.pyc +0 -0
  33. package/lib/__pycache__/roll-loop-status.cpython-314.pyc +0 -0
  34. package/lib/__pycache__/roll_git.cpython-314.pyc +0 -0
  35. package/lib/__pycache__/roll_render.cpython-314.pyc +0 -0
  36. package/lib/__pycache__/slides-render.cpython-314.pyc +0 -0
  37. package/lib/agent_usage/README.md +49 -0
  38. package/lib/agent_usage/__init__.py +108 -0
  39. package/lib/agent_usage/__pycache__/__init__.cpython-314.pyc +0 -0
  40. package/lib/agent_usage/__pycache__/gemini.cpython-314.pyc +0 -0
  41. package/lib/agent_usage/__pycache__/kimi.cpython-314.pyc +0 -0
  42. package/lib/agent_usage/__pycache__/openai.cpython-314.pyc +0 -0
  43. package/lib/agent_usage/__pycache__/pi.cpython-314.pyc +0 -0
  44. package/lib/agent_usage/__pycache__/pi_emit.cpython-314.pyc +0 -0
  45. package/lib/agent_usage/__pycache__/qwen.cpython-314.pyc +0 -0
  46. package/lib/agent_usage/gemini.py +127 -0
  47. package/lib/agent_usage/kimi.py +278 -0
  48. package/lib/agent_usage/kimi_emit.py +123 -0
  49. package/lib/agent_usage/openai.py +126 -0
  50. package/lib/agent_usage/pi.py +200 -0
  51. package/lib/agent_usage/pi_emit.py +135 -0
  52. package/lib/agent_usage/qwen.py +128 -0
  53. package/lib/backfill-pi-usage.py +243 -0
  54. package/lib/changelog_audit.py +155 -0
  55. package/lib/changelog_generate.py +263 -0
  56. package/lib/context_feed_budget.sh +194 -0
  57. package/lib/github_sync.py +876 -0
  58. package/lib/i18n/README.md +54 -0
  59. package/lib/i18n/agent.sh +75 -0
  60. package/lib/i18n/alert.sh +20 -0
  61. package/lib/i18n/backlog.sh +96 -0
  62. package/lib/i18n/brief.sh +5 -0
  63. package/lib/i18n/changelog.sh +5 -0
  64. package/lib/i18n/ci.sh +15 -0
  65. package/lib/i18n/debug.sh +0 -0
  66. package/lib/i18n/doctor.sh +44 -0
  67. package/lib/i18n/dream.sh +0 -0
  68. package/lib/i18n/init.sh +91 -0
  69. package/lib/i18n/lang.sh +10 -0
  70. package/lib/i18n/loop.sh +140 -0
  71. package/lib/i18n/migrate.sh +74 -0
  72. package/lib/i18n/offboard.sh +31 -0
  73. package/lib/i18n/onboard.sh +0 -0
  74. package/lib/i18n/peer.sh +41 -0
  75. package/lib/i18n/peer_help.sh +25 -0
  76. package/lib/i18n/peer_reset.sh +7 -0
  77. package/lib/i18n/peer_status.sh +5 -0
  78. package/lib/i18n/prices.sh +3 -0
  79. package/lib/i18n/prices_refresh.sh +17 -0
  80. package/lib/i18n/prices_show.sh +7 -0
  81. package/lib/i18n/propose.sh +0 -0
  82. package/lib/i18n/release.sh +0 -0
  83. package/lib/i18n/research.sh +0 -0
  84. package/lib/i18n/review_pr.sh +0 -0
  85. package/lib/i18n/sentinel.sh +0 -0
  86. package/lib/i18n/setup.sh +3 -0
  87. package/lib/i18n/shared.sh +157 -0
  88. package/lib/i18n/skills/roll-brief.sh +47 -0
  89. package/lib/i18n/skills/roll-build.sh +97 -0
  90. package/lib/i18n/skills/roll-design.sh +18 -0
  91. package/lib/i18n/skills/roll-fix.sh +53 -0
  92. package/lib/i18n/skills/roll-loop.sh +28 -0
  93. package/lib/i18n/skills/roll-onboard.sh +33 -0
  94. package/lib/i18n/skills_catalog.sh +30 -0
  95. package/lib/i18n/slides.sh +3 -0
  96. package/lib/i18n/slides_build.sh +38 -0
  97. package/lib/i18n/slides_delete.sh +19 -0
  98. package/lib/i18n/slides_list.sh +14 -0
  99. package/lib/i18n/slides_logs.sh +12 -0
  100. package/lib/i18n/slides_new.sh +15 -0
  101. package/lib/i18n/slides_preview.sh +14 -0
  102. package/lib/i18n/slides_templates.sh +7 -0
  103. package/lib/i18n/status.sh +21 -0
  104. package/lib/i18n/update.sh +24 -0
  105. package/lib/i18n.sh +211 -0
  106. package/lib/loop-exit-summary.py +393 -0
  107. package/lib/loop-fmt.py +589 -0
  108. package/lib/loop_pick_agent.py +316 -0
  109. package/lib/loop_result_eval.py +469 -0
  110. package/lib/loop_unstick.py +180 -0
  111. package/lib/model_prices.py +186 -0
  112. package/lib/prices/README.md +35 -0
  113. package/lib/prices/snapshot-2026-05-22.json +22 -0
  114. package/lib/prices/snapshot-2026-05-23-deepseek.json +15 -0
  115. package/lib/prices/snapshot-2026-05-23-kimi.json +14 -0
  116. package/lib/prices_fetcher.py +285 -0
  117. package/lib/roll-backlog.py +225 -0
  118. package/lib/roll-brief.py +286 -0
  119. package/lib/roll-help.py +158 -0
  120. package/lib/roll-home.py +556 -0
  121. package/lib/roll-init.py +156 -0
  122. package/lib/roll-loop-status.py +1683 -0
  123. package/lib/roll-loop-story.py +191 -0
  124. package/lib/roll-onboard-render.py +378 -0
  125. package/lib/roll-peer.py +252 -0
  126. package/lib/roll-plan-validate.py +386 -0
  127. package/lib/roll-setup.py +102 -0
  128. package/lib/roll-status.py +367 -0
  129. package/lib/roll_git.py +41 -0
  130. package/lib/roll_render.py +414 -0
  131. package/lib/slides/components/README.md +123 -0
  132. package/lib/slides/components/cards-2.html +9 -0
  133. package/lib/slides/components/cards-3.html +9 -0
  134. package/lib/slides/components/cards-4.html +9 -0
  135. package/lib/slides/components/compare.html +22 -0
  136. package/lib/slides/components/highlight.html +9 -0
  137. package/lib/slides/components/pipeline.html +12 -0
  138. package/lib/slides/components/plain.html +7 -0
  139. package/lib/slides/components/quote.html +4 -0
  140. package/lib/slides/components/timeline.html +9 -0
  141. package/lib/slides/templates/introduction-v3.html +571 -0
  142. package/lib/slides/templates/pitch.html +0 -0
  143. package/lib/slides-render.py +778 -0
  144. package/lib/slides-validate.py +357 -0
  145. package/lib/test_quality_gate.py +143 -0
  146. package/package.json +8 -7
  147. package/skills/roll-.changelog/SKILL.md +406 -33
  148. package/skills/roll-.clarify/SKILL.md +5 -2
  149. package/skills/roll-.dream/SKILL.md +374 -0
  150. package/skills/roll-.echo/SKILL.md +5 -2
  151. package/skills/roll-.qa/SKILL.md +57 -3
  152. package/skills/roll-.review/SKILL.md +42 -3
  153. package/skills/roll-brief/SKILL.md +209 -0
  154. package/skills/roll-build/SKILL.md +308 -63
  155. package/skills/roll-debug/SKILL.md +341 -162
  156. package/skills/roll-debug/injectable-bb.js +263 -0
  157. package/skills/roll-deck/SKILL.md +296 -0
  158. package/skills/roll-design/ENGINEERING_CHECKLIST.md +1 -1
  159. package/skills/roll-design/SKILL.md +727 -94
  160. package/skills/roll-doc/SKILL.md +595 -0
  161. package/skills/roll-doctor/SKILL.md +192 -0
  162. package/skills/roll-fix/SKILL.md +149 -32
  163. package/skills/{roll-jot → roll-idea}/SKILL.md +18 -10
  164. package/skills/roll-loop/SKILL.md +578 -0
  165. package/skills/roll-notes/SKILL.md +103 -0
  166. package/skills/roll-onboard/SKILL.md +234 -0
  167. package/skills/roll-peer/SKILL.md +336 -0
  168. package/skills/roll-propose/SKILL.md +157 -0
  169. package/skills/roll-review-pr/SKILL.md +58 -0
  170. package/skills/roll-sentinel/SKILL.md +11 -2
  171. package/skills/roll-spar/SKILL.md +8 -6
  172. package/template/.github/workflows/ci.yml +5 -2
  173. package/template/AGENTS.md +20 -74
  174. package/skills/roll-research/SKILL.md +0 -307
  175. package/skills/roll-research/references/schema.json +0 -162
  176. package/skills/roll-research/scripts/md_to_pdf.py +0 -289
  177. package/tools/roll-fetch/SKILL.md +0 -182
  178. package/tools/roll-fetch/package.json +0 -15
  179. package/tools/roll-fetch/smart-web-fetch.js +0 -558
  180. package/tools/roll-probe/SKILL.md +0 -84
  181. package/template/{BACKLOG.md → .roll/backlog.md} +0 -0
@@ -1,289 +0,0 @@
1
- #!/usr/bin/env python3
2
- """
3
- Deep Research Report: Markdown to PDF converter (WeasyPrint)
4
- Usage: python md_to_pdf.py input.md output.pdf [--title "Report Title"] [--author "Author"]
5
-
6
- Dependencies: pip install weasyprint markdown --break-system-packages
7
- """
8
-
9
- import sys
10
- import os
11
- import re
12
- import argparse
13
- import markdown
14
-
15
- # ── CSS Styles ──
16
- CSS_TEMPLATE = """
17
- @page {
18
- size: A4;
19
- margin: 25mm 20mm 20mm 20mm;
20
-
21
- @top-center {
22
- content: "HEADER_TEXT";
23
- font-family: "Droid Sans Fallback", Helvetica, Arial, sans-serif;
24
- font-size: 8pt;
25
- color: #95a5a6;
26
- border-bottom: 0.5pt solid #ecf0f1;
27
- padding-bottom: 3mm;
28
- }
29
-
30
- @bottom-center {
31
- content: "Page " counter(page);
32
- font-family: "Droid Sans Fallback", Helvetica, Arial, sans-serif;
33
- font-size: 8pt;
34
- color: #95a5a6;
35
- border-top: 0.8pt solid #1a5276;
36
- padding-top: 2mm;
37
- }
38
- }
39
-
40
- @page :first {
41
- @top-center { content: none; }
42
- @bottom-center { content: none; }
43
- }
44
-
45
- body {
46
- font-family: "Droid Sans Fallback", Helvetica, Arial, sans-serif;
47
- font-size: 10.5pt;
48
- line-height: 1.75;
49
- color: #2c3e50;
50
- text-align: justify;
51
- }
52
-
53
- /* Cover page */
54
- .cover {
55
- page-break-after: always;
56
- text-align: center;
57
- padding-top: 45%;
58
- }
59
- .cover h1 {
60
- font-size: 28pt;
61
- color: #1a5276;
62
- margin-bottom: 8mm;
63
- font-weight: bold;
64
- letter-spacing: 2pt;
65
- }
66
- .cover .subtitle {
67
- font-size: 14pt;
68
- color: #95a5a6;
69
- margin-bottom: 6mm;
70
- }
71
- .cover .meta {
72
- font-size: 11pt;
73
- color: #95a5a6;
74
- margin-bottom: 4mm;
75
- }
76
- .cover .divider {
77
- width: 60%;
78
- margin: 8mm auto;
79
- border: none;
80
- border-top: 1.5pt solid #1a5276;
81
- }
82
-
83
- /* H1 */
84
- h1 {
85
- font-size: 20pt;
86
- color: #1a5276;
87
- margin-top: 16mm;
88
- margin-bottom: 6mm;
89
- padding-bottom: 3mm;
90
- border-bottom: 2pt solid #1a5276;
91
- page-break-before: always;
92
- font-weight: bold;
93
- }
94
-
95
- /* H2 */
96
- h2 {
97
- font-size: 14pt;
98
- color: #1e8449;
99
- margin-top: 10mm;
100
- margin-bottom: 5mm;
101
- font-weight: bold;
102
- }
103
-
104
- /* H3 */
105
- h3 {
106
- font-size: 12pt;
107
- color: #2e86c1;
108
- margin-top: 6mm;
109
- margin-bottom: 3mm;
110
- font-weight: bold;
111
- }
112
-
113
- h4 {
114
- font-size: 11pt;
115
- color: #5b2c6f;
116
- margin-top: 5mm;
117
- margin-bottom: 2mm;
118
- font-weight: bold;
119
- }
120
-
121
- /* Paragraphs */
122
- p {
123
- margin-top: 1.5mm;
124
- margin-bottom: 1.5mm;
125
- orphans: 3;
126
- widows: 3;
127
- }
128
-
129
- /* Blockquotes */
130
- blockquote {
131
- margin: 4mm 0;
132
- padding: 4mm 4mm 4mm 10mm;
133
- background: #f8f9fa;
134
- border-left: 3pt solid #1a5276;
135
- color: #5d6d7e;
136
- font-size: 10pt;
137
- }
138
- blockquote p {
139
- margin: 1mm 0;
140
- }
141
-
142
- /* Bold */
143
- strong, b {
144
- font-weight: bold;
145
- color: #1a252f;
146
- }
147
-
148
- /* Inline code */
149
- code {
150
- font-family: "Courier New", Courier, monospace;
151
- background: #fdf2e9;
152
- color: #c0392b;
153
- padding: 0.5mm 1.5mm;
154
- border-radius: 2pt;
155
- font-size: 9.5pt;
156
- }
157
-
158
- /* Tables */
159
- table {
160
- width: 100%;
161
- border-collapse: collapse;
162
- margin: 4mm 0;
163
- font-size: 9.5pt;
164
- }
165
- thead th {
166
- background: #1a5276;
167
- color: white;
168
- padding: 3mm;
169
- text-align: left;
170
- font-weight: bold;
171
- }
172
- tbody td {
173
- padding: 2.5mm 3mm;
174
- border-bottom: 0.5pt solid #bdc3c7;
175
- }
176
- tbody tr:nth-child(even) {
177
- background: #f8f9fa;
178
- }
179
-
180
- /* Horizontal rule */
181
- hr {
182
- border: none;
183
- border-top: 0.5pt solid #bdc3c7;
184
- margin: 4mm 0;
185
- }
186
-
187
- /* Lists */
188
- ul, ol {
189
- margin: 2mm 0;
190
- padding-left: 8mm;
191
- }
192
- li {
193
- margin-bottom: 1mm;
194
- }
195
-
196
- /* Links */
197
- a {
198
- color: #2e86c1;
199
- text-decoration: none;
200
- }
201
- """
202
-
203
-
204
- def md_to_html(md_text, title="Deep Research Report", subtitle="Let's roll",
205
- meta_line="", author="roll"):
206
- """Convert Markdown to HTML with cover page"""
207
-
208
- # Convert body with markdown library
209
- html_body = markdown.markdown(
210
- md_text,
211
- extensions=['tables', 'fenced_code', 'nl2br'],
212
- output_format='html5'
213
- )
214
-
215
- # Extract first H1 for cover (remove from body)
216
- first_h1_match = re.search(r'<h1>(.*?)</h1>', html_body)
217
- if first_h1_match:
218
- extracted_title = first_h1_match.group(1)
219
- if not title or title == "Deep Research Report":
220
- title = extracted_title
221
- html_body = html_body.replace(first_h1_match.group(0), '', 1)
222
-
223
- # Replace header placeholder in CSS
224
- css = CSS_TEMPLATE.replace("HEADER_TEXT", f"{title} | Deep Research Report")
225
-
226
- # Build cover page
227
- cover_html = f"""
228
- <div class="cover">
229
- <h1 style="page-break-before: avoid; border: none;">{title}</h1>
230
- <div class="subtitle">{subtitle}</div>
231
- {"<div class='meta'>" + meta_line + "</div>" if meta_line else ""}
232
- <hr class="divider">
233
- <div class="meta">Author: {author}</div>
234
- </div>
235
- """
236
-
237
- full_html = f"""<!DOCTYPE html>
238
- <html lang="en">
239
- <head>
240
- <meta charset="UTF-8">
241
- <style>{css}</style>
242
- </head>
243
- <body>
244
- {cover_html}
245
- {html_body}
246
- </body>
247
- </html>"""
248
-
249
- return full_html
250
-
251
-
252
- def main():
253
- parser = argparse.ArgumentParser(description="Deep Research Report: Markdown to PDF")
254
- parser.add_argument("input", help="Input Markdown file path")
255
- parser.add_argument("output", help="Output PDF file path")
256
- parser.add_argument("--title", default=None, help="Report title")
257
- parser.add_argument("--author", default="roll", help="Author name")
258
- parser.add_argument("--subtitle", default="Let's roll", help="Report subtitle")
259
- args = parser.parse_args()
260
-
261
- with open(args.input, "r", encoding="utf-8") as f:
262
- md_text = f.read()
263
-
264
- # Extract metadata line
265
- meta_line = ""
266
- for line in md_text.split("\n"):
267
- stripped = line.strip().lstrip(">").strip()
268
- if "research date" in stripped.lower() or "field:" in stripped.lower() or "subject type" in stripped.lower():
269
- meta_line = stripped
270
- break
271
-
272
- html = md_to_html(md_text, title=args.title or "Deep Research Report",
273
- subtitle=args.subtitle, meta_line=meta_line, author=args.author)
274
-
275
- # Save intermediate HTML (for debugging)
276
- html_path = args.output.replace('.pdf', '.html')
277
- with open(html_path, 'w', encoding='utf-8') as f:
278
- f.write(html)
279
- print(f"[OK] HTML generated: {html_path}")
280
-
281
- # Convert to PDF
282
- from weasyprint import HTML
283
- HTML(string=html).write_pdf(args.output)
284
- size_kb = os.path.getsize(args.output) / 1024
285
- print(f"[OK] PDF generated: {args.output} ({size_kb:.1f} KB)")
286
-
287
-
288
- if __name__ == "__main__":
289
- main()
@@ -1,182 +0,0 @@
1
- ---
2
- hidden: true
3
- name: roll-fetch
4
- description: Web page fetching and crawling for AI agents. Extract content from URLs for research, documentation, and competitive analysis.
5
- ---
6
-
7
- # Roll Fetch - Web Content Extraction
8
-
9
- Extract content from web pages for research and analysis.
10
-
11
- ## When to Use
12
-
13
- - Product research (competitor analysis)
14
- - Technical documentation gathering
15
- - Code examples and best practices
16
- - Full site crawling for backup/analysis
17
-
18
- ## Environment Setup
19
-
20
- Configure API keys per machine:
21
-
22
- ```bash
23
- # Required for Tavily
24
- export TAVILY_API_KEY=tvly-dev-...
25
-
26
- # Optional for cloud browser fallback
27
- export BROWSER_USE_API_KEY=bu-...
28
- ```
29
-
30
- Or create a `.env` file in the project root:
31
- ```
32
- TAVILY_API_KEY=tvly-dev-...
33
- BROWSER_USE_API_KEY=bu-...
34
- ```
35
-
36
- ## Methods
37
-
38
- ### 1. Tavily API (Recommended)
39
-
40
- Best quality extraction, requires `TAVILY_API_KEY`.
41
-
42
- ```bash
43
- # Using Tavily CLI or API
44
- curl -X POST https://api.tavily.com/extract \
45
- -H "Content-Type: application/json" \
46
- -d '{
47
- "urls": ["https://example.com"],
48
- "api_key": "your_tavily_api_key"
49
- }'
50
- ```
51
-
52
- **Pros**: AI-optimized extraction, handles complex layouts
53
- **Cons**: Requires API key, rate limited
54
-
55
- ### 2. LLM Native Fetch (Default)
56
-
57
- Use your built-in URL fetching capability directly.
58
-
59
- **When to use**: When Tavily is unavailable or for quick checks.
60
-
61
- **Note**: Most modern AI agents (Kimi, Codex, Claude) have native URL fetching. Use the `FetchURL` tool or an equivalent.
62
-
63
- ### 3. Browser Automation (Fallback)
64
-
65
- Browser automation (local or cloud) for stubborn pages using **[browser-use](https://github.com/browser-use/browser-use)**.
66
-
67
- **How to Choose:**
68
-
69
- | If | Then Use | Why |
70
- |----|---------|-----|
71
- | `BROWSER_USE_API_KEY` in env | **Cloud** | Managed browsers, less setup |
72
- | No API key, but `browser-use` installed | **Local** | Free, no external dependency |
73
- | Neither | Skip to manual extraction | Tell user "Need browser automation setup" |
74
-
75
- **Option A: Local (Free, No API Key)**
76
- ```python
77
- from browser_use import Agent, Browser, BrowserConfig
78
- import asyncio
79
-
80
- async def fetch_page(url):
81
- # Pure local, no API key needed
82
- browser = Browser(config=BrowserConfig(headless=True))
83
- await browser.start()
84
- page = await browser.get_current_page()
85
- await page.goto(url)
86
- content = await page.content()
87
- await browser.stop()
88
- return content
89
-
90
- # Run
91
- content = asyncio.run(fetch_page("https://example.com"))
92
- ```
93
-
94
- **Option B: Cloud API**
95
- ```python
96
- from browser_use import Agent
97
-
98
- agent = Agent(
99
- task=f"Extract the main content from {url} and return as markdown",
100
- llm="moonshot" # or openai, anthropic
101
- )
102
- result = await agent.run()
103
- ```
104
-
105
- **Setup** (Local):
106
- ```bash
107
- pip install browser-use
108
- playwright install chromium
109
- ```
110
-
111
- ## Usage
112
-
113
- ### CLI Usage (via smart-web-fetch.js)
114
-
115
- ```bash
116
- # Auto mode (Tavily → Native → Browser)
117
- node smart-web-fetch.js fetch https://example.com
118
-
119
- # Explicit method
120
- node smart-web-fetch.js fetch https://example.com tavily
121
- node smart-web-fetch.js fetch https://example.com native
122
- node smart-web-fetch.js fetch https://example.com browser
123
-
124
- # Search
125
- node smart-web-fetch.js search "Python async" 5
126
- ```
127
-
128
- ### Programmatic Usage
129
-
130
- ```javascript
131
- const { smartFetch, smartSearch } = require('./smart-web-fetch.js');
132
-
133
- // Fetch a page
134
- const result = await smartFetch('https://example.com');
135
- console.log(result.content);
136
-
137
- // Search
138
- const searchResult = await smartSearch('OpenAI GPT-5', 5);
139
- console.log(searchResult.results);
140
- ```
141
-
142
- ### Single Page Fetch
143
-
144
- ```
145
- User: "Fetch https://docs.example.com/api"
146
- → Use smart-web-fetch.js with auto mode
147
- → Return clean markdown content
148
- ```
149
-
150
- ### Full Site Crawl
151
-
152
- ```
153
- User: "Crawl https://docs.example.com"
154
- → Use smart-web-fetch.js recursively
155
- → Extract all internal links
156
- → Recursively fetch up to max depth (default: 2)
157
- → Save each page as separate markdown file
158
- ```
159
-
160
- ## Output Format
161
-
162
- Always return clean Markdown:
163
- - Extract main content only (remove nav, ads, footers)
164
- - Preserve code blocks and tables
165
- - Include source URL as header
166
-
167
- ## Quality Check
168
-
169
- Validate extracted content:
170
- - Min length: 500 chars (reject if shorter)
171
- - Check for captcha/error messages
172
- - Verify main content structure (headings, paragraphs)
173
-
174
- ## Examples
175
-
176
- | Task | Method | Command |
177
- |------|--------|---------|
178
- | Quick article | Auto | `node smart-web-fetch.js fetch https://blog.example.com` |
179
- | API docs | Tavily | `node smart-web-fetch.js fetch https://docs.example.com tavily` |
180
- | SPA site | Browser | `node smart-web-fetch.js fetch https://spa.example.com browser` |
181
- | Search | Tavily | `node smart-web-fetch.js search "Python async" 5` |
182
- | Fallback test | Native | `node smart-web-fetch.js fetch https://example.com native` |
@@ -1,15 +0,0 @@
1
- {
2
- "name": "smart-web-fetch",
3
- "version": "1.0.0",
4
- "description": "Intelligent web fetching with automatic Tavily → Scrapling fallback",
5
- "main": "smart-web-fetch.js",
6
- "bin": {
7
- "smart-web-fetch": "./smart-web-fetch.js"
8
- },
9
- "scripts": {
10
- "test": "node smart-web-fetch.js fetch https://example.com"
11
- },
12
- "keywords": ["web-scraping", "tavily", "scrapling", "fallback"],
13
- "author": "R0_lobster",
14
- "license": "MIT"
15
- }