firecrawl-cli 1.17.0 → 1.18.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (95) hide show
  1. package/README.md +13 -35
  2. package/dist/__tests__/commands/init.test.js +4 -2
  3. package/dist/__tests__/commands/init.test.js.map +1 -1
  4. package/dist/__tests__/commands/setup.test.js +6 -2
  5. package/dist/__tests__/commands/setup.test.js.map +1 -1
  6. package/dist/commands/init.d.ts.map +1 -1
  7. package/dist/commands/init.js +27 -4
  8. package/dist/commands/init.js.map +1 -1
  9. package/dist/commands/monitor.d.ts +23 -0
  10. package/dist/commands/monitor.d.ts.map +1 -0
  11. package/dist/commands/monitor.js +395 -0
  12. package/dist/commands/monitor.js.map +1 -0
  13. package/dist/commands/setup.d.ts +1 -1
  14. package/dist/commands/setup.d.ts.map +1 -1
  15. package/dist/commands/setup.js +9 -5
  16. package/dist/commands/setup.js.map +1 -1
  17. package/dist/commands/skills-install.d.ts +9 -1
  18. package/dist/commands/skills-install.d.ts.map +1 -1
  19. package/dist/commands/skills-install.js +13 -2
  20. package/dist/commands/skills-install.js.map +1 -1
  21. package/dist/index.js +7 -8
  22. package/dist/index.js.map +1 -1
  23. package/package.json +2 -2
  24. package/dist/commands/experimental/backends.d.ts +0 -19
  25. package/dist/commands/experimental/backends.d.ts.map +0 -1
  26. package/dist/commands/experimental/backends.js +0 -74
  27. package/dist/commands/experimental/backends.js.map +0 -1
  28. package/dist/commands/experimental/index.d.ts +0 -13
  29. package/dist/commands/experimental/index.d.ts.map +0 -1
  30. package/dist/commands/experimental/index.js +0 -200
  31. package/dist/commands/experimental/index.js.map +0 -1
  32. package/dist/commands/experimental/shared.d.ts +0 -17
  33. package/dist/commands/experimental/shared.d.ts.map +0 -1
  34. package/dist/commands/experimental/shared.js +0 -152
  35. package/dist/commands/experimental/shared.js.map +0 -1
  36. package/dist/commands/experimental/workflows/company-directories.d.ts +0 -11
  37. package/dist/commands/experimental/workflows/company-directories.d.ts.map +0 -1
  38. package/dist/commands/experimental/workflows/company-directories.js +0 -245
  39. package/dist/commands/experimental/workflows/company-directories.js.map +0 -1
  40. package/dist/commands/experimental/workflows/competitive-intel.d.ts +0 -11
  41. package/dist/commands/experimental/workflows/competitive-intel.d.ts.map +0 -1
  42. package/dist/commands/experimental/workflows/competitive-intel.js +0 -226
  43. package/dist/commands/experimental/workflows/competitive-intel.js.map +0 -1
  44. package/dist/commands/experimental/workflows/competitor-analysis.d.ts +0 -10
  45. package/dist/commands/experimental/workflows/competitor-analysis.d.ts.map +0 -1
  46. package/dist/commands/experimental/workflows/competitor-analysis.js +0 -196
  47. package/dist/commands/experimental/workflows/competitor-analysis.js.map +0 -1
  48. package/dist/commands/experimental/workflows/dashboard-reporting.d.ts +0 -11
  49. package/dist/commands/experimental/workflows/dashboard-reporting.d.ts.map +0 -1
  50. package/dist/commands/experimental/workflows/dashboard-reporting.js +0 -254
  51. package/dist/commands/experimental/workflows/dashboard-reporting.js.map +0 -1
  52. package/dist/commands/experimental/workflows/deep-research.d.ts +0 -11
  53. package/dist/commands/experimental/workflows/deep-research.d.ts.map +0 -1
  54. package/dist/commands/experimental/workflows/deep-research.js +0 -159
  55. package/dist/commands/experimental/workflows/deep-research.js.map +0 -1
  56. package/dist/commands/experimental/workflows/demo.d.ts +0 -11
  57. package/dist/commands/experimental/workflows/demo.d.ts.map +0 -1
  58. package/dist/commands/experimental/workflows/demo.js +0 -190
  59. package/dist/commands/experimental/workflows/demo.js.map +0 -1
  60. package/dist/commands/experimental/workflows/knowledge-base.d.ts +0 -11
  61. package/dist/commands/experimental/workflows/knowledge-base.d.ts.map +0 -1
  62. package/dist/commands/experimental/workflows/knowledge-base.js +0 -319
  63. package/dist/commands/experimental/workflows/knowledge-base.js.map +0 -1
  64. package/dist/commands/experimental/workflows/knowledge-ingest.d.ts +0 -12
  65. package/dist/commands/experimental/workflows/knowledge-ingest.d.ts.map +0 -1
  66. package/dist/commands/experimental/workflows/knowledge-ingest.js +0 -251
  67. package/dist/commands/experimental/workflows/knowledge-ingest.js.map +0 -1
  68. package/dist/commands/experimental/workflows/lead-gen.d.ts +0 -11
  69. package/dist/commands/experimental/workflows/lead-gen.d.ts.map +0 -1
  70. package/dist/commands/experimental/workflows/lead-gen.js +0 -257
  71. package/dist/commands/experimental/workflows/lead-gen.js.map +0 -1
  72. package/dist/commands/experimental/workflows/lead-research.d.ts +0 -11
  73. package/dist/commands/experimental/workflows/lead-research.d.ts.map +0 -1
  74. package/dist/commands/experimental/workflows/lead-research.js +0 -146
  75. package/dist/commands/experimental/workflows/lead-research.js.map +0 -1
  76. package/dist/commands/experimental/workflows/market-research.d.ts +0 -11
  77. package/dist/commands/experimental/workflows/market-research.d.ts.map +0 -1
  78. package/dist/commands/experimental/workflows/market-research.js +0 -260
  79. package/dist/commands/experimental/workflows/market-research.js.map +0 -1
  80. package/dist/commands/experimental/workflows/qa.d.ts +0 -11
  81. package/dist/commands/experimental/workflows/qa.d.ts.map +0 -1
  82. package/dist/commands/experimental/workflows/qa.js +0 -184
  83. package/dist/commands/experimental/workflows/qa.js.map +0 -1
  84. package/dist/commands/experimental/workflows/research-papers.d.ts +0 -11
  85. package/dist/commands/experimental/workflows/research-papers.d.ts.map +0 -1
  86. package/dist/commands/experimental/workflows/research-papers.js +0 -151
  87. package/dist/commands/experimental/workflows/research-papers.js.map +0 -1
  88. package/dist/commands/experimental/workflows/seo-audit.d.ts +0 -11
  89. package/dist/commands/experimental/workflows/seo-audit.d.ts.map +0 -1
  90. package/dist/commands/experimental/workflows/seo-audit.js +0 -155
  91. package/dist/commands/experimental/workflows/seo-audit.js.map +0 -1
  92. package/dist/commands/experimental/workflows/shop.d.ts +0 -11
  93. package/dist/commands/experimental/workflows/shop.d.ts.map +0 -1
  94. package/dist/commands/experimental/workflows/shop.js +0 -155
  95. package/dist/commands/experimental/workflows/shop.js.map +0 -1
@@ -1,319 +0,0 @@
1
- "use strict";
2
- /**
3
- * Workflow: Knowledge Base
4
- *
5
- * A single command that adapts based on the user's goal: local reference docs,
6
- * RAG-ready chunks, fine-tuning datasets, or full doc site mirrors. All output
7
- * follows the `.firecrawl/<hostname>/<path>/index.md` convention.
8
- */
9
- var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
10
- if (k2 === undefined) k2 = k;
11
- var desc = Object.getOwnPropertyDescriptor(m, k);
12
- if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
13
- desc = { enumerable: true, get: function() { return m[k]; } };
14
- }
15
- Object.defineProperty(o, k2, desc);
16
- }) : (function(o, m, k, k2) {
17
- if (k2 === undefined) k2 = k;
18
- o[k2] = m[k];
19
- }));
20
- var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
21
- Object.defineProperty(o, "default", { enumerable: true, value: v });
22
- }) : function(o, v) {
23
- o["default"] = v;
24
- });
25
- var __importStar = (this && this.__importStar) || (function () {
26
- var ownKeys = function(o) {
27
- ownKeys = Object.getOwnPropertyNames || function (o) {
28
- var ar = [];
29
- for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
30
- return ar;
31
- };
32
- return ownKeys(o);
33
- };
34
- return function (mod) {
35
- if (mod && mod.__esModule) return mod;
36
- var result = {};
37
- if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
38
- __setModuleDefault(result, mod);
39
- return result;
40
- };
41
- })();
42
- Object.defineProperty(exports, "__esModule", { value: true });
43
- exports.register = register;
44
- const backends_1 = require("../backends");
45
- const shared_1 = require("../shared");
46
- // ─── Input gathering ────────────────────────────────────────────────────────
47
- async function gatherInputs(prefill) {
48
- const { input, select } = await Promise.resolve().then(() => __importStar(require('@inquirer/prompts')));
49
- const rawSource = prefill?.source ||
50
- (await input({
51
- message: 'What do you want to build a knowledge base from? (URL or topic)',
52
- validate: (0, shared_1.validateRequired)('URL or topic'),
53
- }));
54
- const goal = await select({
55
- message: 'What are you building this for?',
56
- choices: [
57
- {
58
- name: 'Local reference (organized markdown files)',
59
- value: 'reference',
60
- },
61
- {
62
- name: 'RAG / embedding pipeline (chunked, with metadata)',
63
- value: 'rag',
64
- },
65
- { name: 'Fine-tuning dataset (JSONL training data)', value: 'train' },
66
- { name: 'Documentation scrape (mirror a doc site)', value: 'docs' },
67
- ],
68
- });
69
- let trainFormat = '';
70
- let trainExamples = '';
71
- if (goal === 'train') {
72
- trainFormat = (await select({
73
- message: 'Training data format?',
74
- choices: [
75
- { name: 'OpenAI JSONL (messages array)', value: 'openai' },
76
- { name: 'Alpaca (instruction/input/output)', value: 'alpaca' },
77
- { name: 'ShareGPT (conversations)', value: 'sharegpt' },
78
- ],
79
- }));
80
- trainExamples = await input({
81
- message: 'Roughly how many training examples?',
82
- default: '100',
83
- });
84
- }
85
- const depth = await select({
86
- message: 'How thorough?',
87
- choices: [
88
- { name: 'Quick (5-10 sources)', value: 'quick' },
89
- { name: 'Thorough (15-25 sources)', value: 'thorough' },
90
- { name: 'Exhaustive (25+ sources)', value: 'exhaustive' },
91
- ],
92
- });
93
- const context = await input({
94
- message: 'Any specific focus or instructions? (leave blank to skip)',
95
- default: '',
96
- });
97
- const outputDir = await input({
98
- message: 'Output directory?',
99
- default: '.firecrawl/',
100
- });
101
- return {
102
- source: (0, shared_1.normalizeSource)(rawSource),
103
- goal,
104
- depth,
105
- context,
106
- outputDir,
107
- trainFormat,
108
- trainExamples,
109
- };
110
- }
111
- // ─── System prompt ──────────────────────────────────────────────────────────
112
- const FILE_CONVENTION = `## File Organization
113
-
114
- **IMPORTANT:** Follow the same structure as \`firecrawl download\`. Save all files under \`.firecrawl/\` using nested directories that mirror each URL's hostname and path:
115
-
116
- \`\`\`
117
- .firecrawl/
118
- <hostname>/
119
- <path>/
120
- index.md # Page content as clean markdown
121
- \`\`\`
122
-
123
- For example, \`https://docs.stripe.com/api/charges\` becomes:
124
- \`\`\`
125
- .firecrawl/docs.stripe.com/api/charges/index.md
126
- \`\`\`
127
-
128
- Strip \`www.\` from hostnames. Each page gets its own directory with an \`index.md\` inside it.`;
129
- function buildGoalInstructions(opts) {
130
- switch (opts.goal) {
131
- case 'reference':
132
- return `${FILE_CONVENTION}
133
-
134
- Also create these at the root of \`${opts.outputDir}\`:
135
- - \`index.md\` -- Table of contents with links to all scraped pages
136
- - \`sources.json\` -- All URLs scraped with metadata (title, type, url)
137
-
138
- Each markdown file should have frontmatter:
139
- \`\`\`yaml
140
- ---
141
- title: "Page Title"
142
- url: "https://..."
143
- source: "Source Name"
144
- type: "docs | article | tutorial | reference | discussion"
145
- ---
146
- \`\`\`
147
-
148
- Focus on clean, readable markdown. Preserve code examples and formatting.`;
149
- case 'rag':
150
- return `${FILE_CONVENTION}
151
-
152
- After scraping, chunk each page into embedding-ready pieces (500-1500 tokens). Save chunks alongside the source:
153
- \`\`\`
154
- .firecrawl/<hostname>/<path>/
155
- index.md # Full page content
156
- chunks/
157
- 001.md # Chunk 1
158
- 002.md # Chunk 2
159
- \`\`\`
160
-
161
- Each chunk file should have frontmatter:
162
- \`\`\`yaml
163
- ---
164
- title: "Page Title"
165
- url: "https://..."
166
- chunk: 1
167
- total_chunks: 5
168
- section: "Section Name"
169
- ---
170
- \`\`\`
171
-
172
- Also create \`${opts.outputDir}/manifest.json\` listing every chunk with its metadata for easy ingestion into a vector store.`;
173
- case 'train':
174
- return `${FILE_CONVENTION}
175
-
176
- Scrape source pages into the \`.firecrawl/\` directory structure first, then generate training data from the scraped content.
177
-
178
- ## Training Data Format
179
-
180
- ${opts.trainFormat === 'openai'
181
- ? `OpenAI fine-tuning JSONL. Each line:
182
- \`\`\`json
183
- {"messages": [{"role": "system", "content": "..."}, {"role": "user", "content": "..."}, {"role": "assistant", "content": "..."}]}
184
- \`\`\``
185
- : opts.trainFormat === 'alpaca'
186
- ? `Alpaca format JSONL. Each line:
187
- \`\`\`json
188
- {"instruction": "...", "input": "...", "output": "..."}
189
- \`\`\``
190
- : `ShareGPT conversation JSONL. Each line:
191
- \`\`\`json
192
- {"conversations": [{"from": "human", "value": "..."}, {"from": "gpt", "value": "..."}]}
193
- \`\`\``}
194
-
195
- Target ~${opts.trainExamples} examples.
196
-
197
- Save the dataset to \`training-data.jsonl\` in the current directory.
198
-
199
- Also save \`training-metadata.json\` with:
200
- - Total examples generated
201
- - Sources used (URLs)
202
- - Topic coverage breakdown
203
- - Format used
204
-
205
- ### Quality Guidelines
206
-
207
- - Each example should be self-contained and accurate
208
- - Vary the instruction style (questions, commands, scenarios)
209
- - Include code examples where relevant
210
- - Remove boilerplate, navigation, and ads from scraped content
211
- - Cite the source URL in a metadata field for traceability
212
- - Deduplicate similar examples`;
213
- case 'docs':
214
- return `${FILE_CONVENTION}
215
-
216
- Also create \`${opts.outputDir}/index.md\` as a table of contents linking to all scraped pages, organized by section.
217
-
218
- Each markdown file should have frontmatter:
219
- \`\`\`yaml
220
- ---
221
- title: "Page Title"
222
- url: "https://..."
223
- section: "Section Name"
224
- ---
225
- \`\`\`
226
-
227
- Be thorough. Scrape every page, preserve all code examples. This content will be used as LLM context, so accuracy matters.`;
228
- default:
229
- return FILE_CONVENTION;
230
- }
231
- }
232
- function buildAgentStrategy(goal) {
233
- switch (goal) {
234
- case 'docs':
235
- return `## Agent Assignments
236
-
237
- Spawn agents based on the doc structure:
238
- 1. **Section Agent** (one per major section) -- Scrape all pages in the section. Save each page as clean markdown. Preserve code examples and formatting.
239
-
240
- Start by mapping the site with \`firecrawl map\` to discover all pages, then divide by section.`;
241
- case 'train':
242
- return `## Agent Assignments
243
-
244
- Spawn agents by source type:
245
- 1. **Documentation Agent** -- Scrape official docs. Generate instruction/response pairs from doc sections (e.g., "How do I X?" with the answer from docs).
246
- 2. **Tutorial Agent** -- Scrape tutorials and how-to articles. Generate step-by-step instruction pairs.
247
- 3. **Q&A Agent** -- Scrape Stack Overflow, GitHub discussions, forums. Extract real question/answer pairs.
248
- 4. **Reference Agent** -- Scrape reference material. Generate factual Q&A pairs.`;
249
- default:
250
- return `## Agent Assignments
251
-
252
- Spawn agents by source type:
253
- 1. **Official Docs Agent** -- Find and scrape official documentation, reference material, specs.
254
- 2. **Articles & Tutorials Agent** -- Find and scrape the best articles, blog posts, tutorials.
255
- 3. **Community & Discussions Agent** -- Find and scrape relevant forum posts, Stack Overflow answers, GitHub discussions.
256
- 4. **Reference Agent** -- Wikipedia, glossaries, standards documents, whitepapers.
257
-
258
- Adjust agents based on what sources exist for the topic.`;
259
- }
260
- }
261
- function buildSystemPrompt(opts) {
262
- const depthInstructions = {
263
- quick: 'Find and scrape 5-10 of the best sources.',
264
- thorough: 'Find and scrape 15-25 sources covering different perspectives.',
265
- exhaustive: 'Find and scrape 25+ sources including primary docs, articles, tutorials, and reference material.',
266
- };
267
- return `You are a knowledge base team lead powered by Firecrawl. You scrape web content and organize it into structured, LLM-ready formats.
268
-
269
- ${shared_1.FIRECRAWL_TOOLS_BLOCK}
270
-
271
- ## Depth
272
-
273
- ${depthInstructions[opts.depth]}
274
-
275
- ## Your Strategy
276
-
277
- You are a **team lead**. Your job is to:
278
-
279
- 1. **Find the best sources** -- ${opts.goal === 'docs' ? 'Map the documentation site to discover all pages.' : 'Search broadly to identify the most valuable sources on the topic.'}
280
- 2. **Spawn parallel subagents** -- Divide the work across agents. Each scrapes their assigned sources.
281
- 3. **Collect and organize** -- Build the final output structure from all agent results.
282
-
283
- ${buildAgentStrategy(opts.goal)}
284
-
285
- ${shared_1.SUBAGENT_INSTRUCTIONS}
286
-
287
- ${buildGoalInstructions(opts)}
288
-
289
- ---
290
-
291
- Tell the user the output path when done.
292
-
293
- Start immediately.`;
294
- }
295
- // ─── Command registration ───────────────────────────────────────────────────
296
- function register(parentCmd, backend) {
297
- const config = backends_1.BACKENDS[backend];
298
- parentCmd
299
- .command('knowledge-base')
300
- .description('Build a knowledge base from web content (docs, RAG, fine-tuning)')
301
- .argument('[source]', 'URL or topic to build from')
302
- .option('-y, --yes', 'Auto-approve all tool permissions')
303
- .action(async (source, options) => {
304
- const inputs = await gatherInputs(source ? { source } : undefined);
305
- const skipPermissions = options.yes || (await (0, shared_1.askPermissionMode)(backend));
306
- console.log(`\nLaunching ${config.displayName}...\n`);
307
- (0, backends_1.launchAgent)(backend, buildSystemPrompt({
308
- goal: inputs.goal,
309
- depth: inputs.depth,
310
- outputDir: inputs.outputDir,
311
- trainFormat: inputs.trainFormat,
312
- trainExamples: inputs.trainExamples,
313
- }), (0, shared_1.buildMessage)([
314
- `Build a knowledge base from: ${inputs.source}`,
315
- inputs.context,
316
- ]), skipPermissions);
317
- });
318
- }
319
- //# sourceMappingURL=knowledge-base.js.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"knowledge-base.js","sourceRoot":"","sources":["../../../../src/commands/experimental/workflows/knowledge-base.ts"],"names":[],"mappings":";AAAA;;;;;;GAMG;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AA2TH,4BAgCC;AAxVD,0CAAkE;AAClE,sCAOmB;AAcnB,+EAA+E;AAE/E,KAAK,UAAU,YAAY,CAAC,OAA6B;IACvD,MAAM,EAAE,KAAK,EAAE,MAAM,EAAE,GAAG,wDAAa,mBAAmB,GAAC,CAAC;IAE5D,MAAM,SAAS,GACb,OAAO,EAAE,MAAM;QACf,CAAC,MAAM,KAAK,CAAC;YACX,OAAO,EACL,iEAAiE;YACnE,QAAQ,EAAE,IAAA,yBAAgB,EAAC,cAAc,CAAC;SAC3C,CAAC,CAAC,CAAC;IAEN,MAAM,IAAI,GAAG,MAAM,MAAM,CAAC;QACxB,OAAO,EAAE,iCAAiC;QAC1C,OAAO,EAAE;YACP;gBACE,IAAI,EAAE,4CAA4C;gBAClD,KAAK,EAAE,WAAW;aACnB;YACD;gBACE,IAAI,EAAE,mDAAmD;gBACzD,KAAK,EAAE,KAAK;aACb;YACD,EAAE,IAAI,EAAE,2CAA2C,EAAE,KAAK,EAAE,OAAO,EAAE;YACrE,EAAE,IAAI,EAAE,0CAA0C,EAAE,KAAK,EAAE,MAAM,EAAE;SACpE;KACF,CAAC,CAAC;IAEH,IAAI,WAAW,GAAG,EAAE,CAAC;IACrB,IAAI,aAAa,GAAG,EAAE,CAAC;IAEvB,IAAI,IAAI,KAAK,OAAO,EAAE,CAAC;QACrB,WAAW,GAAG,CAAC,MAAM,MAAM,CAAC;YAC1B,OAAO,EAAE,uBAAuB;YAChC,OAAO,EAAE;gBACP,EAAE,IAAI,EAAE,+BAA+B,EAAE,KAAK,EAAE,QAAQ,EAAE;gBAC1D,EAAE,IAAI,EAAE,mCAAmC,EAAE,KAAK,EAAE,QAAQ,EAAE;gBAC9D,EAAE,IAAI,EAAE,0BAA0B,EAAE,KAAK,EAAE,UAAU,EAAE;aACxD;SACF,CAAC,CAAW,CAAC;QAEd,aAAa,GAAG,MAAM,KAAK,CAAC;YAC1B,OAAO,EAAE,qCAAqC;YAC9C,OAAO,EAAE,KAAK;SACf,CAAC,CAAC;IACL,CAAC;IAED,MAAM,KAAK,GAAG,MAAM,MAAM,CAAC;QACzB,OAAO,EAAE,eAAe;QACxB,OAAO,EAAE;YACP,EAAE,IAAI,EAAE,sBAAsB,EAAE,KAAK,EAAE,OAAO,EAAE;YAChD,EAAE,IAAI,EAAE,0BAA0B,EAAE,KAAK,EAAE,UAAU,EAAE;YACvD,EAAE,IAAI,EAAE,0BAA0B,EAAE,KAAK,EAAE,YAAY,EAAE;SAC1D;KACF,CAAC,CAAC;IAEH,MAAM,OAAO,GAAG,MAAM,KAAK,CAAC;QAC1B,OAAO,EAAE,2DAA2D;QACpE,OAAO,EAAE,EAAE;KACZ,CAAC,CAAC;IAEH,MAAM,SAAS,GAAG,MAAM,KAAK,CAAC;QAC5B,OAAO,EAAE,mBAAmB;QAC5B,OAAO,EAAE,aAAa;KACvB,CAAC,CAAC;IAEH,OAAO;QACL,MAAM,EAAE,IAAA,wBAAe,EAAC,SAAS,CAAC;QAClC,IAAI;QACJ,KAAK;QACL,OAAO;QACP,SAAS;QACT,WAAW;QACX,aAAa;KACd,CAAC;AACJ,CAAC;AAED,+EAA+E;AAE/E,MAAM,eAAe,GAAG;;;;;;;;;;;;;;;;gGAgBwE,CAAC;AAEjG,SAAS,qBAAqB,CAAC,IAK9B;IACC,QAAQ,IAAI,CAAC,IAAI,EAAE,CAAC;QAClB,KAAK,WAAW;YACd,OAAO,GAAG,eAAe;;qCAEM,IAAI,CAAC,SAAS;;;;;;;;;;;;;;0EAcuB,CAAC;QAEvE,KAAK,KAAK;YACR,OAAO,GAAG,eAAe;;;;;;;;;;;;;;;;;;;;;;gBAsBf,IAAI,CAAC,SAAS,gGAAgG,CAAC;QAE3H,KAAK,OAAO;YACV,OAAO,GAAG,eAAe;;;;;;EAO7B,IAAI,CAAC,WAAW,KAAK,QAAQ;gBAC3B,CAAC,CAAC;;;OAGC;gBACH,CAAC,CAAC,IAAI,CAAC,WAAW,KAAK,QAAQ;oBAC7B,CAAC,CAAC;;;OAGD;oBACD,CAAC,CAAC;;;OAIR;;UAEU,IAAI,CAAC,aAAa;;;;;;;;;;;;;;;;;+BAiBG,CAAC;QAE5B,KAAK,MAAM;YACT,OAAO,GAAG,eAAe;;gBAEf,IAAI,CAAC,SAAS;;;;;;;;;;;2HAW6F,CAAC;QAExH;YACE,OAAO,eAAe,CAAC;IAC3B,CAAC;AACH,CAAC;AAED,SAAS,kBAAkB,CAAC,IAAY;IACtC,QAAQ,IAAI,EAAE,CAAC;QACb,KAAK,MAAM;YACT,OAAO;;;;;gGAKmF,CAAC;QAE7F,KAAK,OAAO;YACV,OAAO;;;;;;iFAMoE,CAAC;QAE9E;YACE,OAAO;;;;;;;;yDAQ4C,CAAC;IACxD,CAAC;AACH,CAAC;AAED,SAAS,iBAAiB,CAAC,IAM1B;IACC,MAAM,iBAAiB,GAA2B;QAChD,KAAK,EAAE,2CAA2C;QAClD,QAAQ,EAAE,gEAAgE;QAC1E,UAAU,EACR,kGAAkG;KACrG,CAAC;IAEF,OAAO;;EAEP,8BAAqB;;;;EAIrB,iBAAiB,CAAC,IAAI,CAAC,KAAK,CAAC;;;;;;kCAMG,IAAI,CAAC,IAAI,KAAK,MAAM,CAAC,CAAC,CAAC,mDAAmD,CAAC,CAAC,CAAC,oEAAoE;;;;EAIjL,kBAAkB,CAAC,IAAI,CAAC,IAAI,CAAC;;EAE7B,8BAAqB;;EAErB,qBAAqB,CAAC,IAAI,CAAC;;;;;;mBAMV,CAAC;AACpB,CAAC;AAED,+EAA+E;AAE/E,SAAgB,QAAQ,CAAC,SAAkB,EAAE,OAAgB;IAC3D,MAAM,MAAM,GAAG,mBAAQ,CAAC,OAAO,CAAC,CAAC;IAEjC,SAAS;SACN,OAAO,CAAC,gBAAgB,CAAC;SACzB,WAAW,CACV,kEAAkE,CACnE;SACA,QAAQ,CAAC,UAAU,EAAE,4BAA4B,CAAC;SAClD,MAAM,CAAC,WAAW,EAAE,mCAAmC,CAAC;SACxD,MAAM,CAAC,KAAK,EAAE,MAAM,EAAE,OAAO,EAAE,EAAE;QAChC,MAAM,MAAM,GAAG,MAAM,YAAY,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,MAAM,EAAE,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC;QAEnE,MAAM,eAAe,GAAG,OAAO,CAAC,GAAG,IAAI,CAAC,MAAM,IAAA,0BAAiB,EAAC,OAAO,CAAC,CAAC,CAAC;QAC1E,OAAO,CAAC,GAAG,CAAC,eAAe,MAAM,CAAC,WAAW,OAAO,CAAC,CAAC;QAEtD,IAAA,sBAAW,EACT,OAAO,EACP,iBAAiB,CAAC;YAChB,IAAI,EAAE,MAAM,CAAC,IAAI;YACjB,KAAK,EAAE,MAAM,CAAC,KAAK;YACnB,SAAS,EAAE,MAAM,CAAC,SAAS;YAC3B,WAAW,EAAE,MAAM,CAAC,WAAW;YAC/B,aAAa,EAAE,MAAM,CAAC,aAAa;SACpC,CAAC,EACF,IAAA,qBAAY,EAAC;YACX,gCAAgC,MAAM,CAAC,MAAM,EAAE;YAC/C,MAAM,CAAC,OAAO;SACf,CAAC,EACF,eAAe,CAChB,CAAC;IACJ,CAAC,CAAC,CAAC;AACP,CAAC"}
@@ -1,12 +0,0 @@
1
- /**
2
- * Workflow: Knowledge Base Ingestion
3
- *
4
- * Navigates auth-gated documentation portals using saved browser profiles,
5
- * paginates through articles and sections, and extracts everything into
6
- * structured JSON. Built for portals that require login, have pagination,
7
- * or use JS-heavy rendering that static scraping can't handle.
8
- */
9
- import { Command } from 'commander';
10
- import { type Backend } from '../backends';
11
- export declare function register(parentCmd: Command, backend: Backend): void;
12
- //# sourceMappingURL=knowledge-ingest.d.ts.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"knowledge-ingest.d.ts","sourceRoot":"","sources":["../../../../src/commands/experimental/workflows/knowledge-ingest.ts"],"names":[],"mappings":"AAAA;;;;;;;GAOG;AAEH,OAAO,EAAE,OAAO,EAAE,MAAM,WAAW,CAAC;AACpC,OAAO,EAAE,KAAK,OAAO,EAAyB,MAAM,aAAa,CAAC;AAuNlE,wBAAgB,QAAQ,CAAC,SAAS,EAAE,OAAO,EAAE,OAAO,EAAE,OAAO,GAAG,IAAI,CA+BnE"}
@@ -1,251 +0,0 @@
1
- "use strict";
2
- /**
3
- * Workflow: Knowledge Base Ingestion
4
- *
5
- * Navigates auth-gated documentation portals using saved browser profiles,
6
- * paginates through articles and sections, and extracts everything into
7
- * structured JSON. Built for portals that require login, have pagination,
8
- * or use JS-heavy rendering that static scraping can't handle.
9
- */
10
- var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
11
- if (k2 === undefined) k2 = k;
12
- var desc = Object.getOwnPropertyDescriptor(m, k);
13
- if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
14
- desc = { enumerable: true, get: function() { return m[k]; } };
15
- }
16
- Object.defineProperty(o, k2, desc);
17
- }) : (function(o, m, k, k2) {
18
- if (k2 === undefined) k2 = k;
19
- o[k2] = m[k];
20
- }));
21
- var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
22
- Object.defineProperty(o, "default", { enumerable: true, value: v });
23
- }) : function(o, v) {
24
- o["default"] = v;
25
- });
26
- var __importStar = (this && this.__importStar) || (function () {
27
- var ownKeys = function(o) {
28
- ownKeys = Object.getOwnPropertyNames || function (o) {
29
- var ar = [];
30
- for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
31
- return ar;
32
- };
33
- return ownKeys(o);
34
- };
35
- return function (mod) {
36
- if (mod && mod.__esModule) return mod;
37
- var result = {};
38
- if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
39
- __setModuleDefault(result, mod);
40
- return result;
41
- };
42
- })();
43
- Object.defineProperty(exports, "__esModule", { value: true });
44
- exports.register = register;
45
- const backends_1 = require("../backends");
46
- const shared_1 = require("../shared");
47
- // ─── Input gathering ────────────────────────────────────────────────────────
48
- async function gatherInputs(prefill) {
49
- if (prefill?.url) {
50
- return {
51
- url: prefill.url,
52
- profile: '',
53
- format: 'json',
54
- maxPages: '100',
55
- context: '',
56
- };
57
- }
58
- const { input, select } = await Promise.resolve().then(() => __importStar(require('@inquirer/prompts')));
59
- const url = await input({
60
- message: 'URL of the docs portal or knowledge base?',
61
- validate: (0, shared_1.validateRequired)('URL'),
62
- });
63
- const profile = await input({
64
- message: 'Browser profile for auth? (leave blank for public/anonymous access)',
65
- default: '',
66
- });
67
- const format = await select({
68
- message: 'Output format?',
69
- choices: [
70
- {
71
- name: 'Structured JSON (articles with metadata)',
72
- value: 'json',
73
- },
74
- {
75
- name: 'Markdown files (one per article, .firecrawl/ convention)',
76
- value: 'markdown',
77
- },
78
- {
79
- name: 'Single merged file (all content in one document)',
80
- value: 'merged',
81
- },
82
- ],
83
- });
84
- const maxPages = await input({
85
- message: 'Max pages to extract?',
86
- default: '100',
87
- });
88
- const context = await input({
89
- message: 'Any specific sections or topics to focus on? (leave blank for everything)',
90
- default: '',
91
- });
92
- return { url, profile, format, maxPages, context };
93
- }
94
- // ─── System prompt ──────────────────────────────────────────────────────────
95
- function buildSystemPrompt(opts) {
96
- const profileBlock = opts.profile
97
- ? `\n### Authentication\n\nUse the saved browser profile \`${opts.profile}\` to access auth-gated content:\n\`\`\`bash\nfirecrawl browser "open <url>" --profile ${opts.profile}\n\`\`\`\nAfter the first \`open\` with \`--profile\`, subsequent browser commands don't need the flag.`
98
- : '';
99
- const outputInstructions = {
100
- json: `Save results to \`knowledge-base.json\` in the current directory. Tell the user the file path when done.
101
-
102
- Use this schema:
103
- \`\`\`json
104
- {
105
- "source": "string (portal name)",
106
- "url": "string (base URL)",
107
- "extractedAt": "ISO-8601",
108
- "totalArticles": 0,
109
- "sections": [
110
- {
111
- "name": "string",
112
- "articles": [
113
- {
114
- "title": "string",
115
- "url": "string",
116
- "section": "string",
117
- "content": "string (full markdown content)",
118
- "metadata": {
119
- "lastUpdated": "string",
120
- "author": "string",
121
- "tags": ["string"]
122
- }
123
- }
124
- ]
125
- }
126
- ]
127
- }
128
- \`\`\``,
129
- markdown: `Save each article as a separate markdown file following the .firecrawl/ convention:
130
- \`\`\`
131
- .firecrawl/<hostname>/<path>/index.md
132
- \`\`\`
133
-
134
- Each file should have frontmatter:
135
- \`\`\`yaml
136
- ---
137
- title: "Article Title"
138
- url: "https://..."
139
- section: "Section Name"
140
- lastUpdated: "date if available"
141
- ---
142
- \`\`\`
143
-
144
- Also create \`.firecrawl/index.md\` as a table of contents. Tell the user the output path when done.`,
145
- merged: `Save all content to a single \`knowledge-base.md\` file in the current directory with a table of contents at the top. Each article should be a section with its title as a heading. Tell the user the file path when done.`,
146
- };
147
- return `You are a knowledge base ingestion agent powered by Firecrawl. You use a real cloud browser to navigate documentation portals -- including auth-gated ones -- paginate through all articles, and extract content into structured formats.
148
-
149
- ## STEP 1: Launch Browser and Open Live View
150
-
151
- Before anything else, launch a browser session so the user can watch:
152
-
153
- \`\`\`bash
154
- firecrawl browser launch-session --json
155
- \`\`\`
156
-
157
- Extract the \`interactiveLiveViewUrl\` from the JSON output and open it (NOT the regular \`liveViewUrl\` -- the interactive one lets the user click and interact):
158
-
159
- \`\`\`bash
160
- open "<interactiveLiveViewUrl>" # macOS
161
- xdg-open "<interactiveLiveViewUrl>" # Linux
162
- \`\`\`
163
-
164
- If the \`open\` command fails, print the URL clearly.
165
- ${profileBlock}
166
-
167
- ## STEP 2: Map the Portal Structure
168
-
169
- 1. Open the portal's main page / table of contents / sidebar
170
- 2. Snapshot to see the navigation structure
171
- 3. Identify all sections, categories, or sidebar nav items
172
- 4. Build a list of all article URLs to visit
173
-
174
- If the portal has a sitemap or API docs index, use that. Otherwise, click through sidebar/nav items to discover pages.
175
-
176
- \`\`\`bash
177
- firecrawl browser "open <url>"
178
- firecrawl browser "snapshot"
179
- firecrawl browser "scrape"
180
- \`\`\`
181
-
182
- Also try \`firecrawl map <url>\` to discover URLs programmatically -- combine with browser navigation for auth-gated content.
183
-
184
- ## STEP 3: Extract Articles
185
-
186
- For each article/page:
187
-
188
- 1. **Navigate** to the article URL
189
- 2. **Wait** for content to fully render (some portals are JS-heavy)
190
- 3. **Scrape** the full page content as markdown
191
- 4. **Extract metadata** -- title, section, last updated date, author, tags
192
- 5. **Handle pagination** within articles (multi-page docs, "Next" buttons)
193
- 6. **Navigate** to the next article
194
-
195
- ### Pagination strategies:
196
- - **Sidebar navigation**: Click through each sidebar item systematically
197
- - **"Next article" links**: Follow sequential article links
198
- - **Paginated lists**: Click page numbers or "Load More"
199
- - **Infinite scroll**: Scroll down and snapshot to load more items
200
- - **Search/filter**: If the portal has search, use it to find specific sections
201
-
202
- ### Browser commands:
203
- \`\`\`bash
204
- firecrawl browser "open <url>"
205
- firecrawl browser "snapshot"
206
- firecrawl browser "click @<ref>"
207
- firecrawl browser "scroll down"
208
- firecrawl browser "scrape"
209
- \`\`\`
210
-
211
- ## Limits
212
-
213
- Extract up to ${opts.maxPages} pages. Prioritize breadth (cover all sections) over depth (every sub-article) if you're approaching the limit.
214
-
215
- ## Output Format
216
-
217
- ${outputInstructions[opts.format]}
218
-
219
- ## Quality Guidelines
220
-
221
- - Preserve code examples, tables, and formatting
222
- - Strip navigation chrome, headers, footers -- extract only article content
223
- - Note any pages that failed to load or were access-restricted
224
- - Track progress: print "Extracted X/Y articles..." periodically
225
-
226
- Do everything sequentially. Start immediately.`;
227
- }
228
- // ─── Command registration ───────────────────────────────────────────────────
229
- function register(parentCmd, backend) {
230
- const config = backends_1.BACKENDS[backend];
231
- parentCmd
232
- .command('knowledge-ingest')
233
- .description('Extract auth-gated docs portals into structured JSON or markdown')
234
- .argument('[url]', 'URL of the docs portal or knowledge base')
235
- .option('-y, --yes', 'Auto-approve all tool permissions')
236
- .action(async (url, options) => {
237
- const inputs = await gatherInputs(url ? { url } : undefined);
238
- const parts = [`Ingest knowledge base from: ${inputs.url}`];
239
- if (inputs.context)
240
- parts.push(`Focus on: ${inputs.context}`);
241
- const userMessage = parts.join('. ') + '.';
242
- const skipPermissions = true;
243
- console.log(`\nLaunching ${config.displayName}...\n`);
244
- (0, backends_1.launchAgent)(backend, buildSystemPrompt({
245
- profile: inputs.profile,
246
- format: inputs.format,
247
- maxPages: inputs.maxPages,
248
- }), userMessage, skipPermissions);
249
- });
250
- }
251
- //# sourceMappingURL=knowledge-ingest.js.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"knowledge-ingest.js","sourceRoot":"","sources":["../../../../src/commands/experimental/workflows/knowledge-ingest.ts"],"names":[],"mappings":";AAAA;;;;;;;GAOG;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AA0NH,4BA+BC;AAtPD,0CAAkE;AAClE,sCAA6C;AAY7C,+EAA+E;AAE/E,KAAK,UAAU,YAAY,CAAC,OAA0B;IACpD,IAAI,OAAO,EAAE,GAAG,EAAE,CAAC;QACjB,OAAO;YACL,GAAG,EAAE,OAAO,CAAC,GAAG;YAChB,OAAO,EAAE,EAAE;YACX,MAAM,EAAE,MAAM;YACd,QAAQ,EAAE,KAAK;YACf,OAAO,EAAE,EAAE;SACZ,CAAC;IACJ,CAAC;IAED,MAAM,EAAE,KAAK,EAAE,MAAM,EAAE,GAAG,wDAAa,mBAAmB,GAAC,CAAC;IAE5D,MAAM,GAAG,GAAG,MAAM,KAAK,CAAC;QACtB,OAAO,EAAE,2CAA2C;QACpD,QAAQ,EAAE,IAAA,yBAAgB,EAAC,KAAK,CAAC;KAClC,CAAC,CAAC;IAEH,MAAM,OAAO,GAAG,MAAM,KAAK,CAAC;QAC1B,OAAO,EACL,qEAAqE;QACvE,OAAO,EAAE,EAAE;KACZ,CAAC,CAAC;IAEH,MAAM,MAAM,GAAG,MAAM,MAAM,CAAC;QAC1B,OAAO,EAAE,gBAAgB;QACzB,OAAO,EAAE;YACP;gBACE,IAAI,EAAE,0CAA0C;gBAChD,KAAK,EAAE,MAAM;aACd;YACD;gBACE,IAAI,EAAE,0DAA0D;gBAChE,KAAK,EAAE,UAAU;aAClB;YACD;gBACE,IAAI,EAAE,kDAAkD;gBACxD,KAAK,EAAE,QAAQ;aAChB;SACF;KACF,CAAC,CAAC;IAEH,MAAM,QAAQ,GAAG,MAAM,KAAK,CAAC;QAC3B,OAAO,EAAE,uBAAuB;QAChC,OAAO,EAAE,KAAK;KACf,CAAC,CAAC;IAEH,MAAM,OAAO,GAAG,MAAM,KAAK,CAAC;QAC1B,OAAO,EACL,2EAA2E;QAC7E,OAAO,EAAE,EAAE;KACZ,CAAC,CAAC;IAEH,OAAO,EAAE,GAAG,EAAE,OAAO,EAAE,MAAM,EAAE,QAAQ,EAAE,OAAO,EAAE,CAAC;AACrD,CAAC;AAED,+EAA+E;AAE/E,SAAS,iBAAiB,CAAC,IAI1B;IACC,MAAM,YAAY,GAAG,IAAI,CAAC,OAAO;QAC/B,CAAC,CAAC,2DAA2D,IAAI,CAAC,OAAO,0FAA0F,IAAI,CAAC,OAAO,yGAAyG;QACxR,CAAC,CAAC,EAAE,CAAC;IAEP,MAAM,kBAAkB,GAA2B;QACjD,IAAI,EAAE;;;;;;;;;;;;;;;;;;;;;;;;;;;;OA4BH;QACH,QAAQ,EAAE;;;;;;;;;;;;;;;qGAeuF;QACjG,MAAM,EAAE,4NAA4N;KACrO,CAAC;IAEF,OAAO;;;;;;;;;;;;;;;;;;EAkBP,YAAY;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;gBAgDE,IAAI,CAAC,QAAQ;;;;EAI3B,kBAAkB,CAAC,IAAI,CAAC,MAAM,CAAC;;;;;;;;;+CASc,CAAC;AAChD,CAAC;AAED,+EAA+E;AAE/E,SAAgB,QAAQ,CAAC,SAAkB,EAAE,OAAgB;IAC3D,MAAM,MAAM,GAAG,mBAAQ,CAAC,OAAO,CAAC,CAAC;IAEjC,SAAS;SACN,OAAO,CAAC,kBAAkB,CAAC;SAC3B,WAAW,CACV,kEAAkE,CACnE;SACA,QAAQ,CAAC,OAAO,EAAE,0CAA0C,CAAC;SAC7D,MAAM,CAAC,WAAW,EAAE,mCAAmC,CAAC;SACxD,MAAM,CAAC,KAAK,EAAE,GAAG,EAAE,OAAO,EAAE,EAAE;QAC7B,MAAM,MAAM,GAAG,MAAM,YAAY,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,GAAG,EAAE,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC;QAE7D,MAAM,KAAK,GAAG,CAAC,+BAA+B,MAAM,CAAC,GAAG,EAAE,CAAC,CAAC;QAC5D,IAAI,MAAM,CAAC,OAAO;YAAE,KAAK,CAAC,IAAI,CAAC,aAAa,MAAM,CAAC,OAAO,EAAE,CAAC,CAAC;QAC9D,MAAM,WAAW,GAAG,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,GAAG,GAAG,CAAC;QAE3C,MAAM,eAAe,GAAG,IAAI,CAAC;QAC7B,OAAO,CAAC,GAAG,CAAC,eAAe,MAAM,CAAC,WAAW,OAAO,CAAC,CAAC;QAEtD,IAAA,sBAAW,EACT,OAAO,EACP,iBAAiB,CAAC;YAChB,OAAO,EAAE,MAAM,CAAC,OAAO;YACvB,MAAM,EAAE,MAAM,CAAC,MAAM;YACrB,QAAQ,EAAE,MAAM,CAAC,QAAQ;SAC1B,CAAC,EACF,WAAW,EACX,eAAe,CAChB,CAAC;IACJ,CAAC,CAAC,CAAC;AACP,CAAC"}
@@ -1,11 +0,0 @@
1
- /**
2
- * Workflow: Lead Generation
3
- *
4
- * Uses a cloud browser to fill search forms on prospect databases (Apollo,
5
- * LinkedIn Sales Nav, ZoomInfo, etc.), apply filters, paginate through
6
- * results, and extract contact details at scale into structured formats.
7
- */
8
- import { Command } from 'commander';
9
- import { type Backend } from '../backends';
10
- export declare function register(parentCmd: Command, backend: Backend): void;
11
- //# sourceMappingURL=lead-gen.d.ts.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"lead-gen.d.ts","sourceRoot":"","sources":["../../../../src/commands/experimental/workflows/lead-gen.ts"],"names":[],"mappings":"AAAA;;;;;;GAMG;AAEH,OAAO,EAAE,OAAO,EAAE,MAAM,WAAW,CAAC;AACpC,OAAO,EAAE,KAAK,OAAO,EAAyB,MAAM,aAAa,CAAC;AAkOlE,wBAAgB,QAAQ,CAAC,SAAS,EAAE,OAAO,EAAE,OAAO,EAAE,OAAO,GAAG,IAAI,CAwCnE"}