@riddledc/openclaw-riddledc 0.4.0 → 0.5.2

This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in the respective public registry.
package/CHECKSUMS.txt CHANGED
@@ -1,4 +1,4 @@
1
- 008b101829a770aab04361a6432304b6fe7edf4173fed1185339d6f53969b418 dist/index.cjs
1
+ de790b590f73cb048b327164f3121e596860a16678ac1ed67abae5bdb6270ae8 dist/index.cjs
2
2
  94ce04f0e2d84bf64dd68f0500dfdd2f951287a3deccec87f197261961927f6f dist/index.d.cts
3
3
  94ce04f0e2d84bf64dd68f0500dfdd2f951287a3deccec87f197261961927f6f dist/index.d.ts
4
- 52e59ee4fd2c37fd503c9e52add79ef76e5959fdef1d0c5e191b132eba4027db dist/index.js
4
+ 490816f9b8e7241bbc9bde9238d2cf73b918f37b2b8c6c1e045194030118321f dist/index.js
package/dist/index.cjs CHANGED
@@ -299,7 +299,7 @@ function register(api) {
299
299
  api.registerTool(
300
300
  {
301
301
  name: "riddle_steps",
302
- description: 'Riddle: run a workflow in steps mode (goto/click/fill/etc.). Supports authenticated sessions via cookies/localStorage. Returns screenshot + console by default; pass include:["har"] to opt in to HAR capture.',
302
+ description: `Riddle: run a workflow in steps mode (goto/click/fill/screenshot/scrape/map/crawl/etc.). Supports authenticated sessions via cookies/localStorage. Data extraction steps: { scrape: true }, { map: { max_pages?: N } }, { crawl: { max_pages?: N, format?: 'json'|'csv' } }. Returns screenshot + console by default; pass include:["har","data","urls","dataset","sitemap"] for additional artifacts.`,
303
303
  parameters: import_typebox.Type.Object({
304
304
  steps: import_typebox.Type.Array(import_typebox.Type.Record(import_typebox.Type.String(), import_typebox.Type.Any())),
305
305
  timeout_sec: import_typebox.Type.Optional(import_typebox.Type.Number()),
@@ -330,7 +330,7 @@ function register(api) {
330
330
  if (Object.keys(opts).length > 0) payload.options = opts;
331
331
  if (params.include) payload.include = params.include;
332
332
  if (params.harInline) payload.harInline = params.harInline;
333
- const result = await runWithDefaults(api, payload, { include: ["screenshot", "console", "result"] });
333
+ const result = await runWithDefaults(api, payload, { include: ["screenshot", "console", "result", "data", "urls", "dataset", "sitemap"] });
334
334
  return { content: [{ type: "text", text: JSON.stringify(result, null, 2) }] };
335
335
  }
336
336
  },
@@ -339,7 +339,7 @@ function register(api) {
339
339
  api.registerTool(
340
340
  {
341
341
  name: "riddle_script",
342
- description: 'Riddle: run full Playwright code (script mode). Supports authenticated sessions via cookies/localStorage. In scripts, use `await injectLocalStorage()` after navigating to the origin to apply localStorage values. Returns screenshot + console by default; pass include:["har"] to opt in to HAR capture.',
342
+ description: 'Riddle: run full Playwright code (script mode). Supports authenticated sessions via cookies/localStorage. In scripts, use `await injectLocalStorage()` after navigating to the origin to apply localStorage values. Available sandbox helpers: saveScreenshot(label), saveHtml(label), saveJson(name, data), scrape(opts?), map(opts?), crawl(opts?). Returns screenshot + console by default; pass include:["har","data","urls","dataset","sitemap"] for additional artifacts.',
343
343
  parameters: import_typebox.Type.Object({
344
344
  script: import_typebox.Type.String(),
345
345
  timeout_sec: import_typebox.Type.Optional(import_typebox.Type.Number()),
@@ -370,7 +370,122 @@ function register(api) {
370
370
  if (Object.keys(opts).length > 0) payload.options = opts;
371
371
  if (params.include) payload.include = params.include;
372
372
  if (params.harInline) payload.harInline = params.harInline;
373
- const result = await runWithDefaults(api, payload, { include: ["screenshot", "console", "result"] });
373
+ const result = await runWithDefaults(api, payload, { include: ["screenshot", "console", "result", "data", "urls", "dataset", "sitemap"] });
374
+ return { content: [{ type: "text", text: JSON.stringify(result, null, 2) }] };
375
+ }
376
+ },
377
+ { optional: true }
378
+ );
379
+ api.registerTool(
380
+ {
381
+ name: "riddle_scrape",
382
+ description: "Riddle: scrape a URL and extract structured content (title, description, markdown, links, headings, word count). Navigates to the URL first, then extracts. For authenticated scraping, use riddle_script with login steps followed by await scrape().",
383
+ parameters: import_typebox.Type.Object({
384
+ url: import_typebox.Type.String({ description: "URL to scrape" }),
385
+ extract_metadata: import_typebox.Type.Optional(import_typebox.Type.Boolean({ description: "Extract metadata (default: true)" })),
386
+ cookies: import_typebox.Type.Optional(import_typebox.Type.Array(import_typebox.Type.Object({
387
+ name: import_typebox.Type.String(),
388
+ value: import_typebox.Type.String(),
389
+ domain: import_typebox.Type.String(),
390
+ path: import_typebox.Type.Optional(import_typebox.Type.String()),
391
+ secure: import_typebox.Type.Optional(import_typebox.Type.Boolean()),
392
+ httpOnly: import_typebox.Type.Optional(import_typebox.Type.Boolean())
393
+ }), { description: "Cookies to inject for authenticated sessions" })),
394
+ options: import_typebox.Type.Optional(import_typebox.Type.Record(import_typebox.Type.String(), import_typebox.Type.Any()))
395
+ }),
396
+ async execute(_id, params) {
397
+ const scrapeOpts = params.extract_metadata === false ? "{ extract_metadata: false }" : "";
398
+ const payload = {
399
+ url: params.url,
400
+ script: `return await scrape(${scrapeOpts});`,
401
+ options: { ...params.options || {}, returnResult: true }
402
+ };
403
+ if (params.cookies) payload.options.cookies = params.cookies;
404
+ const result = await runWithDefaults(api, payload, { include: ["result", "console"] });
405
+ return { content: [{ type: "text", text: JSON.stringify(result, null, 2) }] };
406
+ }
407
+ },
408
+ { optional: true }
409
+ );
410
+ api.registerTool(
411
+ {
412
+ name: "riddle_map",
413
+ description: "Riddle: discover all URLs on a website by crawling from the given URL. Returns an array of discovered URLs. For authenticated mapping, use riddle_script with login steps followed by await map().",
414
+ parameters: import_typebox.Type.Object({
415
+ url: import_typebox.Type.String({ description: "Starting URL to map from" }),
416
+ max_pages: import_typebox.Type.Optional(import_typebox.Type.Number({ description: "Max pages to crawl (default: 500, max: 5000)" })),
417
+ include_patterns: import_typebox.Type.Optional(import_typebox.Type.Array(import_typebox.Type.String(), { description: "URL patterns to include (glob)" })),
418
+ exclude_patterns: import_typebox.Type.Optional(import_typebox.Type.Array(import_typebox.Type.String(), { description: "URL patterns to exclude (glob)" })),
419
+ respect_robots: import_typebox.Type.Optional(import_typebox.Type.Boolean({ description: "Respect robots.txt (default: true)" })),
420
+ cookies: import_typebox.Type.Optional(import_typebox.Type.Array(import_typebox.Type.Object({
421
+ name: import_typebox.Type.String(),
422
+ value: import_typebox.Type.String(),
423
+ domain: import_typebox.Type.String(),
424
+ path: import_typebox.Type.Optional(import_typebox.Type.String()),
425
+ secure: import_typebox.Type.Optional(import_typebox.Type.Boolean()),
426
+ httpOnly: import_typebox.Type.Optional(import_typebox.Type.Boolean())
427
+ }), { description: "Cookies to inject for authenticated sessions" })),
428
+ options: import_typebox.Type.Optional(import_typebox.Type.Record(import_typebox.Type.String(), import_typebox.Type.Any()))
429
+ }),
430
+ async execute(_id, params) {
431
+ const mapOpts = [];
432
+ if (params.max_pages != null) mapOpts.push(`max_pages: ${params.max_pages}`);
433
+ if (params.include_patterns) mapOpts.push(`include_patterns: ${JSON.stringify(params.include_patterns)}`);
434
+ if (params.exclude_patterns) mapOpts.push(`exclude_patterns: ${JSON.stringify(params.exclude_patterns)}`);
435
+ if (params.respect_robots === false) mapOpts.push("respect_robots: false");
436
+ const optsStr = mapOpts.length > 0 ? `{ ${mapOpts.join(", ")} }` : "";
437
+ const payload = {
438
+ url: params.url,
439
+ script: `return await map(${optsStr});`,
440
+ options: { ...params.options || {}, returnResult: true }
441
+ };
442
+ if (params.cookies) payload.options.cookies = params.cookies;
443
+ const result = await runWithDefaults(api, payload, { include: ["result", "console"] });
444
+ return { content: [{ type: "text", text: JSON.stringify(result, null, 2) }] };
445
+ }
446
+ },
447
+ { optional: true }
448
+ );
449
+ api.registerTool(
450
+ {
451
+ name: "riddle_crawl",
452
+ description: 'Riddle: crawl a website and extract content from each page into a dataset. Returns dataset metadata; use include:["dataset"] to get the full dataset file. For authenticated crawling, use riddle_script with login steps followed by await crawl().',
453
+ parameters: import_typebox.Type.Object({
454
+ url: import_typebox.Type.String({ description: "Starting URL to crawl from" }),
455
+ max_pages: import_typebox.Type.Optional(import_typebox.Type.Number({ description: "Max pages to crawl (default: 100, max: 1000)" })),
456
+ format: import_typebox.Type.Optional(import_typebox.Type.String({ description: "Output format: jsonl, json, csv, zip (default: jsonl)" })),
457
+ js_rendering: import_typebox.Type.Optional(import_typebox.Type.Boolean({ description: "Use full browser rendering (slower but handles SPAs)" })),
458
+ include_patterns: import_typebox.Type.Optional(import_typebox.Type.Array(import_typebox.Type.String(), { description: "URL patterns to include (glob)" })),
459
+ exclude_patterns: import_typebox.Type.Optional(import_typebox.Type.Array(import_typebox.Type.String(), { description: "URL patterns to exclude (glob)" })),
460
+ extract_metadata: import_typebox.Type.Optional(import_typebox.Type.Boolean({ description: "Extract metadata per page (default: true)" })),
461
+ respect_robots: import_typebox.Type.Optional(import_typebox.Type.Boolean({ description: "Respect robots.txt (default: true)" })),
462
+ cookies: import_typebox.Type.Optional(import_typebox.Type.Array(import_typebox.Type.Object({
463
+ name: import_typebox.Type.String(),
464
+ value: import_typebox.Type.String(),
465
+ domain: import_typebox.Type.String(),
466
+ path: import_typebox.Type.Optional(import_typebox.Type.String()),
467
+ secure: import_typebox.Type.Optional(import_typebox.Type.Boolean()),
468
+ httpOnly: import_typebox.Type.Optional(import_typebox.Type.Boolean())
469
+ }), { description: "Cookies to inject for authenticated sessions" })),
470
+ options: import_typebox.Type.Optional(import_typebox.Type.Record(import_typebox.Type.String(), import_typebox.Type.Any()))
471
+ }),
472
+ async execute(_id, params) {
473
+ const crawlOpts = [];
474
+ if (params.max_pages != null) crawlOpts.push(`max_pages: ${params.max_pages}`);
475
+ if (params.format) crawlOpts.push(`format: '${params.format}'`);
476
+ if (params.js_rendering) crawlOpts.push("js_rendering: true");
477
+ if (params.include_patterns) crawlOpts.push(`include_patterns: ${JSON.stringify(params.include_patterns)}`);
478
+ if (params.exclude_patterns) crawlOpts.push(`exclude_patterns: ${JSON.stringify(params.exclude_patterns)}`);
479
+ if (params.extract_metadata === false) crawlOpts.push("extract_metadata: false");
480
+ if (params.respect_robots === false) crawlOpts.push("respect_robots: false");
481
+ const optsStr = crawlOpts.length > 0 ? `{ ${crawlOpts.join(", ")} }` : "";
482
+ const payload = {
483
+ url: params.url,
484
+ script: `return await crawl(${optsStr});`,
485
+ options: { ...params.options || {}, returnResult: true }
486
+ };
487
+ if (params.cookies) payload.options.cookies = params.cookies;
488
+ const result = await runWithDefaults(api, payload, { include: ["result", "console"] });
374
489
  return { content: [{ type: "text", text: JSON.stringify(result, null, 2) }] };
375
490
  }
376
491
  },
package/dist/index.js CHANGED
@@ -275,7 +275,7 @@ function register(api) {
275
275
  api.registerTool(
276
276
  {
277
277
  name: "riddle_steps",
278
- description: 'Riddle: run a workflow in steps mode (goto/click/fill/etc.). Supports authenticated sessions via cookies/localStorage. Returns screenshot + console by default; pass include:["har"] to opt in to HAR capture.',
278
+ description: `Riddle: run a workflow in steps mode (goto/click/fill/screenshot/scrape/map/crawl/etc.). Supports authenticated sessions via cookies/localStorage. Data extraction steps: { scrape: true }, { map: { max_pages?: N } }, { crawl: { max_pages?: N, format?: 'json'|'csv' } }. Returns screenshot + console by default; pass include:["har","data","urls","dataset","sitemap"] for additional artifacts.`,
279
279
  parameters: Type.Object({
280
280
  steps: Type.Array(Type.Record(Type.String(), Type.Any())),
281
281
  timeout_sec: Type.Optional(Type.Number()),
@@ -306,7 +306,7 @@ function register(api) {
306
306
  if (Object.keys(opts).length > 0) payload.options = opts;
307
307
  if (params.include) payload.include = params.include;
308
308
  if (params.harInline) payload.harInline = params.harInline;
309
- const result = await runWithDefaults(api, payload, { include: ["screenshot", "console", "result"] });
309
+ const result = await runWithDefaults(api, payload, { include: ["screenshot", "console", "result", "data", "urls", "dataset", "sitemap"] });
310
310
  return { content: [{ type: "text", text: JSON.stringify(result, null, 2) }] };
311
311
  }
312
312
  },
@@ -315,7 +315,7 @@ function register(api) {
315
315
  api.registerTool(
316
316
  {
317
317
  name: "riddle_script",
318
- description: 'Riddle: run full Playwright code (script mode). Supports authenticated sessions via cookies/localStorage. In scripts, use `await injectLocalStorage()` after navigating to the origin to apply localStorage values. Returns screenshot + console by default; pass include:["har"] to opt in to HAR capture.',
318
+ description: 'Riddle: run full Playwright code (script mode). Supports authenticated sessions via cookies/localStorage. In scripts, use `await injectLocalStorage()` after navigating to the origin to apply localStorage values. Available sandbox helpers: saveScreenshot(label), saveHtml(label), saveJson(name, data), scrape(opts?), map(opts?), crawl(opts?). Returns screenshot + console by default; pass include:["har","data","urls","dataset","sitemap"] for additional artifacts.',
319
319
  parameters: Type.Object({
320
320
  script: Type.String(),
321
321
  timeout_sec: Type.Optional(Type.Number()),
@@ -346,7 +346,122 @@ function register(api) {
346
346
  if (Object.keys(opts).length > 0) payload.options = opts;
347
347
  if (params.include) payload.include = params.include;
348
348
  if (params.harInline) payload.harInline = params.harInline;
349
- const result = await runWithDefaults(api, payload, { include: ["screenshot", "console", "result"] });
349
+ const result = await runWithDefaults(api, payload, { include: ["screenshot", "console", "result", "data", "urls", "dataset", "sitemap"] });
350
+ return { content: [{ type: "text", text: JSON.stringify(result, null, 2) }] };
351
+ }
352
+ },
353
+ { optional: true }
354
+ );
355
+ api.registerTool(
356
+ {
357
+ name: "riddle_scrape",
358
+ description: "Riddle: scrape a URL and extract structured content (title, description, markdown, links, headings, word count). Navigates to the URL first, then extracts. For authenticated scraping, use riddle_script with login steps followed by await scrape().",
359
+ parameters: Type.Object({
360
+ url: Type.String({ description: "URL to scrape" }),
361
+ extract_metadata: Type.Optional(Type.Boolean({ description: "Extract metadata (default: true)" })),
362
+ cookies: Type.Optional(Type.Array(Type.Object({
363
+ name: Type.String(),
364
+ value: Type.String(),
365
+ domain: Type.String(),
366
+ path: Type.Optional(Type.String()),
367
+ secure: Type.Optional(Type.Boolean()),
368
+ httpOnly: Type.Optional(Type.Boolean())
369
+ }), { description: "Cookies to inject for authenticated sessions" })),
370
+ options: Type.Optional(Type.Record(Type.String(), Type.Any()))
371
+ }),
372
+ async execute(_id, params) {
373
+ const scrapeOpts = params.extract_metadata === false ? "{ extract_metadata: false }" : "";
374
+ const payload = {
375
+ url: params.url,
376
+ script: `return await scrape(${scrapeOpts});`,
377
+ options: { ...params.options || {}, returnResult: true }
378
+ };
379
+ if (params.cookies) payload.options.cookies = params.cookies;
380
+ const result = await runWithDefaults(api, payload, { include: ["result", "console"] });
381
+ return { content: [{ type: "text", text: JSON.stringify(result, null, 2) }] };
382
+ }
383
+ },
384
+ { optional: true }
385
+ );
386
+ api.registerTool(
387
+ {
388
+ name: "riddle_map",
389
+ description: "Riddle: discover all URLs on a website by crawling from the given URL. Returns an array of discovered URLs. For authenticated mapping, use riddle_script with login steps followed by await map().",
390
+ parameters: Type.Object({
391
+ url: Type.String({ description: "Starting URL to map from" }),
392
+ max_pages: Type.Optional(Type.Number({ description: "Max pages to crawl (default: 500, max: 5000)" })),
393
+ include_patterns: Type.Optional(Type.Array(Type.String(), { description: "URL patterns to include (glob)" })),
394
+ exclude_patterns: Type.Optional(Type.Array(Type.String(), { description: "URL patterns to exclude (glob)" })),
395
+ respect_robots: Type.Optional(Type.Boolean({ description: "Respect robots.txt (default: true)" })),
396
+ cookies: Type.Optional(Type.Array(Type.Object({
397
+ name: Type.String(),
398
+ value: Type.String(),
399
+ domain: Type.String(),
400
+ path: Type.Optional(Type.String()),
401
+ secure: Type.Optional(Type.Boolean()),
402
+ httpOnly: Type.Optional(Type.Boolean())
403
+ }), { description: "Cookies to inject for authenticated sessions" })),
404
+ options: Type.Optional(Type.Record(Type.String(), Type.Any()))
405
+ }),
406
+ async execute(_id, params) {
407
+ const mapOpts = [];
408
+ if (params.max_pages != null) mapOpts.push(`max_pages: ${params.max_pages}`);
409
+ if (params.include_patterns) mapOpts.push(`include_patterns: ${JSON.stringify(params.include_patterns)}`);
410
+ if (params.exclude_patterns) mapOpts.push(`exclude_patterns: ${JSON.stringify(params.exclude_patterns)}`);
411
+ if (params.respect_robots === false) mapOpts.push("respect_robots: false");
412
+ const optsStr = mapOpts.length > 0 ? `{ ${mapOpts.join(", ")} }` : "";
413
+ const payload = {
414
+ url: params.url,
415
+ script: `return await map(${optsStr});`,
416
+ options: { ...params.options || {}, returnResult: true }
417
+ };
418
+ if (params.cookies) payload.options.cookies = params.cookies;
419
+ const result = await runWithDefaults(api, payload, { include: ["result", "console"] });
420
+ return { content: [{ type: "text", text: JSON.stringify(result, null, 2) }] };
421
+ }
422
+ },
423
+ { optional: true }
424
+ );
425
+ api.registerTool(
426
+ {
427
+ name: "riddle_crawl",
428
+ description: 'Riddle: crawl a website and extract content from each page into a dataset. Returns dataset metadata; use include:["dataset"] to get the full dataset file. For authenticated crawling, use riddle_script with login steps followed by await crawl().',
429
+ parameters: Type.Object({
430
+ url: Type.String({ description: "Starting URL to crawl from" }),
431
+ max_pages: Type.Optional(Type.Number({ description: "Max pages to crawl (default: 100, max: 1000)" })),
432
+ format: Type.Optional(Type.String({ description: "Output format: jsonl, json, csv, zip (default: jsonl)" })),
433
+ js_rendering: Type.Optional(Type.Boolean({ description: "Use full browser rendering (slower but handles SPAs)" })),
434
+ include_patterns: Type.Optional(Type.Array(Type.String(), { description: "URL patterns to include (glob)" })),
435
+ exclude_patterns: Type.Optional(Type.Array(Type.String(), { description: "URL patterns to exclude (glob)" })),
436
+ extract_metadata: Type.Optional(Type.Boolean({ description: "Extract metadata per page (default: true)" })),
437
+ respect_robots: Type.Optional(Type.Boolean({ description: "Respect robots.txt (default: true)" })),
438
+ cookies: Type.Optional(Type.Array(Type.Object({
439
+ name: Type.String(),
440
+ value: Type.String(),
441
+ domain: Type.String(),
442
+ path: Type.Optional(Type.String()),
443
+ secure: Type.Optional(Type.Boolean()),
444
+ httpOnly: Type.Optional(Type.Boolean())
445
+ }), { description: "Cookies to inject for authenticated sessions" })),
446
+ options: Type.Optional(Type.Record(Type.String(), Type.Any()))
447
+ }),
448
+ async execute(_id, params) {
449
+ const crawlOpts = [];
450
+ if (params.max_pages != null) crawlOpts.push(`max_pages: ${params.max_pages}`);
451
+ if (params.format) crawlOpts.push(`format: '${params.format}'`);
452
+ if (params.js_rendering) crawlOpts.push("js_rendering: true");
453
+ if (params.include_patterns) crawlOpts.push(`include_patterns: ${JSON.stringify(params.include_patterns)}`);
454
+ if (params.exclude_patterns) crawlOpts.push(`exclude_patterns: ${JSON.stringify(params.exclude_patterns)}`);
455
+ if (params.extract_metadata === false) crawlOpts.push("extract_metadata: false");
456
+ if (params.respect_robots === false) crawlOpts.push("respect_robots: false");
457
+ const optsStr = crawlOpts.length > 0 ? `{ ${crawlOpts.join(", ")} }` : "";
458
+ const payload = {
459
+ url: params.url,
460
+ script: `return await crawl(${optsStr});`,
461
+ options: { ...params.options || {}, returnResult: true }
462
+ };
463
+ if (params.cookies) payload.options.cookies = params.cookies;
464
+ const result = await runWithDefaults(api, payload, { include: ["result", "console"] });
350
465
  return { content: [{ type: "text", text: JSON.stringify(result, null, 2) }] };
351
466
  }
352
467
  },
@@ -2,8 +2,8 @@
2
2
  "id": "openclaw-riddledc",
3
3
  "name": "Riddle",
4
4
  "description": "Riddle (riddledc.com) hosted browser API tools for OpenClaw agents.",
5
- "version": "0.4.0",
6
- "notes": "0.3.4: Added capability manifest, npm provenance, checksums, SECURITY.md.",
5
+ "version": "0.5.2",
6
+ "notes": "0.4.0: Added riddle_scrape, riddle_map, riddle_crawl convenience tools. Updated riddle_steps and riddle_script descriptions with data extraction capabilities.",
7
7
  "type": "plugin",
8
8
  "bundledSkills": [],
9
9
  "capabilities": {
@@ -32,7 +32,10 @@
32
32
  "riddle_screenshots",
33
33
  "riddle_steps",
34
34
  "riddle_script",
35
- "riddle_run"
35
+ "riddle_run",
36
+ "riddle_scrape",
37
+ "riddle_map",
38
+ "riddle_crawl"
36
39
  ],
37
40
  "invokes": [],
38
41
  "note": "Provides tools for agent use; does not invoke other agent tools"
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@riddledc/openclaw-riddledc",
3
- "version": "0.4.0",
3
+ "version": "0.5.2",
4
4
  "description": "OpenClaw integration package for RiddleDC (no secrets).",
5
5
  "license": "MIT",
6
6
  "author": "RiddleDC",