npm - @riddledc/openclaw-riddledc - Versions diffs - 0.4.0 → 0.5.2 - Mend

@riddledc/openclaw-riddledc 0.4.0 → 0.5.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (5)

package/CHECKSUMS.txt CHANGED

@@ -1,4 +1,4 @@
-008b101829a770aab04361a6432304b6fe7edf4173fed1185339d6f53969b418  dist/index.cjs
+de790b590f73cb048b327164f3121e596860a16678ac1ed67abae5bdb6270ae8  dist/index.cjs
 94ce04f0e2d84bf64dd68f0500dfdd2f951287a3deccec87f197261961927f6f  dist/index.d.cts
 94ce04f0e2d84bf64dd68f0500dfdd2f951287a3deccec87f197261961927f6f  dist/index.d.ts
-52e59ee4fd2c37fd503c9e52add79ef76e5959fdef1d0c5e191b132eba4027db  dist/index.js
+490816f9b8e7241bbc9bde9238d2cf73b918f37b2b8c6c1e045194030118321f  dist/index.js

package/dist/index.cjs CHANGED

@@ -299,7 +299,7 @@ function register(api) {
   api.registerTool(
     {
       name: "riddle_steps",
-      description: 'Riddle: run a workflow in steps mode (goto/click/fill/etc.). Supports authenticated sessions via cookies/localStorage. Returns screenshot + console by default; pass include:["har"] to opt in to HAR capture.',
+      description: `Riddle: run a workflow in steps mode (goto/click/fill/screenshot/scrape/map/crawl/etc.). Supports authenticated sessions via cookies/localStorage. Data extraction steps: { scrape: true }, { map: { max_pages?: N } }, { crawl: { max_pages?: N, format?: 'json'|'csv' } }. Returns screenshot + console by default; pass include:["har","data","urls","dataset","sitemap"] for additional artifacts.`,
       parameters: import_typebox.Type.Object({
         steps: import_typebox.Type.Array(import_typebox.Type.Record(import_typebox.Type.String(), import_typebox.Type.Any())),
         timeout_sec: import_typebox.Type.Optional(import_typebox.Type.Number()),
@@ -330,7 +330,7 @@ function register(api) {
         if (Object.keys(opts).length > 0) payload.options = opts;
         if (params.include) payload.include = params.include;
         if (params.harInline) payload.harInline = params.harInline;
-        const result = await runWithDefaults(api, payload, { include: ["screenshot", "console", "result"] });
+        const result = await runWithDefaults(api, payload, { include: ["screenshot", "console", "result", "data", "urls", "dataset", "sitemap"] });
         return { content: [{ type: "text", text: JSON.stringify(result, null, 2) }] };
       }
     },
@@ -339,7 +339,7 @@ function register(api) {
   api.registerTool(
     {
       name: "riddle_script",
-      description: 'Riddle: run full Playwright code (script mode). Supports authenticated sessions via cookies/localStorage. In scripts, use `await injectLocalStorage()` after navigating to the origin to apply localStorage values. Returns screenshot + console by default; pass include:["har"] to opt in to HAR capture.',
+      description: 'Riddle: run full Playwright code (script mode). Supports authenticated sessions via cookies/localStorage. In scripts, use `await injectLocalStorage()` after navigating to the origin to apply localStorage values. Available sandbox helpers: saveScreenshot(label), saveHtml(label), saveJson(name, data), scrape(opts?), map(opts?), crawl(opts?). Returns screenshot + console by default; pass include:["har","data","urls","dataset","sitemap"] for additional artifacts.',
       parameters: import_typebox.Type.Object({
         script: import_typebox.Type.String(),
         timeout_sec: import_typebox.Type.Optional(import_typebox.Type.Number()),
@@ -370,7 +370,122 @@ function register(api) {
         if (Object.keys(opts).length > 0) payload.options = opts;
         if (params.include) payload.include = params.include;
         if (params.harInline) payload.harInline = params.harInline;
-        const result = await runWithDefaults(api, payload, { include: ["screenshot", "console", "result"] });
+        const result = await runWithDefaults(api, payload, { include: ["screenshot", "console", "result", "data", "urls", "dataset", "sitemap"] });
+        return { content: [{ type: "text", text: JSON.stringify(result, null, 2) }] };
+      }
+    },
+    { optional: true }
+  );
+  api.registerTool(
+    {
+      name: "riddle_scrape",
+      description: "Riddle: scrape a URL and extract structured content (title, description, markdown, links, headings, word count). Navigates to the URL first, then extracts. For authenticated scraping, use riddle_script with login steps followed by await scrape().",
+      parameters: import_typebox.Type.Object({
+        url: import_typebox.Type.String({ description: "URL to scrape" }),
+        extract_metadata: import_typebox.Type.Optional(import_typebox.Type.Boolean({ description: "Extract metadata (default: true)" })),
+        cookies: import_typebox.Type.Optional(import_typebox.Type.Array(import_typebox.Type.Object({
+          name: import_typebox.Type.String(),
+          value: import_typebox.Type.String(),
+          domain: import_typebox.Type.String(),
+          path: import_typebox.Type.Optional(import_typebox.Type.String()),
+          secure: import_typebox.Type.Optional(import_typebox.Type.Boolean()),
+          httpOnly: import_typebox.Type.Optional(import_typebox.Type.Boolean())
+        }), { description: "Cookies to inject for authenticated sessions" })),
+        options: import_typebox.Type.Optional(import_typebox.Type.Record(import_typebox.Type.String(), import_typebox.Type.Any()))
+      }),
+      async execute(_id, params) {
+        const scrapeOpts = params.extract_metadata === false ? "{ extract_metadata: false }" : "";
+        const payload = {
+          url: params.url,
+          script: `return await scrape(${scrapeOpts});`,
+          options: { ...params.options || {}, returnResult: true }
+        };
+        if (params.cookies) payload.options.cookies = params.cookies;
+        const result = await runWithDefaults(api, payload, { include: ["result", "console"] });
+        return { content: [{ type: "text", text: JSON.stringify(result, null, 2) }] };
+      }
+    },
+    { optional: true }
+  );
+  api.registerTool(
+    {
+      name: "riddle_map",
+      description: "Riddle: discover all URLs on a website by crawling from the given URL. Returns an array of discovered URLs. For authenticated mapping, use riddle_script with login steps followed by await map().",
+      parameters: import_typebox.Type.Object({
+        url: import_typebox.Type.String({ description: "Starting URL to map from" }),
+        max_pages: import_typebox.Type.Optional(import_typebox.Type.Number({ description: "Max pages to crawl (default: 500, max: 5000)" })),
+        include_patterns: import_typebox.Type.Optional(import_typebox.Type.Array(import_typebox.Type.String(), { description: "URL patterns to include (glob)" })),
+        exclude_patterns: import_typebox.Type.Optional(import_typebox.Type.Array(import_typebox.Type.String(), { description: "URL patterns to exclude (glob)" })),
+        respect_robots: import_typebox.Type.Optional(import_typebox.Type.Boolean({ description: "Respect robots.txt (default: true)" })),
+        cookies: import_typebox.Type.Optional(import_typebox.Type.Array(import_typebox.Type.Object({
+          name: import_typebox.Type.String(),
+          value: import_typebox.Type.String(),
+          domain: import_typebox.Type.String(),
+          path: import_typebox.Type.Optional(import_typebox.Type.String()),
+          secure: import_typebox.Type.Optional(import_typebox.Type.Boolean()),
+          httpOnly: import_typebox.Type.Optional(import_typebox.Type.Boolean())
+        }), { description: "Cookies to inject for authenticated sessions" })),
+        options: import_typebox.Type.Optional(import_typebox.Type.Record(import_typebox.Type.String(), import_typebox.Type.Any()))
+      }),
+      async execute(_id, params) {
+        const mapOpts = [];
+        if (params.max_pages != null) mapOpts.push(`max_pages: ${params.max_pages}`);
+        if (params.include_patterns) mapOpts.push(`include_patterns: ${JSON.stringify(params.include_patterns)}`);
+        if (params.exclude_patterns) mapOpts.push(`exclude_patterns: ${JSON.stringify(params.exclude_patterns)}`);
+        if (params.respect_robots === false) mapOpts.push("respect_robots: false");
+        const optsStr = mapOpts.length > 0 ? `{ ${mapOpts.join(", ")} }` : "";
+        const payload = {
+          url: params.url,
+          script: `return await map(${optsStr});`,
+          options: { ...params.options || {}, returnResult: true }
+        };
+        if (params.cookies) payload.options.cookies = params.cookies;
+        const result = await runWithDefaults(api, payload, { include: ["result", "console"] });
+        return { content: [{ type: "text", text: JSON.stringify(result, null, 2) }] };
+      }
+    },
+    { optional: true }
+  );
+  api.registerTool(
+    {
+      name: "riddle_crawl",
+      description: 'Riddle: crawl a website and extract content from each page into a dataset. Returns dataset metadata; use include:["dataset"] to get the full dataset file. For authenticated crawling, use riddle_script with login steps followed by await crawl().',
+      parameters: import_typebox.Type.Object({
+        url: import_typebox.Type.String({ description: "Starting URL to crawl from" }),
+        max_pages: import_typebox.Type.Optional(import_typebox.Type.Number({ description: "Max pages to crawl (default: 100, max: 1000)" })),
+        format: import_typebox.Type.Optional(import_typebox.Type.String({ description: "Output format: jsonl, json, csv, zip (default: jsonl)" })),
+        js_rendering: import_typebox.Type.Optional(import_typebox.Type.Boolean({ description: "Use full browser rendering (slower but handles SPAs)" })),
+        include_patterns: import_typebox.Type.Optional(import_typebox.Type.Array(import_typebox.Type.String(), { description: "URL patterns to include (glob)" })),
+        exclude_patterns: import_typebox.Type.Optional(import_typebox.Type.Array(import_typebox.Type.String(), { description: "URL patterns to exclude (glob)" })),
+        extract_metadata: import_typebox.Type.Optional(import_typebox.Type.Boolean({ description: "Extract metadata per page (default: true)" })),
+        respect_robots: import_typebox.Type.Optional(import_typebox.Type.Boolean({ description: "Respect robots.txt (default: true)" })),
+        cookies: import_typebox.Type.Optional(import_typebox.Type.Array(import_typebox.Type.Object({
+          name: import_typebox.Type.String(),
+          value: import_typebox.Type.String(),
+          domain: import_typebox.Type.String(),
+          path: import_typebox.Type.Optional(import_typebox.Type.String()),
+          secure: import_typebox.Type.Optional(import_typebox.Type.Boolean()),
+          httpOnly: import_typebox.Type.Optional(import_typebox.Type.Boolean())
+        }), { description: "Cookies to inject for authenticated sessions" })),
+        options: import_typebox.Type.Optional(import_typebox.Type.Record(import_typebox.Type.String(), import_typebox.Type.Any()))
+      }),
+      async execute(_id, params) {
+        const crawlOpts = [];
+        if (params.max_pages != null) crawlOpts.push(`max_pages: ${params.max_pages}`);
+        if (params.format) crawlOpts.push(`format: '${params.format}'`);
+        if (params.js_rendering) crawlOpts.push("js_rendering: true");
+        if (params.include_patterns) crawlOpts.push(`include_patterns: ${JSON.stringify(params.include_patterns)}`);
+        if (params.exclude_patterns) crawlOpts.push(`exclude_patterns: ${JSON.stringify(params.exclude_patterns)}`);
+        if (params.extract_metadata === false) crawlOpts.push("extract_metadata: false");
+        if (params.respect_robots === false) crawlOpts.push("respect_robots: false");
+        const optsStr = crawlOpts.length > 0 ? `{ ${crawlOpts.join(", ")} }` : "";
+        const payload = {
+          url: params.url,
+          script: `return await crawl(${optsStr});`,
+          options: { ...params.options || {}, returnResult: true }
+        };
+        if (params.cookies) payload.options.cookies = params.cookies;
+        const result = await runWithDefaults(api, payload, { include: ["result", "console"] });
         return { content: [{ type: "text", text: JSON.stringify(result, null, 2) }] };
       }
     },

package/dist/index.js CHANGED

@@ -275,7 +275,7 @@ function register(api) {
   api.registerTool(
     {
       name: "riddle_steps",
-      description: 'Riddle: run a workflow in steps mode (goto/click/fill/etc.). Supports authenticated sessions via cookies/localStorage. Returns screenshot + console by default; pass include:["har"] to opt in to HAR capture.',
+      description: `Riddle: run a workflow in steps mode (goto/click/fill/screenshot/scrape/map/crawl/etc.). Supports authenticated sessions via cookies/localStorage. Data extraction steps: { scrape: true }, { map: { max_pages?: N } }, { crawl: { max_pages?: N, format?: 'json'|'csv' } }. Returns screenshot + console by default; pass include:["har","data","urls","dataset","sitemap"] for additional artifacts.`,
       parameters: Type.Object({
         steps: Type.Array(Type.Record(Type.String(), Type.Any())),
         timeout_sec: Type.Optional(Type.Number()),
@@ -306,7 +306,7 @@ function register(api) {
         if (Object.keys(opts).length > 0) payload.options = opts;
         if (params.include) payload.include = params.include;
         if (params.harInline) payload.harInline = params.harInline;
-        const result = await runWithDefaults(api, payload, { include: ["screenshot", "console", "result"] });
+        const result = await runWithDefaults(api, payload, { include: ["screenshot", "console", "result", "data", "urls", "dataset", "sitemap"] });
         return { content: [{ type: "text", text: JSON.stringify(result, null, 2) }] };
       }
     },
@@ -315,7 +315,7 @@ function register(api) {
   api.registerTool(
     {
       name: "riddle_script",
-      description: 'Riddle: run full Playwright code (script mode). Supports authenticated sessions via cookies/localStorage. In scripts, use `await injectLocalStorage()` after navigating to the origin to apply localStorage values. Returns screenshot + console by default; pass include:["har"] to opt in to HAR capture.',
+      description: 'Riddle: run full Playwright code (script mode). Supports authenticated sessions via cookies/localStorage. In scripts, use `await injectLocalStorage()` after navigating to the origin to apply localStorage values. Available sandbox helpers: saveScreenshot(label), saveHtml(label), saveJson(name, data), scrape(opts?), map(opts?), crawl(opts?). Returns screenshot + console by default; pass include:["har","data","urls","dataset","sitemap"] for additional artifacts.',
       parameters: Type.Object({
         script: Type.String(),
         timeout_sec: Type.Optional(Type.Number()),
@@ -346,7 +346,122 @@ function register(api) {
         if (Object.keys(opts).length > 0) payload.options = opts;
         if (params.include) payload.include = params.include;
         if (params.harInline) payload.harInline = params.harInline;
-        const result = await runWithDefaults(api, payload, { include: ["screenshot", "console", "result"] });
+        const result = await runWithDefaults(api, payload, { include: ["screenshot", "console", "result", "data", "urls", "dataset", "sitemap"] });
+        return { content: [{ type: "text", text: JSON.stringify(result, null, 2) }] };
+      }
+    },
+    { optional: true }
+  );
+  api.registerTool(
+    {
+      name: "riddle_scrape",
+      description: "Riddle: scrape a URL and extract structured content (title, description, markdown, links, headings, word count). Navigates to the URL first, then extracts. For authenticated scraping, use riddle_script with login steps followed by await scrape().",
+      parameters: Type.Object({
+        url: Type.String({ description: "URL to scrape" }),
+        extract_metadata: Type.Optional(Type.Boolean({ description: "Extract metadata (default: true)" })),
+        cookies: Type.Optional(Type.Array(Type.Object({
+          name: Type.String(),
+          value: Type.String(),
+          domain: Type.String(),
+          path: Type.Optional(Type.String()),
+          secure: Type.Optional(Type.Boolean()),
+          httpOnly: Type.Optional(Type.Boolean())
+        }), { description: "Cookies to inject for authenticated sessions" })),
+        options: Type.Optional(Type.Record(Type.String(), Type.Any()))
+      }),
+      async execute(_id, params) {
+        const scrapeOpts = params.extract_metadata === false ? "{ extract_metadata: false }" : "";
+        const payload = {
+          url: params.url,
+          script: `return await scrape(${scrapeOpts});`,
+          options: { ...params.options || {}, returnResult: true }
+        };
+        if (params.cookies) payload.options.cookies = params.cookies;
+        const result = await runWithDefaults(api, payload, { include: ["result", "console"] });
+        return { content: [{ type: "text", text: JSON.stringify(result, null, 2) }] };
+      }
+    },
+    { optional: true }
+  );
+  api.registerTool(
+    {
+      name: "riddle_map",
+      description: "Riddle: discover all URLs on a website by crawling from the given URL. Returns an array of discovered URLs. For authenticated mapping, use riddle_script with login steps followed by await map().",
+      parameters: Type.Object({
+        url: Type.String({ description: "Starting URL to map from" }),
+        max_pages: Type.Optional(Type.Number({ description: "Max pages to crawl (default: 500, max: 5000)" })),
+        include_patterns: Type.Optional(Type.Array(Type.String(), { description: "URL patterns to include (glob)" })),
+        exclude_patterns: Type.Optional(Type.Array(Type.String(), { description: "URL patterns to exclude (glob)" })),
+        respect_robots: Type.Optional(Type.Boolean({ description: "Respect robots.txt (default: true)" })),
+        cookies: Type.Optional(Type.Array(Type.Object({
+          name: Type.String(),
+          value: Type.String(),
+          domain: Type.String(),
+          path: Type.Optional(Type.String()),
+          secure: Type.Optional(Type.Boolean()),
+          httpOnly: Type.Optional(Type.Boolean())
+        }), { description: "Cookies to inject for authenticated sessions" })),
+        options: Type.Optional(Type.Record(Type.String(), Type.Any()))
+      }),
+      async execute(_id, params) {
+        const mapOpts = [];
+        if (params.max_pages != null) mapOpts.push(`max_pages: ${params.max_pages}`);
+        if (params.include_patterns) mapOpts.push(`include_patterns: ${JSON.stringify(params.include_patterns)}`);
+        if (params.exclude_patterns) mapOpts.push(`exclude_patterns: ${JSON.stringify(params.exclude_patterns)}`);
+        if (params.respect_robots === false) mapOpts.push("respect_robots: false");
+        const optsStr = mapOpts.length > 0 ? `{ ${mapOpts.join(", ")} }` : "";
+        const payload = {
+          url: params.url,
+          script: `return await map(${optsStr});`,
+          options: { ...params.options || {}, returnResult: true }
+        };
+        if (params.cookies) payload.options.cookies = params.cookies;
+        const result = await runWithDefaults(api, payload, { include: ["result", "console"] });
+        return { content: [{ type: "text", text: JSON.stringify(result, null, 2) }] };
+      }
+    },
+    { optional: true }
+  );
+  api.registerTool(
+    {
+      name: "riddle_crawl",
+      description: 'Riddle: crawl a website and extract content from each page into a dataset. Returns dataset metadata; use include:["dataset"] to get the full dataset file. For authenticated crawling, use riddle_script with login steps followed by await crawl().',
+      parameters: Type.Object({
+        url: Type.String({ description: "Starting URL to crawl from" }),
+        max_pages: Type.Optional(Type.Number({ description: "Max pages to crawl (default: 100, max: 1000)" })),
+        format: Type.Optional(Type.String({ description: "Output format: jsonl, json, csv, zip (default: jsonl)" })),
+        js_rendering: Type.Optional(Type.Boolean({ description: "Use full browser rendering (slower but handles SPAs)" })),
+        include_patterns: Type.Optional(Type.Array(Type.String(), { description: "URL patterns to include (glob)" })),
+        exclude_patterns: Type.Optional(Type.Array(Type.String(), { description: "URL patterns to exclude (glob)" })),
+        extract_metadata: Type.Optional(Type.Boolean({ description: "Extract metadata per page (default: true)" })),
+        respect_robots: Type.Optional(Type.Boolean({ description: "Respect robots.txt (default: true)" })),
+        cookies: Type.Optional(Type.Array(Type.Object({
+          name: Type.String(),
+          value: Type.String(),
+          domain: Type.String(),
+          path: Type.Optional(Type.String()),
+          secure: Type.Optional(Type.Boolean()),
+          httpOnly: Type.Optional(Type.Boolean())
+        }), { description: "Cookies to inject for authenticated sessions" })),
+        options: Type.Optional(Type.Record(Type.String(), Type.Any()))
+      }),
+      async execute(_id, params) {
+        const crawlOpts = [];
+        if (params.max_pages != null) crawlOpts.push(`max_pages: ${params.max_pages}`);
+        if (params.format) crawlOpts.push(`format: '${params.format}'`);
+        if (params.js_rendering) crawlOpts.push("js_rendering: true");
+        if (params.include_patterns) crawlOpts.push(`include_patterns: ${JSON.stringify(params.include_patterns)}`);
+        if (params.exclude_patterns) crawlOpts.push(`exclude_patterns: ${JSON.stringify(params.exclude_patterns)}`);
+        if (params.extract_metadata === false) crawlOpts.push("extract_metadata: false");
+        if (params.respect_robots === false) crawlOpts.push("respect_robots: false");
+        const optsStr = crawlOpts.length > 0 ? `{ ${crawlOpts.join(", ")} }` : "";
+        const payload = {
+          url: params.url,
+          script: `return await crawl(${optsStr});`,
+          options: { ...params.options || {}, returnResult: true }
+        };
+        if (params.cookies) payload.options.cookies = params.cookies;
+        const result = await runWithDefaults(api, payload, { include: ["result", "console"] });
         return { content: [{ type: "text", text: JSON.stringify(result, null, 2) }] };
       }
     },

package/openclaw.plugin.json CHANGED

@@ -2,8 +2,8 @@
   "id": "openclaw-riddledc",
   "name": "Riddle",
   "description": "Riddle (riddledc.com) hosted browser API tools for OpenClaw agents.",
-  "version": "0.4.0",
-  "notes": "0.3.4: Added capability manifest, npm provenance, checksums, SECURITY.md.",
+  "version": "0.5.2",
+  "notes": "0.4.0: Added riddle_scrape, riddle_map, riddle_crawl convenience tools. Updated riddle_steps and riddle_script descriptions with data extraction capabilities.",
   "type": "plugin",
   "bundledSkills": [],
   "capabilities": {
@@ -32,7 +32,10 @@
         "riddle_screenshots",
         "riddle_steps",
         "riddle_script",
-        "riddle_run"
+        "riddle_run",
+        "riddle_scrape",
+        "riddle_map",
+        "riddle_crawl"
       ],
       "invokes": [],
       "note": "Provides tools for agent use; does not invoke other agent tools"

package/package.json CHANGED

@@ -1,6 +1,6 @@
 {
   "name": "@riddledc/openclaw-riddledc",
-  "version": "0.4.0",
+  "version": "0.5.2",
   "description": "OpenClaw integration package for RiddleDC (no secrets).",
   "license": "MIT",
   "author": "RiddleDC",