npm - @blinkdotnew/cli - Versions diffs - 0.2.0 → 0.2.2 - Mend

@blinkdotnew/cli 0.2.0 → 0.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (5) hide show

package/README.md CHANGED Viewed

@@ -36,6 +36,10 @@ blink db query "SELECT count(*) FROM users"
 # Generate AI content
 blink ai image "a glowing blink logo on dark background"
 blink ai text "Summarize this article: ..."
+# Scrape websites
+blink scrape https://lovable.dev --extract "pricing tiers and costs"
+blink scrape https://example.com --text
 ```
 ---
@@ -218,7 +222,7 @@ blink ai transcribe https://example.com/audio.mp3 --language en
 ---
-### `blink fetch` and `blink search` — Web & data
+### `blink fetch`, `blink search`, `blink scrape` — Web & data
 ```bash
 # Fetch any URL via Blink proxy (handles CORS, auth headers)
@@ -229,8 +233,21 @@ blink fetch https://api.example.com --header "X-API-Key: secret"
 # Web search
 blink search "latest AI news"
 blink search "React Server Components" --count 10 --json
+# Scrape web pages
+blink scrape https://example.com                              # Raw response
+blink scrape https://example.com --text                      # Clean text (strips HTML)
+blink scrape https://example.com --extract "all prices"      # AI-extract specific data
+blink scrape https://news.ycombinator.com --extract "top 10 story titles and URLs"
+blink scrape https://example.com --extract "contact email" --json
 ```
+No project key needed — `blink scrape` uses your workspace key only.
+- `--text` — strips all HTML tags, returns readable text
+- `--extract <instructions>` — uses AI (Gemini Flash) to extract exactly what you ask for
+- Combine with `--json` for `{ url, content }` (raw), `{ url, text }` (`--text`), or `{ url, extracted, extract_instructions }` (`--extract`) output
 ---
 ### `blink realtime` — Pub/sub

package/dist/cli.js CHANGED Viewed

@@ -509,6 +509,65 @@ Examples:
     if (typeof result === "string") console.log(result);
     else console.log(JSON.stringify(result, null, 2));
   });
+  program2.command("scrape <url>").description("Scrape a webpage \u2014 returns text content, optionally AI-extracted data").option("--extract <instructions>", 'What to extract using AI (e.g. "all prices and product names")').option("--text", "Strip HTML tags and return clean readable text").addHelpText("after", `
+No project key needed \u2014 scrape runs through the Blink proxy using your workspace key.
+Examples:
+  $ blink scrape https://example.com                         Get full page content
+  $ blink scrape https://example.com --text                  Clean text only (no HTML)
+  $ blink scrape https://example.com --extract "all prices"  AI-extract specific data
+  $ blink scrape https://news.ycombinator.com --extract "top 10 story titles and URLs" --json
+`).action(async (url, opts) => {
+    requireToken();
+    const response = await withSpinner(
+      "Fetching page...",
+      () => resourcesRequest("/api/v1/fetch", { body: { url } })
+    );
+    let content;
+    if (typeof response === "string") {
+      content = response;
+    } else if (typeof response?.body === "string") {
+      content = response.body;
+    } else {
+      content = JSON.stringify(response);
+    }
+    if (opts.extract) {
+      const extraction = await withSpinner(
+        `Extracting: ${opts.extract}...`,
+        () => resourcesRequest("/api/v1/ai/chat/completions", {
+          body: {
+            model: "google/gemini-3-flash",
+            messages: [
+              {
+                role: "system",
+                content: "You are a web scraping assistant. Extract exactly the requested information from the webpage content provided. Return only the extracted data, formatted clearly. No commentary."
+              },
+              {
+                role: "user",
+                content: `URL: ${url}
+Webpage content:
+${content.slice(0, 5e4)}
+Extract: ${opts.extract}`
+              }
+            ],
+            stream: false
+          }
+        })
+      );
+      const extracted = extraction?.choices?.[0]?.message?.content ?? "";
+      if (isJsonMode()) return printJson({ url, extracted, extract_instructions: opts.extract });
+      console.log(extracted);
+    } else if (opts.text) {
+      const text = content.replace(/<script\b[^<]*(?:(?!<\/script>)<[^<]*)*<\/script>/gi, " ").replace(/<style\b[^<]*(?:(?!<\/style>)<[^<]*)*<\/style>/gi, " ").replace(/<[^>]+>/g, " ").replace(/&nbsp;/g, " ").replace(/&amp;/g, "&").replace(/&lt;/g, "<").replace(/&gt;/g, ">").replace(/&quot;/g, '"').replace(/\s+/g, " ").trim();
+      if (isJsonMode()) return printJson({ url, text });
+      console.log(text);
+    } else {
+      if (isJsonMode()) return printJson({ url, content });
+      console.log(content);
+    }
+  });
   program2.command("search <query>").description("Web search \u2014 returns titles, URLs, and snippets").option("--count <n>", "Number of results to return (max 20)", "5").addHelpText("after", `
 Examples:
   $ blink search "latest AI news"
@@ -1539,6 +1598,8 @@ Web & Data:
   $ blink search "latest AI news" --count 10
   $ blink fetch https://api.github.com/users/octocat
   $ blink fetch https://api.example.com --method POST --body '{"key":"val"}'
+  $ blink scrape https://example.com --text              Clean text (no HTML)
+  $ blink scrape https://example.com --extract "prices"  AI-extract specific data
 Realtime / RAG / Notify:
   $ blink realtime publish updates '{"type":"refresh"}'

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@blinkdotnew/cli",
-  "version": "0.2.0",
+  "version": "0.2.2",
   "description": "Blink platform CLI — deploy apps, manage databases, generate AI content",
   "bin": {
     "blink": "dist/cli.js"

package/src/cli.ts CHANGED Viewed

@@ -70,6 +70,8 @@ Web & Data:
   $ blink search "latest AI news" --count 10
   $ blink fetch https://api.github.com/users/octocat
   $ blink fetch https://api.example.com --method POST --body '{"key":"val"}'
+  $ blink scrape https://example.com --text              Clean text (no HTML)
+  $ blink scrape https://example.com --extract "prices"  AI-extract specific data
 Realtime / RAG / Notify:
   $ blink realtime publish updates '{"type":"refresh"}'

package/src/commands/web.ts CHANGED Viewed

@@ -40,6 +40,80 @@ Examples:
       else console.log(JSON.stringify(result, null, 2))
     })
+  // `blink scrape <url>` — fetches the page via the /api/v1/fetch endpoint, then prints it
+  // raw, as tag-stripped text (--text), or as AI-extracted data (--extract).
+  // When both flags are given, --extract wins and --text is ignored.
+  // With JSON mode on, the output is { url, content }, { url, text }, or
+  // { url, extracted, extract_instructions } respectively.
+  program.command('scrape <url>')
+    .description('Scrape a webpage — returns text content, optionally AI-extracted data')
+    .option('--extract <instructions>', 'What to extract using AI (e.g. "all prices and product names")')
+    .option('--text', 'Strip HTML tags and return clean readable text')
+    .addHelpText('after', `
+No project key needed — scrape runs through the Blink proxy using your workspace key.
+Examples:
+  $ blink scrape https://example.com                         Get full page content
+  $ blink scrape https://example.com --text                  Clean text only (no HTML)
+  $ blink scrape https://example.com --extract "all prices"  AI-extract specific data
+  $ blink scrape https://news.ycombinator.com --extract "top 10 story titles and URLs" --json
+`)
+    .action(async (url, opts) => {
+      requireToken()
+      const response = await withSpinner('Fetching page...', () =>
+        resourcesRequest('/api/v1/fetch', { body: { url } })
+      )
+      // Normalize the response to a string: a raw string is used as-is, an object with a
+      // string `body` contributes that body, anything else is serialized as JSON.
+      let content: string
+      if (typeof response === 'string') {
+        content = response
+      } else if (typeof response?.body === 'string') {
+        content = response.body
+      } else {
+        content = JSON.stringify(response)
+      }
+      if (opts.extract) {
+        // AI extraction — uses workspace key, no project needed
+        const extraction = await withSpinner(`Extracting: ${opts.extract}...`, () =>
+          resourcesRequest('/api/v1/ai/chat/completions', {
+            body: {
+              model: 'google/gemini-3-flash',
+              messages: [
+                {
+                  role: 'system',
+                  content: 'You are a web scraping assistant. Extract exactly the requested information from the webpage content provided. Return only the extracted data, formatted clearly. No commentary.',
+                },
+                {
+                  role: 'user',
+                  // Only the first 50,000 characters of the page are sent to the model.
+                  content: `URL: ${url}\n\nWebpage content:\n${content.slice(0, 50000)}\n\nExtract: ${opts.extract}`,
+                },
+              ],
+              stream: false,
+            },
+          })
+        )
+        // Falls back to an empty string when the response carries no first-choice message content.
+        const extracted = extraction?.choices?.[0]?.message?.content ?? ''
+        if (isJsonMode()) return printJson({ url, extracted, extract_instructions: opts.extract })
+        console.log(extracted)
+      } else if (opts.text) {
+        // Strip HTML and return clean text: drop <script>/<style> blocks, then all remaining
+        // tags, then decode a few common entities and collapse whitespace.
+        const text = content
+          .replace(/<script\b[^<]*(?:(?!<\/script>)<[^<]*)*<\/script>/gi, ' ')
+          .replace(/<style\b[^<]*(?:(?!<\/style>)<[^<]*)*<\/style>/gi, ' ')
+          .replace(/<[^>]+>/g, ' ')
+          .replace(/&nbsp;/g, ' ')
+          .replace(/&lt;/g, '<')
+          .replace(/&gt;/g, '>')
+          .replace(/&quot;/g, '"')
+          // `&amp;` must be decoded LAST: decoding it first turns `&amp;lt;` into `&lt;`,
+          // which a later replace would wrongly decode a second time into `<`.
+          .replace(/&amp;/g, '&')
+          .replace(/\s+/g, ' ')
+          .trim()
+        if (isJsonMode()) return printJson({ url, text })
+        console.log(text)
+      } else {
+        if (isJsonMode()) return printJson({ url, content })
+        console.log(content)
+      }
+    })
   program.command('search <query>')
     .description('Web search — returns titles, URLs, and snippets')
     .option('--count <n>', 'Number of results to return (max 20)', '5')