@mdream/crawl 0.8.4 → 0.8.5

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between those versions as they appear in their respective public registries.
@@ -5,24 +5,10 @@ import { HttpCrawler, PlaywrightCrawler, log, purgeDefaultStorages } from "crawl
5
5
  import { generateLlmsTxtArtifacts, htmlToMarkdown } from "mdream";
6
6
  import { withMinimalPreset } from "mdream/preset/minimal";
7
7
  import { dirname, join, normalize, resolve } from "pathe";
8
+ import { withHttps } from "ufo";
8
9
  import picomatch from "picomatch";
9
10
  import { extractionPlugin } from "mdream/plugins";
10
11
 
11
- //#region ../../node_modules/.pnpm/ufo@1.6.1/node_modules/ufo/dist/index.mjs
12
- const r = String.fromCharCode;
13
- const PROTOCOL_REGEX = /^[\s\w\0+.-]{2,}:([/\\]{2})?/;
14
- function withHttps(input) {
15
- return withProtocol(input, "https://");
16
- }
17
- function withProtocol(input, protocol) {
18
- let match = input.match(PROTOCOL_REGEX);
19
- if (!match) match = input.match(/^\/{2,}/);
20
- if (!match) return protocol + input;
21
- return protocol + input.slice(match[0].length);
22
- }
23
- const protocolRelative = Symbol.for("ufo:protocolRelative");
24
-
25
- //#endregion
26
12
  //#region src/glob-utils.ts
27
13
  /**
28
14
  * Parse a URL that may contain glob patterns
@@ -467,14 +453,14 @@ async function crawlAndGenerate(options, onProgress) {
467
453
  await crawler.run(initialRequests);
468
454
  progress.crawling.status = "completed";
469
455
  onProgress?.(progress);
470
- if (results.some((r$1) => r$1.success)) {
456
+ if (results.some((r) => r.success)) {
471
457
  progress.generation.status = "generating";
472
458
  onProgress?.(progress);
473
- const successfulResults = results.filter((r$1) => r$1.success);
459
+ const successfulResults = results.filter((r) => r.success);
474
460
  const firstUrl = new URL(withHttps(urls[0]));
475
461
  const origin$1 = firstUrl.origin;
476
- const homePageResult = successfulResults.find((r$1) => {
477
- const resultUrl = new URL(withHttps(r$1.url));
462
+ const homePageResult = successfulResults.find((r) => {
463
+ const resultUrl = new URL(withHttps(r.url));
478
464
  return resultUrl.href === origin$1 || resultUrl.href === `${origin$1}/`;
479
465
  });
480
466
  const siteName = siteNameOverride || homePageResult?.metadata?.title || homePageResult?.title || firstUrl.hostname;
@@ -528,4 +514,4 @@ async function crawlAndGenerate(options, onProgress) {
528
514
  }
529
515
 
530
516
  //#endregion
531
- export { crawlAndGenerate, parseUrlPattern, validateGlobPattern, withHttps };
517
+ export { crawlAndGenerate, parseUrlPattern, validateGlobPattern };
package/dist/cli.mjs CHANGED
@@ -1,9 +1,10 @@
1
- import { crawlAndGenerate, parseUrlPattern, validateGlobPattern, withHttps } from "./_chunks/crawl-DYXGzu7W.mjs";
1
+ import { crawlAndGenerate, parseUrlPattern, validateGlobPattern } from "./_chunks/crawl-BtuYX2_u.mjs";
2
2
  import { readFileSync } from "node:fs";
3
3
  import * as p$1 from "@clack/prompts";
4
4
  import * as p from "@clack/prompts";
5
5
  import { PlaywrightCrawler } from "crawlee";
6
6
  import { dirname, join, resolve } from "pathe";
7
+ import { withHttps } from "ufo";
7
8
  import { fileURLToPath } from "node:url";
8
9
  import { addDependency } from "nypm";
9
10
 
package/dist/index.mjs CHANGED
@@ -1,4 +1,4 @@
1
- import { crawlAndGenerate } from "./_chunks/crawl-DYXGzu7W.mjs";
1
+ import { crawlAndGenerate } from "./_chunks/crawl-BtuYX2_u.mjs";
2
2
  import { writeFile } from "node:fs/promises";
3
3
  import { basename, sep } from "pathe";
4
4
 
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "@mdream/crawl",
3
3
  "type": "module",
4
- "version": "0.8.4",
4
+ "version": "0.8.5",
5
5
  "description": "Mdream Crawl generates comprehensive llms.txt artifacts from a single URL, using mdream to convert HTML to Markdown.",
6
6
  "author": {
7
7
  "name": "Harlan Wilton",
@@ -50,7 +50,8 @@
50
50
  "nypm": "^0.6.0",
51
51
  "pathe": "^2.0.3",
52
52
  "picomatch": "^4.0.3",
53
- "mdream": "0.8.4"
53
+ "ufo": "^1.6.1",
54
+ "mdream": "0.8.5"
54
55
  },
55
56
  "devDependencies": {
56
57
  "@types/picomatch": "^4.0.1"