extract-from-sitemap 0.0.3 → 0.0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2)
  1. package/cli.js +3 -22
  2. package/package.json +1 -1
package/cli.js CHANGED
@@ -7,7 +7,7 @@ const crypto = require("crypto");
7
7
  const http = require("http");
8
8
  const { URL, URLSearchParams } = require("url");
9
9
  const os = require("os");
10
-
10
+ const { extractFromSitemap } = require("./mod.js");
11
11
  /**
12
12
  * @typedef {Object} Config
13
13
  * @property {string} outDir - Output directory for extracted files
@@ -410,6 +410,8 @@ async function processCustomUrls(customUrls, apiKey, forceExtract) {
410
410
  tokens: Math.round((extracted.full_content || "").length / 5),
411
411
  };
412
412
  }
413
+ } else {
414
+ throw new Error(`${response.status} - ${await response.statusText()}`);
413
415
  }
414
416
  } catch (error) {
415
417
  console.error(
@@ -438,27 +440,6 @@ async function clearCredentials() {
438
440
  }
439
441
  }
440
442
 
441
- /**
442
- * Extract content from sitemap (placeholder - you'll need to implement this)
443
- * @param {string} origin - The origin URL
444
- * @param {boolean} forceExtract - Whether to force extraction
445
- * @param {string} apiKey - API key for authentication
446
- * @returns {Promise<{totalPages: number, totalTokens: number, errors: number, files: Record<string, any>}>}
447
- */
448
- async function extractFromSitemap(origin, forceExtract, apiKey) {
449
- // This is a placeholder - you'll need to implement the actual extraction logic
450
- // or import it from your mod.js file
451
- console.log(`Extracting from ${origin} (force: ${forceExtract})`);
452
-
453
- // For now, return empty result
454
- return {
455
- totalPages: 0,
456
- totalTokens: 0,
457
- errors: 0,
458
- files: {},
459
- };
460
- }
461
-
462
443
  /**
463
444
  * Main function
464
445
  */
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "extract-from-sitemap",
3
3
  "bin": "cli.js",
4
- "version": "0.0.3",
4
+ "version": "0.0.4",
5
5
  "main": "mod.js",
6
6
  "description": "A module and CLI that allows extracting all pages from a sitemap into markdown and a llms.txt, using Parallel.ai APIs.",
7
7
  "files": [