@promptbook/cli 0.72.0-30 → 0.72.0-32

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/esm/index.es.js CHANGED
@@ -2,7 +2,7 @@ import commander from 'commander';
2
2
  import spaceTrim$1, { spaceTrim } from 'spacetrim';
3
3
  import colors from 'colors';
4
4
  import { forTime } from 'waitasecond';
5
- import { stat, access, constants, readFile, readdir, writeFile, mkdir, unlink, rm, rmdir, rename } from 'fs/promises';
5
+ import { stat, access, constants, readFile, writeFile, readdir, mkdir, unlink, rm, rmdir, rename } from 'fs/promises';
6
6
  import { join, basename, dirname } from 'path';
7
7
  import { format } from 'prettier';
8
8
  import parserHtml from 'prettier/parser-html';
@@ -27,7 +27,7 @@ import { Converter } from 'showdown';
27
27
  /**
28
28
  * The version of the Promptbook library
29
29
  */
30
- var PROMPTBOOK_VERSION = '0.72.0-29';
30
+ var PROMPTBOOK_VERSION = '0.72.0-31';
31
31
  // TODO: [main] !!!! List here all the versions and annotate + put into script
32
32
 
33
33
  /*! *****************************************************************************
@@ -8415,6 +8415,7 @@ function $provideFilesystemForNode(options) {
8415
8415
  access: access,
8416
8416
  constants: constants,
8417
8417
  readFile: readFile,
8418
+ writeFile: writeFile,
8418
8419
  readdir: readdir,
8419
8420
  };
8420
8421
  }
@@ -14120,13 +14121,26 @@ var WebsiteScraper = /** @class */ (function () {
14120
14121
  this.tools = tools;
14121
14122
  this.options = options;
14122
14123
  this.markdownScraper = new MarkdownScraper(tools, options);
14123
- // TODO: !!!!!! Remove
14124
+ this.showdownConverter = createShowdownConverter();
14125
+ /**/
14126
+ // TODO: [🏄] !!!!!! Remove or describe why it is here
14124
14127
  TODO_USE(Readability);
14125
14128
  TODO_USE(Converter);
14126
14129
  TODO_USE(JSDOM);
14127
- TODO_USE(new JSDOM());
14128
14130
  TODO_USE(createShowdownConverter);
14129
- this.showdownConverter = createShowdownConverter();
14131
+ /**/
14132
+ /**/
14133
+ var jsdom = new JSDOM();
14134
+ var reader = new Readability(jsdom.window.document);
14135
+ keepUnused(reader);
14136
+ /**/
14137
+ /**/
14138
+ keepUnused(DEFAULT_INTERMEDIATE_FILES_STRATEGY, DEFAULT_IS_VERBOSE, DEFAULT_SCRAPE_CACHE_DIRNAME);
14139
+ keepUnused(EnvironmentMismatchError);
14140
+ keepUnused(KnowledgeScrapeError);
14141
+ keepUnused(getScraperIntermediateSource);
14142
+ keepUnused();
14143
+ /**/
14130
14144
  }
14131
14145
  Object.defineProperty(WebsiteScraper.prototype, "metadata", {
14132
14146
  /**
@@ -14145,10 +14159,55 @@ var WebsiteScraper = /** @class */ (function () {
14145
14159
  */
14146
14160
  WebsiteScraper.prototype.$convert = function (source) {
14147
14161
  return __awaiter(this, void 0, void 0, function () {
14148
- var markdown;
14149
- return __generator(this, function (_a) {
14150
- markdown = "";
14151
- return [2 /*return*/, __assign(__assign({}, source), { markdown: markdown, destroy: function () { } })];
14162
+ var _a, _b,
14163
+ // TODO: [🧠] Maybe in node use headless browser not just JSDOM
14164
+ rootDirname, _c, cacheDirname, _d, intermediateFilesStrategy, _e, isVerbose, jsdom, _f, reader, article, html, i, cacheFilehandler, markdown;
14165
+ return __generator(this, function (_g) {
14166
+ switch (_g.label) {
14167
+ case 0:
14168
+ _a = this.options, _b = _a.rootDirname, rootDirname = _b === void 0 ? process.cwd() : _b, _c = _a.cacheDirname, cacheDirname = _c === void 0 ? DEFAULT_SCRAPE_CACHE_DIRNAME : _c, _d = _a.intermediateFilesStrategy, intermediateFilesStrategy = _d === void 0 ? DEFAULT_INTERMEDIATE_FILES_STRATEGY : _d, _e = _a.isVerbose, isVerbose = _e === void 0 ? DEFAULT_IS_VERBOSE : _e;
14169
+ // [🏄] !!!!!!!
14170
+ keepUnused(rootDirname, cacheDirname, intermediateFilesStrategy, isVerbose);
14171
+ if (source.url === null) {
14172
+ throw new KnowledgeScrapeError('Website scraper requires URL');
14173
+ }
14174
+ if (this.tools.fs === undefined) {
14175
+ throw new EnvironmentMismatchError('Can not scrape websites without filesystem tools');
14176
+ }
14177
+ _f = JSDOM.bind;
14178
+ return [4 /*yield*/, source.asText()];
14179
+ case 1:
14180
+ jsdom = new (_f.apply(JSDOM, [void 0, _g.sent(), {
14181
+ // <- TODO: !!!!!!! Problem with build is probbably in `new JSDOM();`
14182
+ url: source.url,
14183
+ }]))();
14184
+ // [🏄] !!!!!!!
14185
+ keepUnused(jsdom);
14186
+ reader = new Readability(jsdom.window.document);
14187
+ article = reader.parse();
14188
+ html = (article === null || article === void 0 ? void 0 : article.content) || (article === null || article === void 0 ? void 0 : article.textContent) || jsdom.window.document.body.innerHTML;
14189
+ // Note: Unwrap html such as it is convertable by `markdownConverter`
14190
+ for (i = 0; i < 2; i++) {
14191
+ html = html.replace(/<div\s*(?:id="readability-page-\d+"\s+class="page")?>(.*)<\/div>/is, '$1');
14192
+ }
14193
+ if (html.includes('<div')) {
14194
+ html = (article === null || article === void 0 ? void 0 : article.textContent) || '';
14195
+ }
14196
+ return [4 /*yield*/, getScraperIntermediateSource(source, {
14197
+ rootDirname: rootDirname,
14198
+ cacheDirname: cacheDirname,
14199
+ intermediateFilesStrategy: intermediateFilesStrategy,
14200
+ extension: 'html',
14201
+ isVerbose: isVerbose,
14202
+ })];
14203
+ case 2:
14204
+ cacheFilehandler = _g.sent();
14205
+ return [4 /*yield*/, this.tools.fs.writeFile(cacheFilehandler.filename, html, 'utf-8')];
14206
+ case 3:
14207
+ _g.sent();
14208
+ markdown = this.showdownConverter.makeMarkdown(html, jsdom.window.document);
14209
+ return [2 /*return*/, __assign(__assign({}, cacheFilehandler), { markdown: markdown })];
14210
+ }
14152
14211
  });
14153
14212
  });
14154
14213
  };