@promptbook/cli 0.72.0-30 → 0.72.0-31
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/esm/index.es.js
CHANGED
|
@@ -2,7 +2,7 @@ import commander from 'commander';
|
|
|
2
2
|
import spaceTrim$1, { spaceTrim } from 'spacetrim';
|
|
3
3
|
import colors from 'colors';
|
|
4
4
|
import { forTime } from 'waitasecond';
|
|
5
|
-
import { stat, access, constants, readFile,
|
|
5
|
+
import { stat, access, constants, readFile, writeFile, readdir, mkdir, unlink, rm, rmdir, rename } from 'fs/promises';
|
|
6
6
|
import { join, basename, dirname } from 'path';
|
|
7
7
|
import { format } from 'prettier';
|
|
8
8
|
import parserHtml from 'prettier/parser-html';
|
|
@@ -27,7 +27,7 @@ import { Converter } from 'showdown';
|
|
|
27
27
|
/**
|
|
28
28
|
* The version of the Promptbook library
|
|
29
29
|
*/
|
|
30
|
-
var PROMPTBOOK_VERSION = '0.72.0-
|
|
30
|
+
var PROMPTBOOK_VERSION = '0.72.0-30';
|
|
31
31
|
// TODO: [main] !!!! List here all the versions and annotate + put into script
|
|
32
32
|
|
|
33
33
|
/*! *****************************************************************************
|
|
@@ -8415,6 +8415,7 @@ function $provideFilesystemForNode(options) {
|
|
|
8415
8415
|
access: access,
|
|
8416
8416
|
constants: constants,
|
|
8417
8417
|
readFile: readFile,
|
|
8418
|
+
writeFile: writeFile,
|
|
8418
8419
|
readdir: readdir,
|
|
8419
8420
|
};
|
|
8420
8421
|
}
|
|
@@ -14120,13 +14121,26 @@ var WebsiteScraper = /** @class */ (function () {
|
|
|
14120
14121
|
this.tools = tools;
|
|
14121
14122
|
this.options = options;
|
|
14122
14123
|
this.markdownScraper = new MarkdownScraper(tools, options);
|
|
14123
|
-
|
|
14124
|
+
this.showdownConverter = createShowdownConverter();
|
|
14125
|
+
/**/
|
|
14126
|
+
// TODO: [🏄] !!!!!! Remove or describe why it is here
|
|
14124
14127
|
TODO_USE(Readability);
|
|
14125
14128
|
TODO_USE(Converter);
|
|
14126
14129
|
TODO_USE(JSDOM);
|
|
14127
|
-
TODO_USE(new JSDOM());
|
|
14128
14130
|
TODO_USE(createShowdownConverter);
|
|
14129
|
-
|
|
14131
|
+
/**/
|
|
14132
|
+
/**/
|
|
14133
|
+
var jsdom = new JSDOM();
|
|
14134
|
+
var reader = new Readability(jsdom.window.document);
|
|
14135
|
+
keepUnused(reader);
|
|
14136
|
+
/**/
|
|
14137
|
+
/**/
|
|
14138
|
+
keepUnused(DEFAULT_INTERMEDIATE_FILES_STRATEGY, DEFAULT_IS_VERBOSE, DEFAULT_SCRAPE_CACHE_DIRNAME);
|
|
14139
|
+
keepUnused(EnvironmentMismatchError);
|
|
14140
|
+
keepUnused(KnowledgeScrapeError);
|
|
14141
|
+
keepUnused(getScraperIntermediateSource);
|
|
14142
|
+
keepUnused();
|
|
14143
|
+
/**/
|
|
14130
14144
|
}
|
|
14131
14145
|
Object.defineProperty(WebsiteScraper.prototype, "metadata", {
|
|
14132
14146
|
/**
|
|
@@ -14145,10 +14159,55 @@ var WebsiteScraper = /** @class */ (function () {
|
|
|
14145
14159
|
*/
|
|
14146
14160
|
WebsiteScraper.prototype.$convert = function (source) {
|
|
14147
14161
|
return __awaiter(this, void 0, void 0, function () {
|
|
14148
|
-
var
|
|
14149
|
-
|
|
14150
|
-
|
|
14151
|
-
|
|
14162
|
+
var _a, _b,
|
|
14163
|
+
// TODO: [🧠] Maybe in node use headless browser not just JSDOM
|
|
14164
|
+
rootDirname, _c, cacheDirname, _d, intermediateFilesStrategy, _e, isVerbose, jsdom, _f, reader, article, html, i, cacheFilehandler, markdown;
|
|
14165
|
+
return __generator(this, function (_g) {
|
|
14166
|
+
switch (_g.label) {
|
|
14167
|
+
case 0:
|
|
14168
|
+
_a = this.options, _b = _a.rootDirname, rootDirname = _b === void 0 ? process.cwd() : _b, _c = _a.cacheDirname, cacheDirname = _c === void 0 ? DEFAULT_SCRAPE_CACHE_DIRNAME : _c, _d = _a.intermediateFilesStrategy, intermediateFilesStrategy = _d === void 0 ? DEFAULT_INTERMEDIATE_FILES_STRATEGY : _d, _e = _a.isVerbose, isVerbose = _e === void 0 ? DEFAULT_IS_VERBOSE : _e;
|
|
14169
|
+
// [🏄] !!!!!!!
|
|
14170
|
+
keepUnused(rootDirname, cacheDirname, intermediateFilesStrategy, isVerbose);
|
|
14171
|
+
if (source.url === null) {
|
|
14172
|
+
throw new KnowledgeScrapeError('Website scraper requires URL');
|
|
14173
|
+
}
|
|
14174
|
+
if (this.tools.fs === undefined) {
|
|
14175
|
+
throw new EnvironmentMismatchError('Can not scrape websites without filesystem tools');
|
|
14176
|
+
}
|
|
14177
|
+
_f = JSDOM.bind;
|
|
14178
|
+
return [4 /*yield*/, source.asText()];
|
|
14179
|
+
case 1:
|
|
14180
|
+
jsdom = new (_f.apply(JSDOM, [void 0, _g.sent(), {
|
|
14181
|
+
// <- TODO: !!!!!!! Problem with build is probably in `new JSDOM();`
|
|
14182
|
+
url: source.url,
|
|
14183
|
+
}]))();
|
|
14184
|
+
// [🏄] !!!!!!!
|
|
14185
|
+
keepUnused(jsdom);
|
|
14186
|
+
reader = new Readability(jsdom.window.document);
|
|
14187
|
+
article = reader.parse();
|
|
14188
|
+
html = (article === null || article === void 0 ? void 0 : article.content) || (article === null || article === void 0 ? void 0 : article.textContent) || jsdom.window.document.body.innerHTML;
|
|
14189
|
+
// Note: Unwrap html so that it is convertible by `markdownConverter`
|
|
14190
|
+
for (i = 0; i < 2; i++) {
|
|
14191
|
+
html = html.replace(/<div\s*(?:id="readability-page-\d+"\s+class="page")?>(.*)<\/div>/is, '$1');
|
|
14192
|
+
}
|
|
14193
|
+
if (html.includes('<div')) {
|
|
14194
|
+
html = (article === null || article === void 0 ? void 0 : article.textContent) || '';
|
|
14195
|
+
}
|
|
14196
|
+
return [4 /*yield*/, getScraperIntermediateSource(source, {
|
|
14197
|
+
rootDirname: rootDirname,
|
|
14198
|
+
cacheDirname: cacheDirname,
|
|
14199
|
+
intermediateFilesStrategy: intermediateFilesStrategy,
|
|
14200
|
+
extension: 'html',
|
|
14201
|
+
isVerbose: isVerbose,
|
|
14202
|
+
})];
|
|
14203
|
+
case 2:
|
|
14204
|
+
cacheFilehandler = _g.sent();
|
|
14205
|
+
return [4 /*yield*/, this.tools.fs.writeFile(cacheFilehandler.filename, html, 'utf-8')];
|
|
14206
|
+
case 3:
|
|
14207
|
+
_g.sent();
|
|
14208
|
+
markdown = this.showdownConverter.makeMarkdown(html, jsdom.window.document);
|
|
14209
|
+
return [2 /*return*/, __assign(__assign({}, cacheFilehandler), { markdown: markdown })];
|
|
14210
|
+
}
|
|
14152
14211
|
});
|
|
14153
14212
|
});
|
|
14154
14213
|
};
|