@promptbook/cli 0.72.0-30 → 0.72.0-32
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
|
@@ -3,7 +3,7 @@ import type fs from 'fs/promises';
|
|
|
3
3
|
/**
|
|
4
4
|
* Container for all the tools needed to work with the filesystem
|
|
5
5
|
*/
|
|
6
|
-
export type FilesystemTools = Pick<typeof fs, 'access' | 'constants' | 'readFile' | 'stat' | 'readdir'>;
|
|
6
|
+
export type FilesystemTools = Pick<typeof fs, 'access' | 'constants' | 'readFile' | 'writeFile' | 'stat' | 'readdir'>;
|
|
7
7
|
/**
|
|
8
8
|
* TODO: Implement destroyable pattern to free resources
|
|
9
9
|
*/
|
package/package.json
CHANGED
package/umd/index.umd.js
CHANGED
|
@@ -39,7 +39,7 @@
|
|
|
39
39
|
/**
|
|
40
40
|
* The version of the Promptbook library
|
|
41
41
|
*/
|
|
42
|
-
var PROMPTBOOK_VERSION = '0.72.0-
|
|
42
|
+
var PROMPTBOOK_VERSION = '0.72.0-31';
|
|
43
43
|
// TODO: [main] !!!! List here all the versions and annotate + put into script
|
|
44
44
|
|
|
45
45
|
/*! *****************************************************************************
|
|
@@ -8427,6 +8427,7 @@
|
|
|
8427
8427
|
access: promises.access,
|
|
8428
8428
|
constants: promises.constants,
|
|
8429
8429
|
readFile: promises.readFile,
|
|
8430
|
+
writeFile: promises.writeFile,
|
|
8430
8431
|
readdir: promises.readdir,
|
|
8431
8432
|
};
|
|
8432
8433
|
}
|
|
@@ -14132,13 +14133,26 @@
|
|
|
14132
14133
|
this.tools = tools;
|
|
14133
14134
|
this.options = options;
|
|
14134
14135
|
this.markdownScraper = new MarkdownScraper(tools, options);
|
|
14135
|
-
|
|
14136
|
+
this.showdownConverter = createShowdownConverter();
|
|
14137
|
+
/**/
|
|
14138
|
+
// TODO: [🏄] !!!!!! Remove or describe why it is here
|
|
14136
14139
|
TODO_USE(readability.Readability);
|
|
14137
14140
|
TODO_USE(showdown.Converter);
|
|
14138
14141
|
TODO_USE(jsdom.JSDOM);
|
|
14139
|
-
TODO_USE(new jsdom.JSDOM());
|
|
14140
14142
|
TODO_USE(createShowdownConverter);
|
|
14141
|
-
|
|
14143
|
+
/**/
|
|
14144
|
+
/**/
|
|
14145
|
+
var jsdom$1 = new jsdom.JSDOM();
|
|
14146
|
+
var reader = new readability.Readability(jsdom$1.window.document);
|
|
14147
|
+
keepUnused(reader);
|
|
14148
|
+
/**/
|
|
14149
|
+
/**/
|
|
14150
|
+
keepUnused(DEFAULT_INTERMEDIATE_FILES_STRATEGY, DEFAULT_IS_VERBOSE, DEFAULT_SCRAPE_CACHE_DIRNAME);
|
|
14151
|
+
keepUnused(EnvironmentMismatchError);
|
|
14152
|
+
keepUnused(KnowledgeScrapeError);
|
|
14153
|
+
keepUnused(getScraperIntermediateSource);
|
|
14154
|
+
keepUnused();
|
|
14155
|
+
/**/
|
|
14142
14156
|
}
|
|
14143
14157
|
Object.defineProperty(WebsiteScraper.prototype, "metadata", {
|
|
14144
14158
|
/**
|
|
@@ -14157,10 +14171,55 @@
|
|
|
14157
14171
|
*/
|
|
14158
14172
|
WebsiteScraper.prototype.$convert = function (source) {
|
|
14159
14173
|
return __awaiter(this, void 0, void 0, function () {
|
|
14160
|
-
var
|
|
14161
|
-
|
|
14162
|
-
|
|
14163
|
-
|
|
14174
|
+
var _a, _b,
|
|
14175
|
+
// TODO: [🧠] Maybe in Node use a headless browser, not just JSDOM
|
|
14176
|
+
rootDirname, _c, cacheDirname, _d, intermediateFilesStrategy, _e, isVerbose, jsdom$1, _f, reader, article, html, i, cacheFilehandler, markdown;
|
|
14177
|
+
return __generator(this, function (_g) {
|
|
14178
|
+
switch (_g.label) {
|
|
14179
|
+
case 0:
|
|
14180
|
+
_a = this.options, _b = _a.rootDirname, rootDirname = _b === void 0 ? process.cwd() : _b, _c = _a.cacheDirname, cacheDirname = _c === void 0 ? DEFAULT_SCRAPE_CACHE_DIRNAME : _c, _d = _a.intermediateFilesStrategy, intermediateFilesStrategy = _d === void 0 ? DEFAULT_INTERMEDIATE_FILES_STRATEGY : _d, _e = _a.isVerbose, isVerbose = _e === void 0 ? DEFAULT_IS_VERBOSE : _e;
|
|
14181
|
+
// [🏄] !!!!!!!
|
|
14182
|
+
keepUnused(rootDirname, cacheDirname, intermediateFilesStrategy, isVerbose);
|
|
14183
|
+
if (source.url === null) {
|
|
14184
|
+
throw new KnowledgeScrapeError('Website scraper requires URL');
|
|
14185
|
+
}
|
|
14186
|
+
if (this.tools.fs === undefined) {
|
|
14187
|
+
throw new EnvironmentMismatchError('Can not scrape websites without filesystem tools');
|
|
14188
|
+
}
|
|
14189
|
+
_f = jsdom.JSDOM.bind;
|
|
14190
|
+
return [4 /*yield*/, source.asText()];
|
|
14191
|
+
case 1:
|
|
14192
|
+
jsdom$1 = new (_f.apply(jsdom.JSDOM, [void 0, _g.sent(), {
|
|
14193
|
+
// <- TODO: !!!!!!! The problem with the build is probably in `new JSDOM();`
|
|
14194
|
+
url: source.url,
|
|
14195
|
+
}]))();
|
|
14196
|
+
// [🏄] !!!!!!!
|
|
14197
|
+
keepUnused(jsdom$1);
|
|
14198
|
+
reader = new readability.Readability(jsdom$1.window.document);
|
|
14199
|
+
article = reader.parse();
|
|
14200
|
+
html = (article === null || article === void 0 ? void 0 : article.content) || (article === null || article === void 0 ? void 0 : article.textContent) || jsdom$1.window.document.body.innerHTML;
|
|
14201
|
+
// Note: Unwrap the HTML so that it is convertible by `markdownConverter`
|
|
14202
|
+
for (i = 0; i < 2; i++) {
|
|
14203
|
+
html = html.replace(/<div\s*(?:id="readability-page-\d+"\s+class="page")?>(.*)<\/div>/is, '$1');
|
|
14204
|
+
}
|
|
14205
|
+
if (html.includes('<div')) {
|
|
14206
|
+
html = (article === null || article === void 0 ? void 0 : article.textContent) || '';
|
|
14207
|
+
}
|
|
14208
|
+
return [4 /*yield*/, getScraperIntermediateSource(source, {
|
|
14209
|
+
rootDirname: rootDirname,
|
|
14210
|
+
cacheDirname: cacheDirname,
|
|
14211
|
+
intermediateFilesStrategy: intermediateFilesStrategy,
|
|
14212
|
+
extension: 'html',
|
|
14213
|
+
isVerbose: isVerbose,
|
|
14214
|
+
})];
|
|
14215
|
+
case 2:
|
|
14216
|
+
cacheFilehandler = _g.sent();
|
|
14217
|
+
return [4 /*yield*/, this.tools.fs.writeFile(cacheFilehandler.filename, html, 'utf-8')];
|
|
14218
|
+
case 3:
|
|
14219
|
+
_g.sent();
|
|
14220
|
+
markdown = this.showdownConverter.makeMarkdown(html, jsdom$1.window.document);
|
|
14221
|
+
return [2 /*return*/, __assign(__assign({}, cacheFilehandler), { markdown: markdown })];
|
|
14222
|
+
}
|
|
14164
14223
|
});
|
|
14165
14224
|
});
|
|
14166
14225
|
};
|