@memberjunction/content-autotagging 3.4.0 → 4.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/CloudStorage/generic/CloudStorageBase.d.ts +28 -0
- package/dist/CloudStorage/generic/CloudStorageBase.d.ts.map +1 -0
- package/dist/CloudStorage/generic/CloudStorageBase.js +38 -0
- package/dist/CloudStorage/generic/CloudStorageBase.js.map +1 -0
- package/dist/CloudStorage/index.d.ts +3 -0
- package/dist/CloudStorage/index.d.ts.map +1 -0
- package/dist/CloudStorage/index.js +3 -0
- package/dist/CloudStorage/index.js.map +1 -0
- package/dist/CloudStorage/providers/AutotagAzureBlob.d.ts +20 -0
- package/dist/CloudStorage/providers/AutotagAzureBlob.d.ts.map +1 -0
- package/dist/CloudStorage/providers/AutotagAzureBlob.js +86 -0
- package/dist/CloudStorage/providers/AutotagAzureBlob.js.map +1 -0
- package/dist/Core/generic/AutotagBase.d.ts +7 -0
- package/dist/Core/generic/AutotagBase.d.ts.map +1 -0
- package/dist/Core/generic/AutotagBase.js +3 -0
- package/dist/Core/generic/AutotagBase.js.map +1 -0
- package/dist/Core/index.d.ts +2 -0
- package/dist/Core/index.d.ts.map +1 -0
- package/dist/Core/index.js +2 -0
- package/dist/Core/index.js.map +1 -0
- package/dist/Engine/generic/AutotagBaseEngine.d.ts +131 -0
- package/dist/Engine/generic/AutotagBaseEngine.d.ts.map +1 -0
- package/dist/Engine/generic/AutotagBaseEngine.js +620 -0
- package/dist/Engine/generic/AutotagBaseEngine.js.map +1 -0
- package/dist/Engine/generic/content.types.d.ts +32 -0
- package/dist/Engine/generic/content.types.d.ts.map +1 -0
- package/dist/Engine/generic/content.types.js +7 -0
- package/dist/Engine/generic/content.types.js.map +1 -0
- package/dist/Engine/generic/process.types.d.ts +30 -0
- package/dist/Engine/generic/process.types.d.ts.map +1 -0
- package/dist/Engine/generic/process.types.js +7 -0
- package/dist/Engine/generic/process.types.js.map +1 -0
- package/dist/Engine/index.d.ts +4 -0
- package/dist/Engine/index.d.ts.map +1 -0
- package/dist/Engine/index.js +4 -0
- package/dist/Engine/index.js.map +1 -0
- package/dist/Entity/generic/AutotagEntity.d.ts +19 -0
- package/dist/Entity/generic/AutotagEntity.d.ts.map +1 -0
- package/dist/Entity/generic/AutotagEntity.js +127 -0
- package/dist/Entity/generic/AutotagEntity.js.map +1 -0
- package/dist/Entity/index.d.ts +2 -0
- package/dist/Entity/index.d.ts.map +1 -0
- package/dist/Entity/index.js +2 -0
- package/dist/Entity/index.js.map +1 -0
- package/dist/LocalFileSystem/generic/AutotagLocalFileSystem.d.ts +39 -0
- package/dist/LocalFileSystem/generic/AutotagLocalFileSystem.d.ts.map +1 -0
- package/dist/LocalFileSystem/generic/AutotagLocalFileSystem.js +171 -0
- package/dist/LocalFileSystem/generic/AutotagLocalFileSystem.js.map +1 -0
- package/dist/LocalFileSystem/index.d.ts +2 -0
- package/dist/LocalFileSystem/index.d.ts.map +1 -0
- package/dist/LocalFileSystem/index.js +2 -0
- package/dist/LocalFileSystem/index.js.map +1 -0
- package/dist/RSSFeed/generic/AutotagRSSFeed.d.ts +30 -0
- package/dist/RSSFeed/generic/AutotagRSSFeed.d.ts.map +1 -0
- package/dist/RSSFeed/generic/AutotagRSSFeed.js +177 -0
- package/dist/RSSFeed/generic/AutotagRSSFeed.js.map +1 -0
- package/dist/RSSFeed/generic/RSS.types.d.ts +13 -0
- package/dist/RSSFeed/generic/RSS.types.d.ts.map +1 -0
- package/dist/RSSFeed/generic/RSS.types.js +3 -0
- package/dist/RSSFeed/generic/RSS.types.js.map +1 -0
- package/dist/RSSFeed/index.d.ts +3 -0
- package/dist/RSSFeed/index.d.ts.map +1 -0
- package/dist/RSSFeed/index.js +3 -0
- package/dist/RSSFeed/index.js.map +1 -0
- package/dist/Websites/generic/AutotagWebsite.d.ts +85 -0
- package/dist/Websites/generic/AutotagWebsite.d.ts.map +1 -0
- package/dist/Websites/generic/AutotagWebsite.js +355 -0
- package/dist/Websites/generic/AutotagWebsite.js.map +1 -0
- package/dist/Websites/index.d.ts +2 -0
- package/dist/Websites/index.d.ts.map +1 -0
- package/dist/Websites/index.js +2 -0
- package/dist/Websites/index.js.map +1 -0
- package/dist/index.d.ts +7 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +7 -0
- package/dist/index.js.map +1 -0
- package/dist/src/CloudStorage/generic/CloudStorageBase.d.ts +3 -3
- package/dist/src/CloudStorage/generic/CloudStorageBase.js +2 -2
- package/dist/src/CloudStorage/index.d.ts +2 -2
- package/dist/src/CloudStorage/index.js +2 -2
- package/dist/src/CloudStorage/providers/AutotagAzureBlob.d.ts +2 -2
- package/dist/src/CloudStorage/providers/AutotagAzureBlob.js +1 -1
- package/dist/src/Core/index.d.ts +1 -1
- package/dist/src/Core/index.js +1 -1
- package/dist/src/Engine/generic/AutotagBaseEngine.d.ts +2 -2
- package/dist/src/Engine/generic/AutotagBaseEngine.js +2 -2
- package/dist/src/Engine/index.d.ts +3 -3
- package/dist/src/Engine/index.js +3 -3
- package/dist/src/Entity/generic/AutotagEntity.d.ts +2 -2
- package/dist/src/Entity/generic/AutotagEntity.js +2 -2
- package/dist/src/Entity/index.d.ts +1 -1
- package/dist/src/Entity/index.js +1 -1
- package/dist/src/LocalFileSystem/generic/AutotagLocalFileSystem.d.ts +2 -2
- package/dist/src/LocalFileSystem/generic/AutotagLocalFileSystem.js +2 -2
- package/dist/src/LocalFileSystem/index.d.ts +1 -1
- package/dist/src/LocalFileSystem/index.js +1 -1
- package/dist/src/RSSFeed/generic/AutotagRSSFeed.d.ts +3 -3
- package/dist/src/RSSFeed/generic/AutotagRSSFeed.js +3 -3
- package/dist/src/RSSFeed/index.d.ts +2 -2
- package/dist/src/RSSFeed/index.js +2 -2
- package/dist/src/Websites/generic/AutotagWebsite.d.ts +2 -2
- package/dist/src/Websites/generic/AutotagWebsite.js +2 -2
- package/dist/src/Websites/index.d.ts +1 -1
- package/dist/src/Websites/index.js +1 -1
- package/dist/src/index.d.ts +6 -6
- package/dist/src/index.js +6 -6
- package/dist/tsconfig.tsbuildinfo +1 -1
- package/package.json +21 -20
|
@@ -0,0 +1,355 @@
|
|
|
1
|
+
var __decorate = (this && this.__decorate) || function (decorators, target, key, desc) {
|
|
2
|
+
var c = arguments.length, r = c < 3 ? target : desc === null ? desc = Object.getOwnPropertyDescriptor(target, key) : desc, d;
|
|
3
|
+
if (typeof Reflect === "object" && typeof Reflect.decorate === "function") r = Reflect.decorate(decorators, target, key, desc);
|
|
4
|
+
else for (var i = decorators.length - 1; i >= 0; i--) if (d = decorators[i]) r = (c < 3 ? d(r) : c > 3 ? d(target, key, r) : d(target, key)) || r;
|
|
5
|
+
return c > 3 && r && Object.defineProperty(target, key, r), r;
|
|
6
|
+
};
|
|
7
|
+
/**
 * TypeScript metadata helper. Returns `Reflect.metadata(key, value)` when that
 * function exists; otherwise returns `undefined`.
 */
var __metadata = (this && this.__metadata) || function (metadataKey, metadataValue) {
    var canDelegate = typeof Reflect === "object" && typeof Reflect.metadata === "function";
    return canDelegate ? Reflect.metadata(metadataKey, metadataValue) : undefined;
};
|
|
10
|
+
import { AutotagBase } from '../../Core/index.js';
|
|
11
|
+
import { AutotagBaseEngine } from '../../Engine/index.js';
|
|
12
|
+
import { RegisterClass } from '@memberjunction/global';
|
|
13
|
+
import { Metadata, RunView } from '@memberjunction/core';
|
|
14
|
+
import * as cheerio from 'cheerio';
|
|
15
|
+
import axios from 'axios';
|
|
16
|
+
import { URL } from 'url';
|
|
17
|
+
import dotenv from 'dotenv';
|
|
18
|
+
dotenv.config({ quiet: true });
|
|
19
|
+
let AutotagWebsite = class AutotagWebsite extends AutotagBase {
    constructor() {
        super();
        this.engine = AutotagBaseEngine.Instance;
        // URLs collected during the crawl of the content source currently being processed.
        this.visitedURLs = new Set();
    }
    /**
     * @returns The context user stored by the most recent call to Autotag().
     */
    getContextUser() {
        return this.contextUser;
    }
    /**
     * Entry point for the autotagging process (implements the abstract method from AutotagBase).
     * Resolves the 'Website' content source type, loads its content sources, determines which
     * content items are new or modified, and hands those to the engine for text extraction and
     * LLM processing.
     * @param contextUser User on whose behalf all engine and entity calls are made.
     */
    async Autotag(contextUser) {
        this.contextUser = contextUser;
        this.contentSourceTypeID = await this.engine.setSubclassContentSourceType('Website', this.contextUser);
        const contentSources = await this.engine.getAllContentSources(this.contextUser, this.contentSourceTypeID);
        const contentItemsToProcess = await this.SetContentItemsToProcess(contentSources);
        await this.engine.ExtractTextAndProcessWithLLM(contentItemsToProcess, this.contextUser);
    }
    /**
     * For every content source, crawl its links and collect the content items that are new or
     * have changed since they were last processed. A failure in one content source is logged
     * and does not stop the remaining sources from being processed.
     * @param contentSources Content sources to crawl.
     * @returns The content items that need to be processed.
     */
    async SetContentItemsToProcess(contentSources) {
        const contentItemsToProcess = [];
        for (const contentSource of contentSources) {
            const contentSourceParamsMap = await this.engine.getContentSourceParams(contentSource, this.contextUser);
            if (contentSourceParamsMap) {
                // Override defaults with content source specific params.
                // NOTE(review): only keys that already exist on this instance are applied, and
                // applied values persist for subsequent content sources - confirm this is intended.
                contentSourceParamsMap.forEach((value, key) => {
                    if (key in this) {
                        this[key] = value;
                    }
                });
            }
            const contentSourceParams = {
                contentSourceID: contentSource.ID,
                name: contentSource.Name,
                ContentTypeID: contentSource.ContentTypeID,
                ContentFileTypeID: contentSource.ContentFileTypeID,
                ContentSourceTypeID: contentSource.ContentSourceTypeID,
                URL: contentSource.URL
            };
            try {
                const startURL = contentSourceParams.URL;
                // Use this.RootURL when set, otherwise the base path of the start URL.
                const rootURL = this.RootURL ? this.RootURL : this.getBasePath(startURL);
                // Use this.URLPattern when set, otherwise match any URL.
                const regex = this.URLPattern && new RegExp(this.URLPattern) || new RegExp('.*');
                const allContentItemLinks = await this.getAllLinksFromContentSource(startURL, rootURL, regex);
                const contentItems = await this.SetNewAndModifiedContentItems(allContentItemLinks, contentSourceParams, this.contextUser);
                if (contentItems && contentItems.length > 0) {
                    contentItemsToProcess.push(...contentItems);
                }
                else {
                    console.log(`No content items found to process for content source: ${contentSource.Get('Name')}`);
                }
            }
            catch (e) {
                // Include the error itself so the cause of the failure is not lost.
                console.error(`Failed to process content source: ${contentSource.Get('Name')}`, e);
            }
        }
        return contentItemsToProcess;
    }
    /**
     * Escapes a value for use inside a single-quoted string literal of a view filter by
     * doubling any single quotes. Crawled URLs are untrusted input and may contain quotes.
     * @param value Value to embed in a filter string.
     * @returns The value as a string with single quotes doubled.
     */
    escapeSQLString(value) {
        return String(value).replace(/'/g, "''");
    }
    /**
     * Given a list of content item links, decide for each link whether it is unchanged (its
     * checksum is already stored), modified (same URL under this content source but a different
     * checksum), or new. Modified and new items are saved and returned. Errors for a single
     * link are logged and the loop continues with the next link.
     * @param contentItemLinks URLs discovered by the crawl.
     * @param contentSourceParams Identifiers of the content source the links belong to.
     * @param contextUser Currently unused; this.contextUser is used for all calls.
     * @returns The content items that were added or modified.
     */
    async SetNewAndModifiedContentItems(contentItemLinks, contentSourceParams, contextUser) {
        const addedContentItems = [];
        for (const contentItemLink of contentItemLinks) {
            try {
                const newHash = await this.engine.getChecksumFromURL(contentItemLink);
                const safeHash = this.escapeSQLString(newHash);
                const safeSourceID = this.escapeSQLString(contentSourceParams.contentSourceID);
                const safeURL = this.escapeSQLString(contentItemLink);
                const rv = new RunView();
                const results = await rv.RunViews([
                    {
                        EntityName: 'Content Items',
                        ExtraFilter: `Checksum = '${safeHash}'`,
                        ResultType: 'entity_object'
                    },
                    {
                        EntityName: 'Content Items',
                        ExtraFilter: `ContentSourceID = '${safeSourceID}' AND URL = '${safeURL}'`,
                        ResultType: 'entity_object'
                    }
                ], this.contextUser);
                const contentItemResultsWithChecksum = results[0];
                const contentItemResultsWithURL = results[1];
                if (contentItemResultsWithChecksum.Success && contentItemResultsWithChecksum.Results.length) {
                    // The checksum is already stored, so this content has not changed: nothing to do.
                    continue;
                }
                else if (contentItemResultsWithURL.Success && contentItemResultsWithURL.Results.length) {
                    // The content item already exists for this URL.
                    const contentItemResult = contentItemResultsWithURL.Results[0];
                    const lastStoredHash = contentItemResult.Checksum;
                    if (lastStoredHash !== newHash) {
                        // Content changed since it was last accessed: refresh checksum and text.
                        const md = new Metadata();
                        const contentItem = await md.GetEntityObject('Content Items', this.contextUser);
                        // Load must complete before the fields are modified, otherwise the
                        // changes race with the load and the save may not target the existing record.
                        await contentItem.Load(contentItemResult.ID);
                        contentItem.Checksum = newHash;
                        contentItem.Text = await this.parseWebPage(contentItemLink);
                        await contentItem.Save();
                        addedContentItems.push(contentItem); // Content item was modified, add to list
                    }
                }
                else {
                    // The content item does not exist yet: create it.
                    const md = new Metadata();
                    const contentItem = await md.GetEntityObject('Content Items', this.contextUser);
                    contentItem.ContentSourceID = contentSourceParams.contentSourceID;
                    contentItem.Name = this.getPathName(contentItemLink); // Will get overwritten by title later if it exists
                    contentItem.Description = await this.engine.getContentItemDescription(contentSourceParams, this.contextUser);
                    contentItem.ContentTypeID = contentSourceParams.ContentTypeID;
                    contentItem.ContentFileTypeID = contentSourceParams.ContentFileTypeID;
                    contentItem.ContentSourceTypeID = contentSourceParams.ContentSourceTypeID;
                    // Reuse the checksum computed above instead of fetching the page a second time.
                    contentItem.Checksum = newHash;
                    contentItem.URL = contentItemLink;
                    contentItem.Text = await this.parseWebPage(contentItemLink);
                    await contentItem.Save();
                    addedContentItems.push(contentItem); // Content item was added, add to list
                }
            }
            catch (e) {
                console.log(e);
            }
        }
        return addedContentItems;
    }
    /**
     * Fetches a URL with an HTTP GET request.
     * @param url URL to fetch.
     * @returns The response body.
     */
    async fetchPageContent(url) {
        const { data } = await axios.get(url);
        return data;
    }
    /**
     * Recursively collects the text of an element. Text nodes are trimmed and followed by a
     * space; each nested tag's text is wrapped in newlines.
     * @param element Element whose contents are walked.
     * @param $ Loaded cheerio instance for the document containing the element.
     * @returns The concatenated text.
     */
    getTextWithLineBreaks(element, $) {
        let text = '';
        const children = $(element).contents();
        for (let i = 0; i < children.length; i++) {
            const el = children[i];
            if (el.type === 'text') {
                text += $(el).text().trim() + ' ';
            }
            else if (el.type === 'tag') {
                text += '\n' + this.getTextWithLineBreaks(el, $) + '\n';
            }
        }
        return text;
    }
    /**
     * Given a URL, extracts the text of the page's body.
     * @param url URL of the page.
     * @returns The extracted text, or an empty string if fetching or parsing fails.
     */
    async parseWebPage(url) {
        try {
            const pageContent = await this.fetchPageContent(url);
            const $ = cheerio.load(pageContent);
            const text = this.getTextWithLineBreaks($('body')[0], $);
            return text;
        }
        catch (error) {
            console.error(`Error processing ${url}:`, error);
            return '';
        }
    }
    /**
     * Crawls a content source starting at `url` and returns every link collected according to
     * the crawl settings (this.MaxDepth, this.CrawlSitesInLowerLevelDomain and
     * this.CrawlOtherSitesInTopLevelDomain).
     * @param url Start URL of the content source.
     * @param rootURL Prefix a link must start with to be followed by the lower-level crawl.
     * @param regex Pattern a link must match to be followed by the lower-level crawl.
     * @returns The collected URLs, or an empty array if the crawl fails.
     */
    async getAllLinksFromContentSource(url, rootURL, regex) {
        try {
            // Start every content source with an empty set; otherwise links collected for a
            // previous content source would be returned again for this one.
            this.visitedURLs.clear();
            await this.getLowerLevelLinks(url, rootURL, this.MaxDepth, new Set(), regex);
            await this.getTopLevelLinks(url, this.getBasePath(url));
            return Array.from(this.visitedURLs);
        }
        catch (e) {
            console.error(`Failed to get links from ${url}`, e);
            return [];
        }
    }
    /**
     * Resolves an href against a base URL.
     * @param href Raw href attribute value.
     * @param baseURL URL of the page the href was found on.
     * @returns The absolute URL, or null when the href cannot be parsed as a URL.
     */
    resolveLink(href, baseURL) {
        try {
            return new URL(href, baseURL).href;
        }
        catch (e) {
            // A single malformed href must not abort link extraction for the whole page.
            return null;
        }
    }
    /**
     * Adds `url` to the visited set and, when this.CrawlOtherSitesInTopLevelDomain is enabled,
     * also adds every link on that page that starts with `rootURL`.
     * @param url Page to inspect.
     * @param rootURL Prefix a link must start with to be collected.
     */
    async getTopLevelLinks(url, rootURL) {
        if (!this.CrawlOtherSitesInTopLevelDomain) {
            this.visitedURLs.add(url);
            return;
        }
        // Skip URLs that were already visited, are unreachable, or have no path below the origin.
        if (this.visitedURLs.has(url) || !await this.urlIsValid(url) || this.isHighestDomain(url)) {
            return;
        }
        this.visitedURLs.add(url);
        try {
            const { data } = await axios.get(url);
            const $ = cheerio.load(data);
            // Collect all links on the page for the current URL.
            $('a').each((_, element) => {
                const link = $(element).attr('href');
                if (!link) {
                    return;
                }
                const newURL = this.resolveLink(link, url);
                if (newURL && newURL.startsWith(rootURL) && !this.visitedURLs.has(newURL)) {
                    this.visitedURLs.add(newURL);
                }
            });
            await this.delay(1000); // Delay to prevent rate limiting
        }
        catch (e) {
            console.error(`Failed to get links from ${url}`, e);
            return;
        }
    }
    /**
     * Simple check to see whether the URL has no path below its origin.
     * @param url URL to check.
     * @returns true when the path is '/' or empty.
     * @throws Rethrows the parse error when `url` is not a valid URL.
     */
    isHighestDomain(url) {
        try {
            const parsedURL = new URL(url);
            return parsedURL.pathname === '/' || parsedURL.pathname === '';
        }
        catch (e) {
            console.error(`Invalid URL for same level parsing: ${url}`);
            throw e;
        }
    }
    /**
     * Returns the origin plus the path of `url` with its last segment removed.
     * @param url Absolute URL.
     * @returns The base path, e.g. 'https://abc.com/docs' for 'https://abc.com/docs/page'.
     * @throws TypeError when `url` is not a valid URL.
     */
    getBasePath(url) {
        const parsedURL = new URL(url);
        const pathSegments = parsedURL.pathname.split('/').filter(segment => segment);
        if (pathSegments.length > 0) {
            pathSegments.pop(); // Remove last segment so that we are in the same level domain
        }
        const basePath = parsedURL.origin + '/' + pathSegments.join('/');
        return basePath;
    }
    /**
     * Returns the origin plus the path of `url` with empty path segments removed; the query
     * string and fragment are dropped.
     * @param url Absolute URL.
     * @returns The normalized path, e.g. 'https://abc.com/xyz'.
     * @throws Rethrows the parse error when `url` is not a valid URL.
     */
    getPathName(url) {
        try {
            const parsedURL = new URL(url);
            const pathSegments = parsedURL.pathname.split('/').filter(segment => segment);
            const path = parsedURL.origin + '/' + pathSegments.join('/');
            return path;
        }
        catch (e) {
            console.error(`Invalid URL for same level parsing: ${url}`);
            throw e;
        }
    }
    /**
     * Checks whether a URL is reachable by issuing an HTTP HEAD request.
     * @param url URL to probe.
     * @returns true only when the response status is exactly 200; false on any error.
     */
    async urlIsValid(url) {
        try {
            const response = await axios.head(url);
            return response.status === 200;
        }
        catch (e) {
            console.error(`Invalid URL: ${url}`);
            return false;
        }
    }
    /**
     * Collects the links on `url` that start with `rootURL` and match `regex`, then recurses
     * into each of them until `crawlDepth` is exhausted. Every collected link is also added
     * to this.visitedURLs.
     * @param url Page to scrape.
     * @param rootURL Prefix a link must start with to be collected.
     * @param crawlDepth Remaining recursion depth; 0 scrapes this page without recursing.
     * @param scrapedURLs Pages already scraped during this crawl (shared across the recursion).
     * @param regex Pattern a link must match to be collected.
     * @returns The links found on this page and on all pages below it; an empty set when the
     * page is skipped or an error occurs.
     */
    async getLowerLevelLinks(url, rootURL, crawlDepth, scrapedURLs, regex) {
        try {
            console.log(`Scraping ${url}`);
            // Skip pages already scraped or unreachable, exhausted depth, or a disabled crawl.
            if (scrapedURLs.has(url) || await this.urlIsValid(url) === false || crawlDepth < 0 || !this.CrawlSitesInLowerLevelDomain) {
                return new Set();
            }
            const extractedLinks = new Set(); // Links extracted from the input URL
            const { data } = await axios.get(url);
            const $ = cheerio.load(data);
            // Collect all links on the page for the current URL.
            $('a').each((_, element) => {
                const link = $(element).attr('href');
                if (!link) {
                    return;
                }
                const newURL = this.resolveLink(link, url);
                if (newURL && newURL.startsWith(rootURL) && newURL !== url && !this.visitedURLs.has(newURL) && regex.test(newURL)) {
                    extractedLinks.add(newURL);
                    this.visitedURLs.add(newURL);
                }
            });
            await this.delay(1000); // Delay to prevent rate limiting
            scrapedURLs.add(url);
            // At the depth limit, return the current set of URLs and don't recurse.
            if (crawlDepth === 0) {
                return extractedLinks;
            }
            // Accumulate across all sub-links; reassigning the set on every iteration would
            // keep only the results of the last sub-link.
            const combinedLinks = new Set(extractedLinks);
            for (const subLink of extractedLinks) {
                const lowerLevelLinks = await this.getLowerLevelLinks(subLink, rootURL, crawlDepth - 1, scrapedURLs, regex);
                for (const lowerLevelLink of lowerLevelLinks) {
                    combinedLinks.add(lowerLevelLink);
                }
            }
            return combinedLinks;
        }
        catch (e) {
            console.error(`Failed to get links from ${url}`, e);
            return new Set();
        }
    }
    /**
     * Resolves after the given number of milliseconds.
     * @param ms Time to wait in milliseconds.
     */
    async delay(ms) {
        return new Promise(resolve => setTimeout(resolve, ms));
    }
};
|
|
350
|
+
// Apply the class decorators: register AutotagWebsite under the key 'AutotagWebsite'
// for the AutotagBase base class, and attach the (empty) constructor parameter metadata.
const autotagWebsiteDecorators = [
    RegisterClass(AutotagBase, 'AutotagWebsite'),
    __metadata("design:paramtypes", [])
];
AutotagWebsite = __decorate(autotagWebsiteDecorators, AutotagWebsite);
|
|
354
|
+
export { AutotagWebsite };
|
|
355
|
+
//# sourceMappingURL=AutotagWebsite.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"AutotagWebsite.js","sourceRoot":"","sources":["../../../src/Websites/generic/AutotagWebsite.ts"],"names":[],"mappings":";;;;;;;;;AAAA,OAAO,EAAE,WAAW,EAAE,MAAM,YAAY,CAAC;AACzC,OAAO,EAAE,iBAAiB,EAAuB,MAAM,cAAc,CAAC;AACtE,OAAO,EAAE,aAAa,EAAE,MAAM,wBAAwB,CAAC;AACvD,OAAO,EAAY,QAAQ,EAAE,OAAO,EAAE,MAAM,sBAAsB,CAAC;AAEnE,OAAO,KAAK,OAAO,MAAM,SAAS,CAAC;AACnC,OAAO,KAAK,MAAM,OAAO,CAAC;AAC1B,OAAO,EAAE,GAAG,EAAE,MAAM,KAAK,CAAC;AAC1B,OAAO,MAAM,MAAM,QAAQ,CAAC;AAC5B,MAAM,CAAC,MAAM,CAAC,EAAE,KAAK,EAAE,IAAI,EAAE,CAAC,CAAA;AAGvB,IAAM,cAAc,GAApB,MAAM,cAAe,SAAQ,WAAW;IAW3C;QACI,KAAK,EAAE,CAAC;QACR,IAAI,CAAC,MAAM,GAAG,iBAAiB,CAAC,QAAQ,CAAC;QACzC,IAAI,CAAC,WAAW,GAAG,IAAI,GAAG,EAAU,CAAC;IACzC,CAAC;IAES,cAAc;QACpB,OAAO,IAAI,CAAC,WAAW,CAAC;IAC5B,CAAC;IAED;;;;OAIG;IACI,KAAK,CAAC,OAAO,CAAC,WAAqB;QACtC,IAAI,CAAC,WAAW,GAAG,WAAW,CAAC;QAC/B,IAAI,CAAC,mBAAmB,GAAG,MAAM,IAAI,CAAC,MAAM,CAAC,4BAA4B,CAAC,SAAS,EAAE,IAAI,CAAC,WAAW,CAAC,CAAC;QACvG,MAAM,cAAc,GAA0B,MAAM,IAAI,CAAC,MAAM,CAAC,oBAAoB,CAAC,IAAI,CAAC,WAAW,EAAE,IAAI,CAAC,mBAAmB,CAAC,CAAC;QACjI,MAAM,qBAAqB,GAAwB,MAAM,IAAI,CAAC,wBAAwB,CAAC,cAAc,CAAC,CAAC;QACvG,MAAM,IAAI,CAAC,MAAM,CAAC,4BAA4B,CAAC,qBAAqB,EAAE,IAAI,CAAC,WAAW,CAAC,CAAC;IAC5F,CAAC;IAGD;;;;;OAKG;IACI,KAAK,CAAC,wBAAwB,CAAC,cAAqC;QACvE,MAAM,qBAAqB,GAAwB,EAAE,CAAA;QAErD,2FAA2F;QAC3F,KAAK,MAAM,aAAa,IAAI,cAAc,EAAE,CAAC;YACzC,MAAM,sBAAsB,GAAG,MAAM,IAAI,CAAC,MAAM,CAAC,sBAAsB,CAAC,aAAa,EAAE,IAAI,CAAC,WAAW,CAAC,CAAC;YACzG,IAAI,sBAAsB,EAAE,CAAC;gBACzB,wDAAwD;gBACxD,sBAAsB,CAAC,OAAO,CAAC,CAAC,KAAK,EAAE,GAAG,EAAE,EAAE;oBAC1C,IAAI,GAAG,IAAI,IAAI,EAAE,CAAC;wBACb,IAAY,CAAC,GAAG,CAAC,GAAG,KAAK,CAAC;oBAC/B,CAAC;gBACL,CAAC,CAAC,CAAA;YACN,CAAC;YAED,MAAM,mBAAmB,GAAwB;gBAC7C,eAAe,EAAE,aAAa,CAAC,EAAE;gBACjC,IAAI,EAAE,aAAa,CAAC,IAAI;gBACxB,aAAa,EAAE,aAAa,CAAC,aAAa;gBAC1C,iBAAiB,EAAE,aAAa,CAAC,iBAAiB;gBAClD,mBAAmB,EAAE,aAAa,CAAC,mBAAmB;gBACtD,GAAG,EAAE,aAAa,CAAC,GAAG;aACzB,CAAA;YAED,IAAI,CAAC;gBAED,uDAAuD;gBACvD,MAAM,QAAQ,GAAW,mBAAmB,CAAC,GAAG,CAAC;gBAEjD,qHAAqH;gBACrH,MAAM,OAAO,GAAW,IAAI,CAAC
,OAAO,CAAC,CAAC,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC,CAAC,IAAI,CAAC,WAAW,CAAC,QAAQ,CAAC,CAAC;gBAEjF,oGAAoG;gBACpG,MAAM,KAAK,GAAW,IAAI,CAAC,UAAU,IAAI,IAAI,MAAM,CAAC,IAAI,CAAC,UAAU,CAAC,IAAI,IAAI,MAAM,CAAC,IAAI,CAAC,CAAC;gBAEzF,MAAM,mBAAmB,GAAa,MAAM,IAAI,CAAC,4BAA4B,CAAC,QAAQ,EAAE,OAAO,EAAE,KAAK,CAAC,CAAC;gBACxG,MAAM,YAAY,GAAwB,MAAM,IAAI,CAAC,6BAA6B,CAAC,mBAAmB,EAAE,mBAAmB,EAAE,IAAI,CAAC,WAAW,CAAC,CAAC;gBAC/I,IAAI,YAAY,IAAI,YAAY,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;oBAC1C,qBAAqB,CAAC,IAAI,CAAC,GAAG,YAAY,CAAC,CAAC;gBAChD,CAAC;qBACI,CAAC;oBACF,oCAAoC;oBACpC,OAAO,CAAC,GAAG,CAAC,yDAAyD,aAAa,CAAC,GAAG,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC;gBACtG,CAAC;YACL,CAAC;YAAC,OAAO,CAAC,EAAE,CAAC;gBACT,OAAO,CAAC,KAAK,CAAC,qCAAqC,aAAa,CAAC,GAAG,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC;YACpF,CAAC;QACL,CAAC;QAED,OAAO,qBAAqB,CAAC;IACjC,CAAC;IAED;;;;;;;;OAQG;IACO,KAAK,CAAC,6BAA6B,CAAC,gBAA0B,EAAE,mBAAwC,EAAE,WAAqB;QAErI,MAAM,iBAAiB,GAAwB,EAAE,CAAC;QAClD,KAAK,MAAM,eAAe,IAAI,gBAAgB,EAAE,CAAC;YAC7C,IAAI,CAAC;gBACD,MAAM,OAAO,GAAG,MAAM,IAAI,CAAC,MAAM,CAAC,kBAAkB,CAAC,eAAe,CAAC,CAAC;gBAEtE,MAAM,EAAE,GAAG,IAAI,OAAO,EAAE,CAAC;gBACzB,MAAM,OAAO,GAAG,MAAM,EAAE,CAAC,QAAQ,CAAoB;oBACjD;wBACI,UAAU,EAAE,eAAe;wBAC3B,WAAW,EAAE,eAAe,OAAO,GAAG;wBACtC,UAAU,EAAE,eAAe;qBAC9B;oBACD;wBACI,UAAU,EAAE,eAAe;wBAC3B,WAAW,EAAE,sBAAsB,mBAAmB,CAAC,eAAe,gBAAgB,eAAe,GAAG;wBACxG,UAAU,EAAE,eAAe;qBAC9B;iBACJ,EAAE,IAAI,CAAC,WAAW,CAAC,CAAA;gBAEpB,MAAM,8BAA8B,GAAG,OAAO,CAAC,CAAC,CAAC,CAAA;gBACjD,MAAM,yBAAyB,GAAG,OAAO,CAAC,CAAC,CAAC,CAAA;gBAE5C,IAAI,8BAA8B,CAAC,OAAO,IAAI,8BAA8B,CAAC,OAAO,CAAC,MAAM,EAAE,CAAC;oBAC1F,mGAAmG;oBACnG,SAAS;gBACb,CAAC;qBAEI,IAAI,yBAAyB,CAAC,OAAO,IAAI,yBAAyB,CAAC,OAAO,CAAC,MAAM,EAAE,CAAC;oBACrF,0EAA0E;oBAC1E,MAAM,iBAAiB,GAAsB,yBAAyB,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC;oBAClF,MAAM,cAAc,GAAW,iBAAiB,CAAC,QAAQ,CAAA;oBAEzD,IAAI,cAAc,KAAK,OAAO,EAAE,CAAC;wBAC7B,+FAA+F;wBAC/F,MAAM,EAAE,GAAG,IAAI,QAAQ,EAAE,CAAC;wBAC1B,MAAM,WAAW,GAAG,MAAM,EAAE,CAAC,eAAe,CAAoB,eAAe,EAAE,IAAI,CAAC,WAAW,CAAC,CAAC;wBACnG,WAAW,CAAC,IAAI,CAAC,iBAAiB,CAAC,EAAE,CAAC,CAAC;wBACvC,W
AAW,CAAC,QAAQ,GAAG,OAAO,CAAA;wBAC9B,WAAW,CAAC,IAAI,GAAG,MAAM,IAAI,CAAC,YAAY,CAAC,eAAe,CAAC,CAAA;wBAE3D,MAAM,WAAW,CAAC,IAAI,EAAE,CAAC;wBACzB,iBAAiB,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC,CAAC,yCAAyC;oBAClF,CAAC;gBACL,CAAC;qBACI,CAAC;oBACF,2CAA2C;oBAC3C,MAAM,EAAE,GAAG,IAAI,QAAQ,EAAE,CAAC;oBAC1B,MAAM,WAAW,GAAG,MAAM,EAAE,CAAC,eAAe,CAAoB,eAAe,EAAE,IAAI,CAAC,WAAW,CAAC,CAAC;oBACnG,WAAW,CAAC,eAAe,GAAG,mBAAmB,CAAC,eAAe,CAAA;oBACjE,WAAW,CAAC,IAAI,GAAG,IAAI,CAAC,WAAW,CAAC,eAAe,CAAC,CAAA,CAAC,mDAAmD;oBACxG,WAAW,CAAC,WAAW,GAAG,MAAM,IAAI,CAAC,MAAM,CAAC,yBAAyB,CAAC,mBAAmB,EAAE,IAAI,CAAC,WAAW,CAAC,CAAA;oBAC5G,WAAW,CAAC,aAAa,GAAG,mBAAmB,CAAC,aAAa,CAAA;oBAC7D,WAAW,CAAC,iBAAiB,GAAG,mBAAmB,CAAC,iBAAiB,CAAA;oBACrE,WAAW,CAAC,mBAAmB,GAAG,mBAAmB,CAAC,mBAAmB,CAAA;oBACzE,WAAW,CAAC,QAAQ,GAAG,MAAM,IAAI,CAAC,MAAM,CAAC,kBAAkB,CAAC,eAAe,CAAC,CAAA;oBAC5E,WAAW,CAAC,GAAG,GAAG,eAAe,CAAA;oBACjC,WAAW,CAAC,IAAI,GAAG,MAAM,IAAI,CAAC,YAAY,CAAC,eAAe,CAAC,CAAA;oBAE3D,MAAM,WAAW,CAAC,IAAI,EAAE,CAAC;oBACzB,iBAAiB,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC,CAAC,sCAAsC;gBAC/E,CAAC;YACD,CAAC;YAAA,OAAO,CAAC,EAAE,CAAC;gBACR,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,CAAA;YAClB,CAAC;QACT,CAAC;QACD,OAAO,iBAAiB,CAAC;IAC7B,CAAC;IAEM,KAAK,CAAC,gBAAgB,CAAC,GAAW;QACrC,MAAM,EAAE,IAAI,EAAE,GAAG,MAAM,KAAK,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC;QACtC,OAAO,IAAI,CAAC;IAChB,CAAC;IAEM,qBAAqB,CAAC,OAAY,EAAE,CAAqB;QAC5D,IAAI,IAAI,GAAG,EAAE,CAAC;QACd,MAAM,QAAQ,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC,QAAQ,EAAE,CAAC;QAEvC,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,QAAQ,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;YACvC,MAAM,EAAE,GAAG,QAAQ,CAAC,CAAC,CAAC,CAAC;YACvB,IAAI,EAAE,CAAC,IAAI,KAAK,MAAM,EAAE,CAAC;gBACrB,IAAI,IAAI,CAAC,CAAC,EAAE,CAAC,CAAC,IAAI,EAAE,CAAC,IAAI,EAAE,GAAG,GAAG,CAAC;YACtC,CAAC;iBAAM,IAAI,EAAE,CAAC,IAAI,KAAK,KAAK,EAAE,CAAC;gBAC3B,IAAI,IAAI,IAAI,GAAG,IAAI,CAAC,qBAAqB,CAAC,EAAE,EAAE,CAAC,CAAC,GAAG,IAAI,CAAC;YAC5D,CAAC;QACL,CAAC;QAED,OAAO,IAAI,CAAC;IAChB,CAAC;IAED;;;;OAIG;IACI,KAAK,CAAC,YAAY,CAAC,GAAW;QACjC,IAAI,CAAC;YACD,MAAM,WAAW,GAAW,MAAM,IAAI,CAAC,gBAAgB,CAAC,GAAG,CAAC,CAAC;YAC7D,MAA
M,CAAC,GAAG,OAAO,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC;YACpC,MAAM,IAAI,GAAW,IAAI,CAAC,qBAAqB,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC;YACjE,OAAO,IAAI,CAAC;QAChB,CAAC;QACD,OAAO,KAAK,EAAE,CAAC;YACX,OAAO,CAAC,KAAK,CAAC,oBAAoB,GAAG,GAAG,EAAE,KAAK,CAAC,CAAC;YACjD,OAAO,EAAE,CAAC;QACd,CAAC;IACL,CAAC;IAED;;;;;;OAMG;IACO,KAAK,CAAC,4BAA4B,CAAC,GAAW,EAAE,OAAe,EAAE,KAAa;QAEpF,IAAI,CAAC;YACD,MAAM,IAAI,CAAC,kBAAkB,CAAC,GAAG,EAAE,OAAO,EAAE,IAAI,CAAC,QAAQ,EAAE,IAAI,GAAG,EAAU,EAAE,KAAK,CAAC,CAAC;YACrF,MAAM,IAAI,CAAC,gBAAgB,CAAC,GAAG,EAAE,IAAI,CAAC,WAAW,CAAC,GAAG,CAAC,CAAC,CAAC;YAExD,OAAO,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC;QACxC,CAAC;QAAC,OAAO,CAAC,EAAE,CAAC;YACT,OAAO,CAAC,KAAK,CAAC,4BAA4B,GAAG,EAAE,CAAC,CAAC;YACjD,OAAO,EAAE,CAAC;QACd,CAAC;IACL,CAAC;IAED;;;;;;OAMG;IACO,KAAK,CAAC,gBAAgB,CAAC,GAAW,EAAE,OAAe;QACzD,IAAI,CAAC,IAAI,CAAC,+BAA+B,EAAE,CAAC;YACxC,IAAI,CAAC,WAAW,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC;YAC1B,OAAM;QACV,CAAC;QAED,6DAA6D;QAC7D,IAAI,IAAI,CAAC,WAAW,CAAC,GAAG,CAAC,GAAG,CAAC,IAAI,CAAC,MAAM,IAAI,CAAC,UAAU,CAAC,GAAG,CAAC,IAAI,IAAI,CAAC,eAAe,CAAC,GAAG,CAAC,EAAE,CAAC;YACxF,OAAM;QACV,CAAC;QAED,IAAI,CAAC,WAAW,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC;QAE1B,IAAI,CAAC;YACD,MAAM,EAAE,IAAI,EAAE,GAAG,MAAM,KAAK,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC;YACtC,MAAM,CAAC,GAAG,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;YAE7B,gDAAgD;YAChD,CAAC,CAAC,GAAG,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,OAAO,EAAE,EAAE;gBACvB,MAAM,IAAI,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;gBACrC,IAAI,IAAI,EAAE,CAAC;oBACP,MAAM,MAAM,GAAG,IAAI,GAAG,CAAC,IAAI,EAAE,GAAG,CAAC,CAAC,IAAI,CAAC;oBACvC,IAAI,MAAM,CAAC,UAAU,CAAC,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,WAAW,CAAC,GAAG,CAAC,MAAM,CAAC,EAAE,CAAC;wBAC9D,IAAI,CAAC,WAAW,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC;oBACjC,CAAC;gBACL,CAAC;YACL,CAAC,CAAC,CAAC;YACH,MAAM,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,CAAC,iCAAiC;QAC7D,CAAC;QACD,OAAO,CAAC,EAAE,CAAC;YACP,OAAO,CAAC,KAAK,CAAC,4BAA4B,GAAG,EAAE,CAAC,CAAC;YACjD,OAAM;QACV,CAAC;IACL,CAAC;IAED;;;;OAIG;IACO,eAAe,CAAC,GAAW;QACjC,IAAI,CAAC;YACD,MAAM,SAAS,GAAQ,IAA
I,GAAG,CAAC,GAAG,CAAC,CAAC;YACpC,OAAO,SAAS,CAAC,QAAQ,KAAK,GAAG,IAAI,SAAS,CAAC,QAAQ,KAAK,EAAE,CAAC;QACnE,CAAC;QACD,OAAO,CAAC,EAAE,CAAC;YACP,OAAO,CAAC,KAAK,CAAC,uCAAuC,GAAG,EAAE,CAAC,CAAC;YAC5D,MAAM,CAAC,CAAC;QACZ,CAAC;IACL,CAAC;IAES,WAAW,CAAC,GAAW;QAC7B,MAAM,SAAS,GAAQ,IAAI,GAAG,CAAC,GAAG,CAAC,CAAC;QACpC,MAAM,YAAY,GAAa,SAAS,CAAC,QAAQ,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,EAAE,CAAC,OAAO,CAAC,CAAC;QACxF,IAAI,YAAY,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YAC1B,YAAY,CAAC,GAAG,EAAE,CAAC,CAAC,6DAA6D;QACrF,CAAC;QACD,MAAM,QAAQ,GAAG,SAAS,CAAC,MAAM,GAAG,GAAG,GAAG,YAAY,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;QACjE,OAAO,QAAQ,CAAC;IACpB,CAAC;IAED,oFAAoF;IAC1E,WAAW,CAAC,GAAW;QAC7B,IAAI,CAAC;YACD,MAAM,SAAS,GAAQ,IAAI,GAAG,CAAC,GAAG,CAAC,CAAC;YACpC,MAAM,YAAY,GAAa,SAAS,CAAC,QAAQ,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,EAAE,CAAC,OAAO,CAAC,CAAC;YACxF,MAAM,IAAI,GAAG,SAAS,CAAC,MAAM,GAAG,GAAG,GAAG,YAAY,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;YAC7D,OAAO,IAAI,CAAA;QACf,CAAC;QACD,OAAO,CAAC,EAAE,CAAC;YACP,OAAO,CAAC,KAAK,CAAC,uCAAuC,GAAG,EAAE,CAAC,CAAC;YAC5D,MAAM,CAAC,CAAC;QACZ,CAAC;IACL,CAAC;IAES,KAAK,CAAC,UAAU,CAAC,GAAW;QAClC,IAAI,CAAC;YACD,MAAM,QAAQ,GAAG,MAAM,KAAK,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;YACvC,OAAO,QAAQ,CAAC,MAAM,KAAK,GAAG,CAAC;QACnC,CAAC;QACD,OAAO,CAAC,EAAE,CAAC;YACP,OAAO,CAAC,KAAK,CAAC,gBAAgB,GAAG,EAAE,CAAC,CAAC;YACrC,OAAO,KAAK,CAAC;QACjB,CAAC;IACL,CAAC;IAED;;;;;;;OAOG;IACO,KAAK,CAAC,kBAAkB,CAAC,GAAW,EAAE,OAAe,EAAE,UAAkB,EAAE,WAAwB,EAAE,KAAa;QAExH,IAAI,CAAC;YACD,OAAO,CAAC,GAAG,CAAC,YAAY,GAAG,EAAE,CAAC,CAAC;YAC/B,6DAA6D;YAC7D,IAAI,WAAW,CAAC,GAAG,CAAC,GAAG,CAAC,IAAI,MAAM,IAAI,CAAC,UAAU,CAAC,GAAG,CAAC,KAAK,KAAK,IAAI,UAAU,GAAG,CAAC,IAAI,CAAC,IAAI,CAAC,4BAA4B,EAAE,CAAC;gBACvH,OAAO,IAAI,GAAG,EAAU,CAAC;YAC7B,CAAC;YAED,IAAI,aAAa,GAAG,IAAI,GAAG,EAAU,CAAC,CAAC,+DAA+D;YACtG,MAAM,cAAc,GAAG,IAAI,GAAG,EAAU,CAAC,CAAC,qCAAqC;YAE/E,MAAM,EAAE,IAAI,EAAE,GAAG,MAAM,KAAK,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC;YACtC,MAAM,CAAC,GAAG,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;YAE7B,gDAAgD;YAChD,CAAC,CAAC,GAAG,CAAC,CAAC,IAAI,CAAC,CAAC,C
AAC,EAAE,OAAO,EAAE,EAAE;gBACvB,MAAM,IAAI,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;gBACrC,IAAI,IAAI,EAAE,CAAC;oBACP,MAAM,MAAM,GAAG,IAAI,GAAG,CAAC,IAAI,EAAE,GAAG,CAAC,CAAC,IAAI,CAAC;oBACvC,IAAI,MAAM,CAAC,UAAU,CAAC,OAAO,CAAC,IAAI,MAAM,KAAK,GAAG,IAAI,CAAC,IAAI,CAAC,WAAW,CAAC,GAAG,CAAC,MAAM,CAAC,IAAI,KAAK,CAAC,IAAI,CAAC,MAAM,CAAC,EAAE,CAAC;wBACtG,cAAc,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC;wBAC3B,IAAI,CAAC,WAAW,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC;oBACjC,CAAC;gBACL,CAAC;YACL,CAAC,CAAC,CAAC;YACH,MAAM,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,CAAC,iCAAiC;YACzD,WAAW,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC;YAErB,iFAAiF;YACjF,IAAI,UAAU,KAAK,CAAC,EAAE,CAAC;gBACnB,OAAO,cAAc,CAAC;YAC1B,CAAC;YAED,KAAK,MAAM,OAAO,IAAI,cAAc,EAAE,CAAC;gBACnC,mCAAmC;gBACnC,MAAM,eAAe,GAAG,MAAM,IAAI,CAAC,kBAAkB,CAAC,OAAO,EAAE,OAAO,EAAE,UAAU,GAAC,CAAC,EAAE,WAAW,EAAE,KAAK,CAAC,CAAC;gBAC1G,aAAa,GAAG,IAAI,GAAG,CAAS,CAAC,GAAG,cAAc,EAAE,GAAG,eAAe,CAAC,CAAC,CAAC;YAC7E,CAAC;YACD,OAAO,aAAa,CAAC;QACzB,CAAC;QACD,OAAO,CAAC,EAAE,CAAC;YACP,OAAO,CAAC,KAAK,CAAC,4BAA4B,GAAG,EAAE,CAAC,CAAC;YACjD,OAAO,IAAI,GAAG,EAAU,CAAC;QAC7B,CAAC;IACL,CAAC;IAES,KAAK,CAAC,KAAK,CAAC,EAAU;QAC5B,OAAO,IAAI,OAAO,CAAE,OAAO,CAAC,EAAE,CAAC,UAAU,CAAC,OAAO,EAAE,EAAE,CAAC,CAAE,CAAC;IAC7D,CAAC;CACJ,CAAA;AA9XY,cAAc;IAD1B,aAAa,CAAC,WAAW,EAAE,gBAAgB,CAAC;;GAChC,cAAc,CA8X1B"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/Websites/index.ts"],"names":[],"mappings":"AAAA,cAAc,0BAA0B,CAAA"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/Websites/index.ts"],"names":[],"mappings":"AAAA,cAAc,0BAA0B,CAAA"}
|
package/dist/index.d.ts
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
export * from './Core/index.js';
|
|
2
|
+
export * from './Engine/index.js';
|
|
3
|
+
export * from './LocalFileSystem/index.js';
|
|
4
|
+
export * from './RSSFeed/index.js';
|
|
5
|
+
export * from './Websites/index.js';
|
|
6
|
+
export * from './CloudStorage/index.js';
|
|
7
|
+
//# sourceMappingURL=index.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,cAAc,QAAQ,CAAC;AACvB,cAAc,UAAU,CAAC;AACzB,cAAc,mBAAmB,CAAC;AAClC,cAAc,WAAW,CAAC;AAC1B,cAAc,YAAY,CAAC;AAC3B,cAAc,gBAAgB,CAAA"}
|
package/dist/index.js
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
export * from './Core/index.js';
|
|
2
|
+
export * from './Engine/index.js';
|
|
3
|
+
export * from './LocalFileSystem/index.js';
|
|
4
|
+
export * from './RSSFeed/index.js';
|
|
5
|
+
export * from './Websites/index.js';
|
|
6
|
+
export * from './CloudStorage/index.js';
|
|
7
|
+
//# sourceMappingURL=index.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,cAAc,QAAQ,CAAC;AACvB,cAAc,UAAU,CAAC;AACzB,cAAc,mBAAmB,CAAC;AAClC,cAAc,WAAW,CAAC;AAC1B,cAAc,YAAY,CAAC;AAC3B,cAAc,gBAAgB,CAAA"}
|
|
@@ -1,6 +1,6 @@
|
|
|
1
|
-
import { AutotagBase } from "../../Core";
|
|
2
|
-
import { AutotagBaseEngine } from "../../Engine";
|
|
3
|
-
import { ContentSourceParams } from "../../Engine";
|
|
1
|
+
import { AutotagBase } from "../../Core/index.js";
|
|
2
|
+
import { AutotagBaseEngine } from "../../Engine/index.js";
|
|
3
|
+
import { ContentSourceParams } from "../../Engine/index.js";
|
|
4
4
|
import { UserInfo } from "@memberjunction/core";
|
|
5
5
|
import { ContentSourceEntity, ContentItemEntity } from "@memberjunction/core-entities";
|
|
6
6
|
export declare abstract class CloudStorageBase extends AutotagBase {
|
|
@@ -4,8 +4,8 @@ var __importDefault = (this && this.__importDefault) || function (mod) {
|
|
|
4
4
|
};
|
|
5
5
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
6
6
|
exports.CloudStorageBase = void 0;
|
|
7
|
-
const Core_1 = require("../../Core");
|
|
8
|
-
const Engine_1 = require("../../Engine");
|
|
7
|
+
const Core_1 = require("../../Core/index.js");
|
|
8
|
+
const Engine_1 = require("../../Engine/index.js");
|
|
9
9
|
const dotenv_1 = __importDefault(require("dotenv"));
|
|
10
10
|
dotenv_1.default.config();
|
|
11
11
|
class CloudStorageBase extends Core_1.AutotagBase {
|
|
@@ -1,3 +1,3 @@
|
|
|
1
|
-
export * from './generic/CloudStorageBase';
|
|
2
|
-
export * from './providers/AutotagAzureBlob';
|
|
1
|
+
export * from './generic/CloudStorageBase.js';
|
|
2
|
+
export * from './providers/AutotagAzureBlob.js';
|
|
3
3
|
//# sourceMappingURL=index.d.ts.map
|
|
@@ -14,6 +14,6 @@ var __exportStar = (this && this.__exportStar) || function(m, exports) {
|
|
|
14
14
|
for (var p in m) if (p !== "default" && !Object.prototype.hasOwnProperty.call(exports, p)) __createBinding(exports, m, p);
|
|
15
15
|
};
|
|
16
16
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
17
|
-
__exportStar(require("./generic/CloudStorageBase"), exports);
|
|
18
|
-
__exportStar(require("./providers/AutotagAzureBlob"), exports);
|
|
17
|
+
__exportStar(require("./generic/CloudStorageBase.js"), exports);
|
|
18
|
+
__exportStar(require("./providers/AutotagAzureBlob.js"), exports);
|
|
19
19
|
//# sourceMappingURL=index.js.map
|
|
@@ -1,9 +1,9 @@
|
|
|
1
1
|
/// <reference types="node" />
|
|
2
2
|
/// <reference types="node" />
|
|
3
|
-
import { CloudStorageBase } from "../generic/CloudStorageBase";
|
|
3
|
+
import { CloudStorageBase } from "../generic/CloudStorageBase.js";
|
|
4
4
|
import { UserInfo } from "@memberjunction/core";
|
|
5
5
|
import { ContentItemEntity } from "@memberjunction/core-entities";
|
|
6
|
-
import { ContentSourceParams } from "../../Engine";
|
|
6
|
+
import { ContentSourceParams } from "../../Engine/index.js";
|
|
7
7
|
export declare class AutotagAzureBlob extends CloudStorageBase {
|
|
8
8
|
private blobServiceClient;
|
|
9
9
|
private containerClient;
|
|
@@ -4,7 +4,7 @@ var __importDefault = (this && this.__importDefault) || function (mod) {
|
|
|
4
4
|
};
|
|
5
5
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
6
6
|
exports.AutotagAzureBlob = void 0;
|
|
7
|
-
const CloudStorageBase_1 = require("../generic/CloudStorageBase");
|
|
7
|
+
const CloudStorageBase_1 = require("../generic/CloudStorageBase.js");
|
|
8
8
|
const storage_blob_1 = require("@azure/storage-blob");
|
|
9
9
|
const dotenv_1 = __importDefault(require("dotenv"));
|
|
10
10
|
const core_1 = require("@memberjunction/core");
|
package/dist/src/Core/index.d.ts
CHANGED
|
@@ -1,2 +1,2 @@
|
|
|
1
|
-
export * from './generic/AutotagBase';
|
|
1
|
+
export * from './generic/AutotagBase.js';
|
|
2
2
|
//# sourceMappingURL=index.d.ts.map
|
package/dist/src/Core/index.js
CHANGED
|
@@ -14,5 +14,5 @@ var __exportStar = (this && this.__exportStar) || function(m, exports) {
|
|
|
14
14
|
for (var p in m) if (p !== "default" && !Object.prototype.hasOwnProperty.call(exports, p)) __createBinding(exports, m, p);
|
|
15
15
|
};
|
|
16
16
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
17
|
-
__exportStar(require("./generic/AutotagBase"), exports);
|
|
17
|
+
__exportStar(require("./generic/AutotagBase.js"), exports);
|
|
18
18
|
//# sourceMappingURL=index.js.map
|
|
@@ -1,8 +1,8 @@
|
|
|
1
1
|
/// <reference types="node" />
|
|
2
2
|
import { UserInfo } from '@memberjunction/core';
|
|
3
3
|
import { ContentSourceEntity, ContentItemEntity } from '@memberjunction/core-entities';
|
|
4
|
-
import { ContentSourceParams, ContentSourceTypeParams } from './content.types';
|
|
5
|
-
import { ProcessRunParams, JsonObject, ContentItemProcessParams } from './process.types';
|
|
4
|
+
import { ContentSourceParams, ContentSourceTypeParams } from './content.types.js';
|
|
5
|
+
import { ProcessRunParams, JsonObject, ContentItemProcessParams } from './process.types.js';
|
|
6
6
|
import { BaseLLM } from '@memberjunction/ai';
|
|
7
7
|
import { AIEngine } from '@memberjunction/aiengine';
|
|
8
8
|
export declare class AutotagBaseEngine extends AIEngine {
|
|
@@ -38,11 +38,11 @@ Object.defineProperty(exports, "__esModule", { value: true });
|
|
|
38
38
|
exports.AutotagBaseEngine = void 0;
|
|
39
39
|
const core_1 = require("@memberjunction/core");
|
|
40
40
|
const global_1 = require("@memberjunction/global");
|
|
41
|
-
const content_types_1 = require("./content.types");
|
|
41
|
+
const content_types_1 = require("./content.types.js");
|
|
42
42
|
const pdf_parse_1 = __importDefault(require("pdf-parse"));
|
|
43
43
|
const officeparser = __importStar(require("officeparser"));
|
|
44
44
|
const fs = __importStar(require("fs"));
|
|
45
|
-
const process_types_1 = require("./process.types");
|
|
45
|
+
const process_types_1 = require("./process.types.js");
|
|
46
46
|
const date_fns_tz_1 = require("date-fns-tz");
|
|
47
47
|
const axios_1 = __importDefault(require("axios"));
|
|
48
48
|
const cheerio = __importStar(require("cheerio"));
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
export * from './generic/AutotagBaseEngine';
|
|
2
|
-
export * from './generic/content.types';
|
|
3
|
-
export * from './generic/process.types';
|
|
1
|
+
export * from './generic/AutotagBaseEngine.js';
|
|
2
|
+
export * from './generic/content.types.js';
|
|
3
|
+
export * from './generic/process.types.js';
|
|
4
4
|
//# sourceMappingURL=index.d.ts.map
|
package/dist/src/Engine/index.js
CHANGED
|
@@ -14,7 +14,7 @@ var __exportStar = (this && this.__exportStar) || function(m, exports) {
|
|
|
14
14
|
for (var p in m) if (p !== "default" && !Object.prototype.hasOwnProperty.call(exports, p)) __createBinding(exports, m, p);
|
|
15
15
|
};
|
|
16
16
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
17
|
-
__exportStar(require("./generic/AutotagBaseEngine"), exports);
|
|
18
|
-
__exportStar(require("./generic/content.types"), exports);
|
|
19
|
-
__exportStar(require("./generic/process.types"), exports);
|
|
17
|
+
__exportStar(require("./generic/AutotagBaseEngine.js"), exports);
|
|
18
|
+
__exportStar(require("./generic/content.types.js"), exports);
|
|
19
|
+
__exportStar(require("./generic/process.types.js"), exports);
|
|
20
20
|
//# sourceMappingURL=index.js.map
|
|
@@ -1,5 +1,5 @@
|
|
|
1
|
-
import { AutotagBase } from "../../Core";
|
|
2
|
-
import { ContentSourceParams } from "../../Engine";
|
|
1
|
+
import { AutotagBase } from "../../Core/index.js";
|
|
2
|
+
import { ContentSourceParams } from "../../Engine/index.js";
|
|
3
3
|
import { UserInfo, BaseEntity } from "@memberjunction/core";
|
|
4
4
|
import { ContentSourceEntity, ContentItemEntity } from "@memberjunction/core-entities";
|
|
5
5
|
export declare class AutotagEntity extends AutotagBase {
|
|
@@ -11,8 +11,8 @@ var __metadata = (this && this.__metadata) || function (k, v) {
|
|
|
11
11
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
12
12
|
exports.AutotagEntity = void 0;
|
|
13
13
|
const global_1 = require("@memberjunction/global");
|
|
14
|
-
const Core_1 = require("../../Core");
|
|
15
|
-
const Engine_1 = require("../../Engine");
|
|
14
|
+
const Core_1 = require("../../Core/index.js");
|
|
15
|
+
const Engine_1 = require("../../Engine/index.js");
|
|
16
16
|
const core_1 = require("@memberjunction/core");
|
|
17
17
|
let AutotagEntity = class AutotagEntity extends Core_1.AutotagBase {
|
|
18
18
|
contextUser;
|
|
@@ -1,2 +1,2 @@
|
|
|
1
|
-
export * from './generic/AutotagEntity';
|
|
1
|
+
export * from './generic/AutotagEntity.js';
|
|
2
2
|
//# sourceMappingURL=index.d.ts.map
|
package/dist/src/Entity/index.js
CHANGED
|
@@ -14,5 +14,5 @@ var __exportStar = (this && this.__exportStar) || function(m, exports) {
|
|
|
14
14
|
for (var p in m) if (p !== "default" && !Object.prototype.hasOwnProperty.call(exports, p)) __createBinding(exports, m, p);
|
|
15
15
|
};
|
|
16
16
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
17
|
-
__exportStar(require("./generic/AutotagEntity"), exports);
|
|
17
|
+
__exportStar(require("./generic/AutotagEntity.js"), exports);
|
|
18
18
|
//# sourceMappingURL=index.js.map
|
|
@@ -1,5 +1,5 @@
|
|
|
1
|
-
import { AutotagBase } from "../../Core";
|
|
2
|
-
import { ContentSourceParams } from "../../Engine";
|
|
1
|
+
import { AutotagBase } from "../../Core/index.js";
|
|
2
|
+
import { ContentSourceParams } from "../../Engine/index.js";
|
|
3
3
|
import { UserInfo } from "@memberjunction/core";
|
|
4
4
|
import { ContentSourceEntity, ContentItemEntity } from "@memberjunction/core-entities";
|
|
5
5
|
import { OpenAI } from "openai";
|
|
@@ -15,8 +15,8 @@ Object.defineProperty(exports, "__esModule", { value: true });
|
|
|
15
15
|
exports.AutotagLocalFileSystem = void 0;
|
|
16
16
|
const global_1 = require("@memberjunction/global");
|
|
17
17
|
const fs_1 = __importDefault(require("fs"));
|
|
18
|
-
const Core_1 = require("../../Core");
|
|
19
|
-
const Engine_1 = require("../../Engine");
|
|
18
|
+
const Core_1 = require("../../Core/index.js");
|
|
19
|
+
const Engine_1 = require("../../Engine/index.js");
|
|
20
20
|
const core_1 = require("@memberjunction/core");
|
|
21
21
|
const path_1 = __importDefault(require("path"));
|
|
22
22
|
const dotenv_1 = __importDefault(require("dotenv"));
|
|
@@ -1,2 +1,2 @@
|
|
|
1
|
-
export * from './generic/AutotagLocalFileSystem';
|
|
1
|
+
export * from './generic/AutotagLocalFileSystem.js';
|
|
2
2
|
//# sourceMappingURL=index.d.ts.map
|
|
@@ -14,5 +14,5 @@ var __exportStar = (this && this.__exportStar) || function(m, exports) {
|
|
|
14
14
|
for (var p in m) if (p !== "default" && !Object.prototype.hasOwnProperty.call(exports, p)) __createBinding(exports, m, p);
|
|
15
15
|
};
|
|
16
16
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
17
|
-
__exportStar(require("./generic/AutotagLocalFileSystem"), exports);
|
|
17
|
+
__exportStar(require("./generic/AutotagLocalFileSystem.js"), exports);
|
|
18
18
|
//# sourceMappingURL=index.js.map
|
|
@@ -1,8 +1,8 @@
|
|
|
1
1
|
import { UserInfo } from '@memberjunction/core';
|
|
2
|
-
import { AutotagBase } from "../../Core";
|
|
3
|
-
import { ContentSourceParams } from "../../Engine";
|
|
2
|
+
import { AutotagBase } from "../../Core/index.js";
|
|
3
|
+
import { ContentSourceParams } from "../../Engine/index.js";
|
|
4
4
|
import { ContentSourceEntity, ContentItemEntity } from '@memberjunction/core-entities';
|
|
5
|
-
import { RSSItem } from './RSS.types';
|
|
5
|
+
import { RSSItem } from './RSS.types.js';
|
|
6
6
|
export declare class AutotagRSSFeed extends AutotagBase {
|
|
7
7
|
private contextUser;
|
|
8
8
|
private engine;
|