@memberjunction/content-autotagging 3.3.0 → 4.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/CloudStorage/generic/CloudStorageBase.d.ts +28 -0
- package/dist/CloudStorage/generic/CloudStorageBase.d.ts.map +1 -0
- package/dist/CloudStorage/generic/CloudStorageBase.js +38 -0
- package/dist/CloudStorage/generic/CloudStorageBase.js.map +1 -0
- package/dist/CloudStorage/index.d.ts +3 -0
- package/dist/CloudStorage/index.d.ts.map +1 -0
- package/dist/CloudStorage/index.js +3 -0
- package/dist/CloudStorage/index.js.map +1 -0
- package/dist/CloudStorage/providers/AutotagAzureBlob.d.ts +20 -0
- package/dist/CloudStorage/providers/AutotagAzureBlob.d.ts.map +1 -0
- package/dist/CloudStorage/providers/AutotagAzureBlob.js +86 -0
- package/dist/CloudStorage/providers/AutotagAzureBlob.js.map +1 -0
- package/dist/Core/generic/AutotagBase.d.ts +7 -0
- package/dist/Core/generic/AutotagBase.d.ts.map +1 -0
- package/dist/Core/generic/AutotagBase.js +3 -0
- package/dist/Core/generic/AutotagBase.js.map +1 -0
- package/dist/Core/index.d.ts +2 -0
- package/dist/Core/index.d.ts.map +1 -0
- package/dist/Core/index.js +2 -0
- package/dist/Core/index.js.map +1 -0
- package/dist/Engine/generic/AutotagBaseEngine.d.ts +131 -0
- package/dist/Engine/generic/AutotagBaseEngine.d.ts.map +1 -0
- package/dist/Engine/generic/AutotagBaseEngine.js +620 -0
- package/dist/Engine/generic/AutotagBaseEngine.js.map +1 -0
- package/dist/Engine/generic/content.types.d.ts +32 -0
- package/dist/Engine/generic/content.types.d.ts.map +1 -0
- package/dist/Engine/generic/content.types.js +7 -0
- package/dist/Engine/generic/content.types.js.map +1 -0
- package/dist/Engine/generic/process.types.d.ts +30 -0
- package/dist/Engine/generic/process.types.d.ts.map +1 -0
- package/dist/Engine/generic/process.types.js +7 -0
- package/dist/Engine/generic/process.types.js.map +1 -0
- package/dist/Engine/index.d.ts +4 -0
- package/dist/Engine/index.d.ts.map +1 -0
- package/dist/Engine/index.js +4 -0
- package/dist/Engine/index.js.map +1 -0
- package/dist/Entity/generic/AutotagEntity.d.ts +19 -0
- package/dist/Entity/generic/AutotagEntity.d.ts.map +1 -0
- package/dist/Entity/generic/AutotagEntity.js +127 -0
- package/dist/Entity/generic/AutotagEntity.js.map +1 -0
- package/dist/Entity/index.d.ts +2 -0
- package/dist/Entity/index.d.ts.map +1 -0
- package/dist/Entity/index.js +2 -0
- package/dist/Entity/index.js.map +1 -0
- package/dist/LocalFileSystem/generic/AutotagLocalFileSystem.d.ts +39 -0
- package/dist/LocalFileSystem/generic/AutotagLocalFileSystem.d.ts.map +1 -0
- package/dist/LocalFileSystem/generic/AutotagLocalFileSystem.js +171 -0
- package/dist/LocalFileSystem/generic/AutotagLocalFileSystem.js.map +1 -0
- package/dist/LocalFileSystem/index.d.ts +2 -0
- package/dist/LocalFileSystem/index.d.ts.map +1 -0
- package/dist/LocalFileSystem/index.js +2 -0
- package/dist/LocalFileSystem/index.js.map +1 -0
- package/dist/RSSFeed/generic/AutotagRSSFeed.d.ts +30 -0
- package/dist/RSSFeed/generic/AutotagRSSFeed.d.ts.map +1 -0
- package/dist/RSSFeed/generic/AutotagRSSFeed.js +177 -0
- package/dist/RSSFeed/generic/AutotagRSSFeed.js.map +1 -0
- package/dist/RSSFeed/generic/RSS.types.d.ts +13 -0
- package/dist/RSSFeed/generic/RSS.types.d.ts.map +1 -0
- package/dist/RSSFeed/generic/RSS.types.js +3 -0
- package/dist/RSSFeed/generic/RSS.types.js.map +1 -0
- package/dist/RSSFeed/index.d.ts +3 -0
- package/dist/RSSFeed/index.d.ts.map +1 -0
- package/dist/RSSFeed/index.js +3 -0
- package/dist/RSSFeed/index.js.map +1 -0
- package/dist/Websites/generic/AutotagWebsite.d.ts +85 -0
- package/dist/Websites/generic/AutotagWebsite.d.ts.map +1 -0
- package/dist/Websites/generic/AutotagWebsite.js +355 -0
- package/dist/Websites/generic/AutotagWebsite.js.map +1 -0
- package/dist/Websites/index.d.ts +2 -0
- package/dist/Websites/index.d.ts.map +1 -0
- package/dist/Websites/index.js +2 -0
- package/dist/Websites/index.js.map +1 -0
- package/dist/index.d.ts +7 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +7 -0
- package/dist/index.js.map +1 -0
- package/dist/src/CloudStorage/generic/CloudStorageBase.d.ts +3 -3
- package/dist/src/CloudStorage/generic/CloudStorageBase.js +2 -2
- package/dist/src/CloudStorage/index.d.ts +2 -2
- package/dist/src/CloudStorage/index.js +2 -2
- package/dist/src/CloudStorage/providers/AutotagAzureBlob.d.ts +2 -2
- package/dist/src/CloudStorage/providers/AutotagAzureBlob.js +1 -1
- package/dist/src/Core/index.d.ts +1 -1
- package/dist/src/Core/index.js +1 -1
- package/dist/src/Engine/generic/AutotagBaseEngine.d.ts +2 -2
- package/dist/src/Engine/generic/AutotagBaseEngine.js +2 -2
- package/dist/src/Engine/index.d.ts +3 -3
- package/dist/src/Engine/index.js +3 -3
- package/dist/src/Entity/generic/AutotagEntity.d.ts +2 -2
- package/dist/src/Entity/generic/AutotagEntity.js +2 -2
- package/dist/src/Entity/index.d.ts +1 -1
- package/dist/src/Entity/index.js +1 -1
- package/dist/src/LocalFileSystem/generic/AutotagLocalFileSystem.d.ts +2 -2
- package/dist/src/LocalFileSystem/generic/AutotagLocalFileSystem.js +2 -2
- package/dist/src/LocalFileSystem/index.d.ts +1 -1
- package/dist/src/LocalFileSystem/index.js +1 -1
- package/dist/src/RSSFeed/generic/AutotagRSSFeed.d.ts +3 -3
- package/dist/src/RSSFeed/generic/AutotagRSSFeed.js +3 -3
- package/dist/src/RSSFeed/index.d.ts +2 -2
- package/dist/src/RSSFeed/index.js +2 -2
- package/dist/src/Websites/generic/AutotagWebsite.d.ts +2 -2
- package/dist/src/Websites/generic/AutotagWebsite.js +2 -2
- package/dist/src/Websites/index.d.ts +1 -1
- package/dist/src/Websites/index.js +1 -1
- package/dist/src/index.d.ts +6 -6
- package/dist/src/index.js +6 -6
- package/dist/tsconfig.tsbuildinfo +1 -1
- package/package.json +21 -20
|
@@ -0,0 +1,177 @@
|
|
|
1
|
+
// TypeScript-emitted decorator helper. Reuses `this.__decorate` when one is already defined, otherwise defines it here.
// `c` is the number of arguments passed. The starting value `r` is `target` when c < 3; otherwise it is `desc`,
// which is first replaced by the own property descriptor of `target[key]` when `desc === null`.
// If Reflect.decorate exists it does the work. Otherwise `decorators` is walked from last to first and each
// non-falsy entry is called as d(r) when c < 3, d(target, key, r) when c > 3, or d(target, key) when c is exactly 3;
// a truthy return value replaces `r`.
// When c > 3 and `r` is truthy, `r` is defined as property `key` on `target`. The helper always returns `r`.
var __decorate = (this && this.__decorate) || function (decorators, target, key, desc) {
|
|
2
|
+
var c = arguments.length, r = c < 3 ? target : desc === null ? desc = Object.getOwnPropertyDescriptor(target, key) : desc, d;
|
|
3
|
+
if (typeof Reflect === "object" && typeof Reflect.decorate === "function") r = Reflect.decorate(decorators, target, key, desc);
|
|
4
|
+
else for (var i = decorators.length - 1; i >= 0; i--) if (d = decorators[i]) r = (c < 3 ? d(r) : c > 3 ? d(target, key, r) : d(target, key)) || r;
|
|
5
|
+
return c > 3 && r && Object.defineProperty(target, key, r), r;
|
|
6
|
+
};
|
|
7
|
+
// TypeScript-emitted metadata helper. Reuses `this.__metadata` when one is already defined, otherwise defines it here.
// Returns Reflect.metadata(k, v) when Reflect.metadata is a function; otherwise falls through and returns undefined.
var __metadata = (this && this.__metadata) || function (k, v) {
|
|
8
|
+
if (typeof Reflect === "object" && typeof Reflect.metadata === "function") return Reflect.metadata(k, v);
|
|
9
|
+
};
|
|
10
|
+
import { Metadata, RunView } from '@memberjunction/core';
|
|
11
|
+
import { RegisterClass } from '@memberjunction/global';
|
|
12
|
+
import { AutotagBase } from "../../Core/index.js";
|
|
13
|
+
import { AutotagBaseEngine } from "../../Engine/index.js";
|
|
14
|
+
import { RSSItem } from './RSS.types.js';
|
|
15
|
+
import axios from 'axios';
|
|
16
|
+
import crypto from 'crypto';
|
|
17
|
+
import Parser from 'rss-parser';
|
|
18
|
+
import dotenv from 'dotenv';
|
|
19
|
+
// Module-load side effect: calls dotenv's config() once on import, passing the `quiet: true` option through to dotenv.
dotenv.config({ quiet: true });
|
|
20
|
+
/**
 * Autotag provider for content sources whose type is 'RSS Feed'.
 *
 * For each such content source it fetches and parses the feed, creates or updates the matching
 * 'Content Items' records, and hands the new or changed items to the shared AutotagBaseEngine
 * instance for text extraction and LLM processing.
 */
let AutotagRSSFeed = class AutotagRSSFeed extends AutotagBase {
    constructor() {
        super();
        // Shared engine instance used below for content-source lookups, HTML parsing and LLM processing.
        this.engine = AutotagBaseEngine.Instance;
    }

    /**
     * @returns The user stored by the most recent Autotag() call (undefined before the first call).
     */
    getContextUser() {
        return this.contextUser;
    }

    /**
     * Entry point of the RSS autotagging run (implements the AutotagBase method of the same name).
     * Stores the context user, resolves the ID of the 'RSS Feed' content source type, loads the content
     * sources of that type, collects their new or modified content items, and passes those items to the
     * engine's ExtractTextAndProcessWithLLM.
     * @param contextUser - User passed to every engine and data call made during the run
     */
    async Autotag(contextUser) {
        this.contextUser = contextUser;
        this.contentSourceTypeID = await this.engine.setSubclassContentSourceType('RSS Feed', this.contextUser);
        const contentSources = await this.engine.getAllContentSources(this.contextUser, this.contentSourceTypeID);
        const contentItemsToProcess = await this.SetContentItemsToProcess(contentSources);
        await this.engine.ExtractTextAndProcessWithLLM(contentItemsToProcess, this.contextUser);
    }

    /**
     * Builds the list of content items that need processing for the given content sources
     * (implements the AutotagBase method of the same name).
     * @param contentSources - Content sources to check for new or modified feed items
     * @returns Content items that were created or whose checksum changed, across all sources
     */
    async SetContentItemsToProcess(contentSources) {
        const contentItemsToProcess = [];
        for (const contentSource of contentSources) {
            // Apply per-source parameter overrides, if the engine returns any.
            const contentSourceParamsMap = await this.engine.getContentSourceParams(contentSource, this.contextUser);
            if (contentSourceParamsMap) {
                contentSourceParamsMap.forEach((value, key) => {
                    // Only keys that already exist on this instance are overridden. Note that `in` also
                    // matches inherited members, so a parameter named like a method would shadow it.
                    if (key in this) {
                        this[key] = value;
                    }
                });
            }
            const contentSourceParams = {
                contentSourceID: contentSource.ID,
                name: contentSource.Name,
                ContentTypeID: contentSource.ContentTypeID,
                ContentFileTypeID: contentSource.ContentFileTypeID,
                ContentSourceTypeID: contentSource.ContentSourceTypeID,
                URL: contentSource.URL
            };
            const allRSSItems = await this.parseRSSFeed(contentSourceParams.URL);
            const contentItems = await this.SetNewAndModifiedContentItems(allRSSItems, contentSourceParams);
            if (contentItems && contentItems.length > 0) {
                contentItemsToProcess.push(...contentItems);
            }
            else {
                // No content items found to process
                console.log(`No content items found to process for content source: ${contentSource.Get('Name')}`);
            }
        }
        return contentItemsToProcess;
    }

    /**
     * For every parsed feed item, looks up an existing 'Content Items' record of the same content source
     * whose URL equals the item's link or whose Description equals the item's description.
     * An existing record is updated (and returned) only when its stored Checksum differs from the item's
     * current checksum; an item with no matching record is saved as a new content item and returned.
     * @param allRSSItems - Feed items as produced by parseRSSFeed
     * @param contentSourceParams - IDs, name and URL of the content source the items belong to
     * @returns Content items that were added or modified
     */
    async SetNewAndModifiedContentItems(allRSSItems, contentSourceParams) {
        // Feed text is untrusted: double every single quote so a value cannot terminate the SQL string
        // literal it is embedded in (an apostrophe in a description previously broke the filter).
        const toSQLLiteral = (value) => String(value ?? '').replace(/'/g, "''");
        const sourceIDLiteral = toSQLLiteral(contentSourceParams.contentSourceID);
        const contentItemsToProcess = [];
        for (const RSSContentItem of allRSSItems) {
            const linkLiteral = toSQLLiteral(RSSContentItem.link);
            const descriptionLiteral = toSQLLiteral(RSSContentItem.description);
            const rv = new RunView();
            const results = await rv.RunView({
                EntityName: 'Content Items',
                ExtraFilter: `ContentSourceID = '${sourceIDLiteral}' AND (URL = '${linkLiteral}' OR Description = '${descriptionLiteral}')`, // According to the RSS spec, all items must contain either a title or a description.
                ResultType: 'entity_object',
            }, this.contextUser);
            if (results.Success && results.Results.length) {
                const contentItemResult = results.Results[0];
                // This content item already exists, check the last hash to see if it has been modified
                const lastStoredHash = contentItemResult.Checksum;
                const newHash = await this.getChecksumFromRSSItem(RSSContentItem, this.contextUser);
                if (lastStoredHash !== newHash) {
                    // This content item has been modified
                    const md = new Metadata();
                    const contentItem = await md.GetEntityObject('Content Items', this.contextUser);
                    // Load must finish before fields are changed and saved; it was previously not awaited.
                    await contentItem.Load(contentItemResult.ID);
                    contentItem.Checksum = newHash;
                    contentItem.Text = JSON.stringify(RSSContentItem);
                    await contentItem.Save();
                    contentItemsToProcess.push(contentItem); // Content item was modified, add to list
                }
            }
            else {
                // This content item does not exist, add it
                const md = new Metadata();
                const contentItem = await md.GetEntityObject('Content Items', this.contextUser);
                contentItem.ContentSourceID = contentSourceParams.contentSourceID;
                contentItem.Name = contentSourceParams.name;
                contentItem.Description = RSSContentItem.description || await this.engine.getContentItemDescription(contentSourceParams, this.contextUser);
                contentItem.ContentTypeID = contentSourceParams.ContentTypeID;
                contentItem.ContentFileTypeID = contentSourceParams.ContentFileTypeID;
                contentItem.ContentSourceTypeID = contentSourceParams.ContentSourceTypeID;
                contentItem.Checksum = await this.getChecksumFromRSSItem(RSSContentItem, this.contextUser);
                contentItem.URL = RSSContentItem.link || contentSourceParams.URL;
                contentItem.Text = JSON.stringify(RSSContentItem);
                await contentItem.Save();
                contentItemsToProcess.push(contentItem); // Content item was added, add to list
            }
        }
        return contentItemsToProcess;
    }

    /**
     * Downloads and parses the feed at `url` and maps every entry to an RSSItem.
     * Missing entry fields default to ''. The entry's 'content:encoded' value (or, failing that, its
     * 'content' value) is passed through the engine's parseHTML before being stored as `content`.
     * @param url - Feed URL; it must pass urlIsValid
     * @returns The mapped items in feed order, or an empty array when the URL is invalid or any step
     *          fails (the error is logged, not rethrown)
     */
    async parseRSSFeed(url) {
        try {
            if (!(await this.urlIsValid(url))) {
                throw new Error(`Invalid URL: ${url}`);
            }
            const parser = new Parser();
            const feed = await parser.parseURL(url);
            const RSSItems = [];
            // A plain loop is required here: forEach ignores the promise returned by an async callback,
            // which made this method return before any item had been pushed.
            for (const item of feed.items ?? []) {
                const rssItem = new RSSItem();
                rssItem.title = item.title ?? '';
                rssItem.link = item.link ?? '';
                rssItem.description = item.description ?? '';
                rssItem.pubDate = item.pubDate ?? '';
                rssItem.guid = item.guid ?? '';
                rssItem.category = item.category ?? '';
                const content = item['content:encoded'] ?? item['content'] ?? '';
                rssItem.content = await this.engine.parseHTML(content);
                rssItem.author = item.author ?? '';
                rssItem.comments = item.comments ?? '';
                rssItem.source = item.source ?? '';
                RSSItems.push(rssItem);
            }
            return RSSItems;
        }
        catch (error) {
            console.error('Error fetching RSS feed:', error);
            return [];
        }
    }

    /**
     * Checks a URL by sending an HTTP HEAD request with axios.
     * @param url - URL to probe
     * @returns true only when the response status is exactly 200; false when the status differs or
     *          the request throws (the failure is logged)
     */
    async urlIsValid(url) {
        try {
            const response = await axios.head(url);
            return response.status === 200;
        }
        catch (e) {
            console.error(`Invalid URL: ${url}`);
            return false;
        }
    }

    /**
     * Computes the change-detection checksum of a feed item.
     * @param RSSContentItem - Item to hash
     * @param contextUser - Not used by this method; kept for interface compatibility
     * @returns SHA-256 hex digest of JSON.stringify(RSSContentItem)
     */
    async getChecksumFromRSSItem(RSSContentItem, contextUser) {
        const hash = crypto.createHash('sha256').update(JSON.stringify(RSSContentItem)).digest('hex');
        return hash;
    }
};
|
|
172
|
+
// Rebinds AutotagRSSFeed to the result of applying two decorators to the class via __decorate:
// RegisterClass(AutotagBase, 'AutotagRSSFeed') and "design:paramtypes" metadata with an empty
// constructor-parameter list. The decorated class is then exported by name.
AutotagRSSFeed = __decorate([
|
|
173
|
+
RegisterClass(AutotagBase, 'AutotagRSSFeed'),
|
|
174
|
+
__metadata("design:paramtypes", [])
|
|
175
|
+
], AutotagRSSFeed);
|
|
176
|
+
export { AutotagRSSFeed };
|
|
177
|
+
//# sourceMappingURL=AutotagRSSFeed.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"AutotagRSSFeed.js","sourceRoot":"","sources":["../../../src/RSSFeed/generic/AutotagRSSFeed.ts"],"names":[],"mappings":";;;;;;;;;AAAA,OAAO,EAAY,QAAQ,EAAE,OAAO,EAAE,MAAM,sBAAsB,CAAC;AACnE,OAAO,EAAE,aAAa,EAAE,MAAM,wBAAwB,CAAC;AACvD,OAAO,EAAE,WAAW,EAAE,MAAM,YAAY,CAAC;AACzC,OAAO,EAAE,iBAAiB,EAAuB,MAAM,cAAc,CAAC;AAEtE,OAAO,EAAE,OAAO,EAAE,MAAM,aAAa,CAAC;AACtC,OAAO,KAAK,MAAM,OAAO,CAAA;AACzB,OAAO,MAAM,MAAM,QAAQ,CAAA;AAC3B,OAAO,MAAM,MAAM,YAAY,CAAA;AAC/B,OAAO,MAAM,MAAM,QAAQ,CAAC;AAC5B,MAAM,CAAC,MAAM,CAAC,EAAE,KAAK,EAAE,IAAI,EAAE,CAAC,CAAA;AAGvB,IAAM,cAAc,GAApB,MAAM,cAAe,SAAQ,WAAW;IAK3C;QACI,KAAK,EAAE,CAAC;QACR,IAAI,CAAC,MAAM,GAAG,iBAAiB,CAAC,QAAQ,CAAC;IAC7C,CAAC;IAES,cAAc;QACpB,OAAO,IAAI,CAAC,WAAW,CAAC;IAC5B,CAAC;IAED;;;;OAIG;IACI,KAAK,CAAC,OAAO,CAAC,WAAqB;QACtC,IAAI,CAAC,WAAW,GAAG,WAAW,CAAC;QAC/B,IAAI,CAAC,mBAAmB,GAAG,MAAM,IAAI,CAAC,MAAM,CAAC,4BAA4B,CAAC,UAAU,EAAE,IAAI,CAAC,WAAW,CAAC,CAAC;QACxG,MAAM,cAAc,GAAG,MAAM,IAAI,CAAC,MAAM,CAAC,oBAAoB,CAAC,IAAI,CAAC,WAAW,EAAE,IAAI,CAAC,mBAAmB,CAAC,CAAC;QAC1G,MAAM,qBAAqB,GAAG,MAAM,IAAI,CAAC,wBAAwB,CAAC,cAAc,CAAC,CAAC;QAClF,MAAM,IAAI,CAAC,MAAM,CAAC,4BAA4B,CAAC,qBAAqB,EAAE,IAAI,CAAC,WAAW,CAAC,CAAC;IAC5F,CAAC;IAED;;;;;OAKG;IACI,KAAK,CAAC,wBAAwB,CAAC,cAAqC;QACvE,MAAM,qBAAqB,GAAwB,EAAE,CAAA;QACrD,KAAK,MAAM,aAAa,IAAI,cAAc,EAAE,CAAC;YAEzC,2FAA2F;YAC3F,MAAM,sBAAsB,GAAG,MAAM,IAAI,CAAC,MAAM,CAAC,sBAAsB,CAAC,aAAa,EAAE,IAAI,CAAC,WAAW,CAAC,CAAC;YACzG,IAAI,sBAAsB,EAAE,CAAC;gBACzB,wDAAwD;gBACxD,sBAAsB,CAAC,OAAO,CAAC,CAAC,KAAK,EAAE,GAAG,EAAE,EAAE;oBAC1C,IAAI,GAAG,IAAI,IAAI,EAAE,CAAC;wBACb,IAAY,CAAC,GAAG,CAAC,GAAG,KAAK,CAAC;oBAC/B,CAAC;gBACL,CAAC,CAAC,CAAA;YACN,CAAC;YAED,MAAM,mBAAmB,GAAwB;gBAC7C,eAAe,EAAE,aAAa,CAAC,EAAE;gBACjC,IAAI,EAAE,aAAa,CAAC,IAAI;gBACxB,aAAa,EAAE,aAAa,CAAC,aAAa;gBAC1C,iBAAiB,EAAE,aAAa,CAAC,iBAAiB;gBAClD,mBAAmB,EAAE,aAAa,CAAC,mBAAmB;gBACtD,GAAG,EAAE,aAAa,CAAC,GAAG;aACzB,CAAA;YAED,MAAM,WAAW,GAAc,MAAM,IAAI,CAAC,YAAY,CAAC,mBAAmB,CAAC,GAAG,CAAC,CAAC;YAEhF,MAAM,YAAY,GAAwB,MAAM,IAAI,CAAC,6BAA6B,CAAC,WAAW,EAAE,mBAAmB,CA
AC,CAAA;YAEpH,IAAI,YAAY,IAAI,YAAY,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;gBAC1C,qBAAqB,CAAC,IAAI,CAAC,GAAG,YAAY,CAAC,CAAC;YAChD,CAAC;iBACI,CAAC;gBACF,oCAAoC;gBACpC,OAAO,CAAC,GAAG,CAAC,yDAAyD,aAAa,CAAC,GAAG,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC;YACtG,CAAC;QACL,CAAC;QACD,OAAO,qBAAqB,CAAA;IAChC,CAAC;IAEM,KAAK,CAAC,6BAA6B,CAAC,WAAsB,EAAE,mBAAwC;QACvG,MAAM,qBAAqB,GAAwB,EAAE,CAAC;QACtD,KAAK,MAAM,cAAc,IAAI,WAAW,EAAE,CAAC;YACvC,MAAM,EAAE,GAAG,IAAI,OAAO,EAAE,CAAC;YACzB,MAAM,OAAO,GAAG,MAAM,EAAE,CAAC,OAAO,CAAC;gBAC7B,UAAU,EAAE,eAAe;gBAC3B,WAAW,EAAE,sBAAsB,mBAAmB,CAAC,eAAe,iBAAiB,cAAc,CAAC,IAAI,uBAAuB,cAAc,CAAC,WAAW,IAAI,EAAE,qFAAqF;gBACtP,UAAU,EAAE,eAAe;aAC9B,EAAE,IAAI,CAAC,WAAW,CAAC,CAAA;YAEpB,IAAI,OAAO,CAAC,OAAO,IAAI,OAAO,CAAC,OAAO,CAAC,MAAM,EAAE,CAAC;gBAC5C,MAAM,iBAAiB,GAAuB,OAAO,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC;gBACjE,uFAAuF;gBACvF,MAAM,cAAc,GAAW,iBAAiB,CAAC,QAAQ,CAAA;gBACzD,MAAM,OAAO,GAAW,MAAM,IAAI,CAAC,sBAAsB,CAAC,cAAc,EAAE,IAAI,CAAC,WAAW,CAAC,CAAA;gBAE3F,IAAI,cAAc,KAAK,OAAO,EAAE,CAAC;oBAC7B,sCAAsC;oBACtC,MAAM,EAAE,GAAG,IAAI,QAAQ,EAAE,CAAC;oBAC1B,MAAM,WAAW,GAAG,MAAM,EAAE,CAAC,eAAe,CAAoB,eAAe,EAAE,IAAI,CAAC,WAAW,CAAC,CAAC;oBACnG,WAAW,CAAC,IAAI,CAAC,iBAAiB,CAAC,EAAE,CAAC,CAAC;oBACvC,WAAW,CAAC,QAAQ,GAAG,OAAO,CAAA;oBAC9B,WAAW,CAAC,IAAI,GAAG,IAAI,CAAC,SAAS,CAAC,cAAc,CAAC,CAAA;oBAEjD,MAAM,WAAW,CAAC,IAAI,EAAE,CAAC;oBACzB,qBAAqB,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC,CAAC,yCAAyC;gBACtF,CAAC;YACL,CAAC;iBACI,CAAC;gBACF,2CAA2C;gBAC3C,MAAM,EAAE,GAAG,IAAI,QAAQ,EAAE,CAAC;gBAC1B,MAAM,WAAW,GAAG,MAAM,EAAE,CAAC,eAAe,CAAoB,eAAe,EAAE,IAAI,CAAC,WAAW,CAAC,CAAC;gBACnG,WAAW,CAAC,eAAe,GAAG,mBAAmB,CAAC,eAAe,CAAA;gBACjE,WAAW,CAAC,IAAI,GAAG,mBAAmB,CAAC,IAAI,CAAA;gBAC3C,WAAW,CAAC,WAAW,GAAG,cAAc,CAAC,WAAW,IAAI,MAAM,IAAI,CAAC,MAAM,CAAC,yBAAyB,CAAC,mBAAmB,EAAE,IAAI,CAAC,WAAW,CAAC,CAAA;gBAC1I,WAAW,CAAC,aAAa,GAAG,mBAAmB,CAAC,aAAa,CAAA;gBAC7D,WAAW,CAAC,iBAAiB,GAAG,mBAAmB,CAAC,iBAAiB,CAAA;gBACrE,WAAW,CAAC,mBAAmB,GAAG,mBAAmB,CAAC,mBAAmB,CAAA;gBACzE,WAAW,CAAC,QAAQ,GAAG,MAAM,IAAI,CAAC,sBAAsB,CAAC,cAAc,EAAE,IAAI,CAAC,WAAW,CAAC,CAAA;gBAC1F
,WAAW,CAAC,GAAG,GAAG,cAAc,CAAC,IAAI,IAAI,mBAAmB,CAAC,GAAG,CAAA;gBAChE,WAAW,CAAC,IAAI,GAAG,IAAI,CAAC,SAAS,CAAC,cAAc,CAAC,CAAA;gBAEjD,MAAM,WAAW,CAAC,IAAI,EAAE,CAAC;gBACzB,qBAAqB,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC,CAAC,sCAAsC;YAEnF,CAAC;QACL,CAAC;QACD,OAAO,qBAAqB,CAAA;IAChC,CAAC;IAEM,KAAK,CAAC,YAAY,CAAC,GAAW;QACjC,IAAI,CAAC;YACD,IAAG,MAAM,IAAI,CAAC,UAAU,CAAC,GAAG,CAAC,EAAE,CAAC;gBAC5B,MAAM,QAAQ,GAAc,EAAE,CAAA;gBAC9B,MAAM,MAAM,GAAG,IAAI,MAAM,EAAE,CAAC;gBAC5B,MAAM,IAAI,GAAG,MAAM,MAAM,CAAC,QAAQ,CAAC,GAAG,CAAC,CAAC;gBACxC,MAAM,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC;gBAEzB,sEAAsE;gBACtE,KAAK,CAAC,OAAO,CAAC,KAAK,EAAE,IAAS,EAAE,EAAE;oBAC9B,MAAM,OAAO,GAAG,IAAI,OAAO,EAAE,CAAC;oBAC9B,OAAO,CAAC,KAAK,GAAG,IAAI,CAAC,KAAK,IAAI,EAAE,CAAC;oBACjC,OAAO,CAAC,IAAI,GAAG,IAAI,CAAC,IAAI,IAAI,EAAE,CAAC;oBAC/B,OAAO,CAAC,WAAW,GAAG,IAAI,CAAC,WAAW,IAAI,EAAE,CAAC;oBAC7C,OAAO,CAAC,OAAO,GAAG,IAAI,CAAC,OAAO,IAAI,EAAE,CAAC;oBACrC,OAAO,CAAC,IAAI,GAAG,IAAI,CAAC,IAAI,IAAI,EAAE,CAAC;oBAC/B,OAAO,CAAC,QAAQ,GAAG,IAAI,CAAC,QAAQ,IAAI,EAAE,CAAC;oBACvC,MAAM,OAAO,GAAG,IAAI,CAAC,iBAAiB,CAAC,IAAI,IAAI,CAAC,SAAS,CAAC,IAAI,EAAE,CAAC;oBACjE,OAAO,CAAC,OAAO,GAAG,MAAM,IAAI,CAAC,MAAM,CAAC,SAAS,CAAC,OAAO,CAAC,CAAC;oBACvD,OAAO,CAAC,MAAM,GAAG,IAAI,CAAC,MAAM,IAAI,EAAE,CAAC;oBACnC,OAAO,CAAC,QAAQ,GAAG,IAAI,CAAC,QAAQ,IAAI,EAAE,CAAC;oBACvC,OAAO,CAAC,MAAM,GAAG,IAAI,CAAC,MAAM,IAAI,EAAE,CAAC;oBACnC,QAAQ,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;gBAC3B,CAAC,CAAC,CAAC;gBAEH,OAAO,QAAQ,CAAA;YACnB,CAAC;iBACI,CAAC;gBACF,MAAM,IAAI,KAAK,CAAC,gBAAgB,GAAG,EAAE,CAAC,CAAC;YAC3C,CAAC;QACL,CAAC;QACD,OAAO,KAAK,EAAE,CAAC;YACX,OAAO,CAAC,KAAK,CAAC,0BAA0B,EAAE,KAAK,CAAC,CAAC;YACjD,OAAO,EAAE,CAAC;QACZ,CAAC;IACP,CAAC;IAES,KAAK,CAAC,UAAU,CAAC,GAAW;QAClC,IAAI,CAAC;YACD,MAAM,QAAQ,GAAG,MAAM,KAAK,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;YACvC,OAAO,QAAQ,CAAC,MAAM,KAAK,GAAG,CAAC;QACnC,CAAC;QACD,OAAO,CAAC,EAAE,CAAC;YACP,OAAO,CAAC,KAAK,CAAC,gBAAgB,GAAG,EAAE,CAAC,CAAC;YACrC,OAAO,KAAK,CAAC;QACjB,CAAC;IACL,CAAC;IAEM,KAAK,CAAC,sBAAsB,CAAC,cAAuB,EAAE,WAAqB;QAC9E,MAAM,IAAI,GAAG,MAAM,CAAC,UAAU,CAAC,QAAQ,C
AAC,CAAC,MAAM,CAAC,IAAI,CAAC,SAAS,CAAC,cAAc,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CAAA;QAC7F,OAAO,IAAI,CAAA;IACf,CAAC;CACJ,CAAA;AA9KY,cAAc;IAD1B,aAAa,CAAC,WAAW,EAAE,gBAAgB,CAAC;;GAChC,cAAc,CA8K1B"}
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
/**
 * Type declaration for RSSItem, a plain data holder for one feed entry.
 * All ten members are optional strings; no methods or constructor parameters are declared.
 */
export declare class RSSItem {
|
|
2
|
+
title?: string;
|
|
3
|
+
link?: string;
|
|
4
|
+
description?: string;
|
|
5
|
+
pubDate?: string;
|
|
6
|
+
guid?: string;
|
|
7
|
+
category?: string;
|
|
8
|
+
content?: string;
|
|
9
|
+
author?: string;
|
|
10
|
+
comments?: string;
|
|
11
|
+
source?: string;
|
|
12
|
+
}
|
|
13
|
+
//# sourceMappingURL=RSS.types.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"RSS.types.d.ts","sourceRoot":"","sources":["../../../src/RSSFeed/generic/RSS.types.ts"],"names":[],"mappings":"AAAA,qBAAa,OAAO;IAChB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,MAAM,CAAC,EAAE,MAAM,CAAC;CACnB"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"RSS.types.js","sourceRoot":"","sources":["../../../src/RSSFeed/generic/RSS.types.ts"],"names":[],"mappings":"AAAA,MAAM,OAAO,OAAO;CAWnB"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/RSSFeed/index.ts"],"names":[],"mappings":"AAAA,cAAc,qBAAqB,CAAA;AACnC,cAAc,0BAA0B,CAAA"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/RSSFeed/index.ts"],"names":[],"mappings":"AAAA,cAAc,qBAAqB,CAAA;AACnC,cAAc,0BAA0B,CAAA"}
|
|
@@ -0,0 +1,85 @@
|
|
|
1
|
+
import { AutotagBase } from '../../Core/index.js';
|
|
2
|
+
import { ContentSourceParams } from '../../Engine/index.js';
|
|
3
|
+
import { UserInfo } from '@memberjunction/core';
|
|
4
|
+
import { ContentSourceEntity, ContentItemEntity } from '@memberjunction/core-entities';
|
|
5
|
+
import * as cheerio from 'cheerio';
|
|
6
|
+
/**
 * Type declaration for AutotagWebsite, a subclass of AutotagBase.
 * Only signatures are declared in this file; the behavioural descriptions in the member comments
 * below are carried over from the original comments and cannot be checked against this declaration alone.
 */
export declare class AutotagWebsite extends AutotagBase {
|
|
7
|
+
private contextUser;
|
|
8
|
+
private engine;
|
|
9
|
+
protected contentSourceTypeID: string;
|
|
10
|
+
protected CrawlOtherSitesInTopLevelDomain: boolean;
|
|
11
|
+
protected CrawlSitesInLowerLevelDomain: boolean;
|
|
12
|
+
protected MaxDepth: number;
|
|
13
|
+
protected RootURL: string;
|
|
14
|
+
protected URLPattern: string;
|
|
15
|
+
protected visitedURLs: Set<string>;
|
|
16
|
+
constructor();
|
|
17
|
+
protected getContextUser(): UserInfo;
|
|
18
|
+
/**
|
|
19
|
+
* Implementation of the abstract AutotagBase method that runs the entire autotagging process; it is the entry point of that process.
|
|
20
|
+
* It initializes the connection, retrieves the content sources corresponding to the content source type, sets the content items that we want to process,
|
|
21
|
+
* extracts and processes the text, and sets the results in the database.
* @param contextUser - The UserInfo under which the run is performed
* @returns A promise that resolves with no value
|
|
22
|
+
*/
|
|
23
|
+
Autotag(contextUser: UserInfo): Promise<void>;
|
|
24
|
+
/**
|
|
25
|
+
* Given a list of content sources, retrieve all content items associated with those content sources.
|
|
26
|
+
* The content items are then processed to determine if they have been modified since the last time they were processed or if they are new content items.
|
|
27
|
+
* @param contentSources - Array of ContentSourceEntity objects to examine
|
|
28
|
+
* @returns A promise resolving to an array of ContentItemEntity objects
|
|
29
|
+
*/
|
|
30
|
+
SetContentItemsToProcess(contentSources: ContentSourceEntity[]): Promise<ContentItemEntity[]>;
|
|
31
|
+
/**
|
|
32
|
+
* Given a list of content item links, check if the content item already exists in the database.
|
|
33
|
+
* If the content item exists, check if the content item has been modified since the last time it was processed.
|
|
34
|
+
* If the content item does not exist, create a new content item and add it to the list of content items to process.
|
|
35
|
+
* @param contentItemLinks - Array of link strings, one per candidate content item
|
|
36
|
+
* @param contentSourceParams - ContentSourceParams of the content source the links belong to
|
|
37
|
+
* @param contextUser - The UserInfo under which the lookups and saves are performed
|
|
38
|
+
* @returns A promise resolving to an array of ContentItemEntity objects
|
|
39
|
+
*/
|
|
40
|
+
protected SetNewAndModifiedContentItems(contentItemLinks: string[], contentSourceParams: ContentSourceParams, contextUser: UserInfo): Promise<ContentItemEntity[]>;
|
|
41
|
+
fetchPageContent(url: string): Promise<string>;
|
|
42
|
+
getTextWithLineBreaks(element: any, $: cheerio.CheerioAPI): string;
|
|
43
|
+
/**
|
|
44
|
+
* Given a URL, this function extracts text from a webpage.
|
|
45
|
+
* @param url - URL of the webpage
|
|
46
|
+
* @returns The text extracted from the webpage
|
|
47
|
+
*/
|
|
48
|
+
parseWebPage(url: string): Promise<string>;
|
|
49
|
+
/**
|
|
50
|
+
* Given a root URL that corresponds to a content source, retrieve all the links in accordance with the crawl settings.
|
|
51
|
+
* If the crawl settings are set to crawl other sites in the top level domain, then all links in the top level domain will be retrieved.
|
|
52
|
+
* If the crawl settings are set to crawl sites in lower level domains, then the function is recursively called to retrieve all links in the lower level domains.
|
|
53
|
+
* @param url - URL to start from
* @param rootURL - Root URL of the content source
* @param regex - Regular expression; its role is not visible in this declaration (presumably a link filter - confirm against the implementation)
|
|
54
|
+
* @returns A promise resolving to an array of link strings
|
|
55
|
+
*/
|
|
56
|
+
protected getAllLinksFromContentSource(url: string, rootURL: string, regex: RegExp): Promise<string[]>;
|
|
57
|
+
/**
|
|
58
|
+
* For a given URL, retrieves all other links at that top level domain.
|
|
59
|
+
* @param url - URL to inspect
|
|
60
|
+
* @param rootURL - Root URL of the content source
|
|
61
|
+
* (No visitedURLs parameter is declared; the class declares a protected visitedURLs member instead.)
|
|
62
|
+
* @returns A promise that resolves with no value
|
|
63
|
+
*/
|
|
64
|
+
protected getTopLevelLinks(url: string, rootURL: string): Promise<void>;
|
|
65
|
+
/**
|
|
66
|
+
* Simple check to see if the URL is at the highest level domain.
|
|
67
|
+
* @param url - URL to check
|
|
68
|
+
* @returns A boolean result of the check
|
|
69
|
+
*/
|
|
70
|
+
protected isHighestDomain(url: string): boolean;
|
|
71
|
+
protected getBasePath(url: string): string;
|
|
72
|
+
protected getPathName(url: string): string;
|
|
73
|
+
protected urlIsValid(url: string): Promise<boolean>;
|
|
74
|
+
/**
|
|
75
|
+
* For a given URL, retrieves all links at lower level domains up to the specified crawl depth.
|
|
76
|
+
* @param url - URL to start from
|
|
77
|
+
* @param rootURL - Root URL of the content source
|
|
78
|
+
* @param crawlDepth - Crawl depth as a number
|
|
79
|
+
* @param scrapedURLs - Set of URL strings (presumably the URLs collected so far - confirm against the implementation)
* @param regex - Regular expression; its role is not visible in this declaration (presumably a link filter - confirm against the implementation)
|
|
80
|
+
* @returns A promise resolving to a Set of URL strings
|
|
81
|
+
*/
|
|
82
|
+
protected getLowerLevelLinks(url: string, rootURL: string, crawlDepth: number, scrapedURLs: Set<string>, regex: RegExp): Promise<Set<string>>;
|
|
83
|
+
protected delay(ms: number): Promise<unknown>;
|
|
84
|
+
}
|
|
85
|
+
//# sourceMappingURL=AutotagWebsite.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"AutotagWebsite.d.ts","sourceRoot":"","sources":["../../../src/Websites/generic/AutotagWebsite.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,WAAW,EAAE,MAAM,YAAY,CAAC;AACzC,OAAO,EAAqB,mBAAmB,EAAE,MAAM,cAAc,CAAC;AAEtE,OAAO,EAAE,QAAQ,EAAqB,MAAM,sBAAsB,CAAC;AACnE,OAAO,EAAE,mBAAmB,EAAE,iBAAiB,EAAE,MAAM,+BAA+B,CAAC;AACvF,OAAO,KAAK,OAAO,MAAM,SAAS,CAAC;AAMnC,qBACa,cAAe,SAAQ,WAAW;IAC3C,OAAO,CAAC,WAAW,CAAW;IAC9B,OAAO,CAAC,MAAM,CAAoB;IAClC,SAAS,CAAC,mBAAmB,EAAE,MAAM,CAAA;IACrC,SAAS,CAAC,+BAA+B,EAAE,OAAO,CAAC;IACnD,SAAS,CAAC,4BAA4B,EAAE,OAAO,CAAC;IAChD,SAAS,CAAC,QAAQ,EAAE,MAAM,CAAC;IAC3B,SAAS,CAAC,OAAO,EAAE,MAAM,CAAC;IAC1B,SAAS,CAAC,UAAU,EAAE,MAAM,CAAC;IAC7B,SAAS,CAAC,WAAW,EAAE,GAAG,CAAC,MAAM,CAAC,CAAC;;IAQnC,SAAS,CAAC,cAAc,IAAI,QAAQ;IAIpC;;;;OAIG;IACU,OAAO,CAAC,WAAW,EAAE,QAAQ,GAAG,OAAO,CAAC,IAAI,CAAC;IAS1D;;;;;OAKG;IACU,wBAAwB,CAAC,cAAc,EAAE,mBAAmB,EAAE,GAAG,OAAO,CAAC,iBAAiB,EAAE,CAAC;IAoD1G;;;;;;;;OAQG;cACa,6BAA6B,CAAC,gBAAgB,EAAE,MAAM,EAAE,EAAE,mBAAmB,EAAE,mBAAmB,EAAE,WAAW,EAAE,QAAQ,GAAG,OAAO,CAAC,iBAAiB,EAAE,CAAC;IAsE3J,gBAAgB,CAAC,GAAG,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC;IAKpD,qBAAqB,CAAC,OAAO,EAAE,GAAG,EAAE,CAAC,EAAE,OAAO,CAAC,UAAU,GAAG,MAAM;IAgBzE;;;;OAIG;IACU,YAAY,CAAC,GAAG,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC;IAavD;;;;;;OAMG;cACa,4BAA4B,CAAC,GAAG,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,EAAE,CAAC;IAa5G;;;;;;OAMG;cACa,gBAAgB,CAAC,GAAG,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC;IAmC7E;;;;OAIG;IACH,SAAS,CAAC,eAAe,CAAC,GAAG,EAAE,MAAM,GAAG,OAAO;IAW/C,SAAS,CAAC,WAAW,CAAC,GAAG,EAAE,MAAM,GAAG,MAAM;IAW1C,SAAS,CAAC,WAAW,CAAC,GAAG,EAAE,MAAM,GAAG,MAAM;cAa1B,UAAU,CAAC,GAAG,EAAE,MAAM,GAAG,OAAO,CAAC,OAAO,CAAC;IAWzD;;;;;;;OAOG;cACa,kBAAkB,CAAC,GAAG,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,EAAE,UAAU,EAAE,MAAM,EAAE,WAAW,EAAE,GAAG,CAAC,MAAM,CAAC,EAAE,KAAK,EAAE,MAAM,GAAG,OAAO,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC;cA+CnI,KAAK,CAAC,EAAE,EAAE,MAAM;CAGnC"}
|