@memberjunction/content-autotagging 5.22.0 → 5.24.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +78 -18
- package/dist/CloudStorage/generic/CloudStorageBase.d.ts +2 -2
- package/dist/CloudStorage/generic/CloudStorageBase.d.ts.map +1 -1
- package/dist/CloudStorage/generic/CloudStorageBase.js +2 -2
- package/dist/CloudStorage/generic/CloudStorageBase.js.map +1 -1
- package/dist/CloudStorage/index.d.ts +5 -0
- package/dist/CloudStorage/index.d.ts.map +1 -1
- package/dist/CloudStorage/index.js +5 -0
- package/dist/CloudStorage/index.js.map +1 -1
- package/dist/CloudStorage/providers/AutotagCloudStorage.d.ts +61 -0
- package/dist/CloudStorage/providers/AutotagCloudStorage.d.ts.map +1 -0
- package/dist/CloudStorage/providers/AutotagCloudStorage.js +256 -0
- package/dist/CloudStorage/providers/AutotagCloudStorage.js.map +1 -0
- package/dist/Core/generic/AutotagBase.d.ts +9 -1
- package/dist/Core/generic/AutotagBase.d.ts.map +1 -1
- package/dist/Core/generic/AutotagBase.js.map +1 -1
- package/dist/Engine/generic/AutotagBaseEngine.d.ts +397 -15
- package/dist/Engine/generic/AutotagBaseEngine.d.ts.map +1 -1
- package/dist/Engine/generic/AutotagBaseEngine.js +1362 -128
- package/dist/Engine/generic/AutotagBaseEngine.js.map +1 -1
- package/dist/Engine/generic/RateLimiter.d.ts +49 -0
- package/dist/Engine/generic/RateLimiter.d.ts.map +1 -0
- package/dist/Engine/generic/RateLimiter.js +98 -0
- package/dist/Engine/generic/RateLimiter.js.map +1 -0
- package/dist/Engine/index.d.ts +1 -0
- package/dist/Engine/index.d.ts.map +1 -1
- package/dist/Engine/index.js +1 -0
- package/dist/Engine/index.js.map +1 -1
- package/dist/Entity/generic/AutotagEntity.d.ts +64 -15
- package/dist/Entity/generic/AutotagEntity.d.ts.map +1 -1
- package/dist/Entity/generic/AutotagEntity.js +362 -83
- package/dist/Entity/generic/AutotagEntity.js.map +1 -1
- package/dist/LocalFileSystem/generic/AutotagLocalFileSystem.d.ts +2 -2
- package/dist/LocalFileSystem/generic/AutotagLocalFileSystem.d.ts.map +1 -1
- package/dist/LocalFileSystem/generic/AutotagLocalFileSystem.js +2 -2
- package/dist/LocalFileSystem/generic/AutotagLocalFileSystem.js.map +1 -1
- package/dist/RSSFeed/generic/AutotagRSSFeed.d.ts +47 -16
- package/dist/RSSFeed/generic/AutotagRSSFeed.d.ts.map +1 -1
- package/dist/RSSFeed/generic/AutotagRSSFeed.js +239 -121
- package/dist/RSSFeed/generic/AutotagRSSFeed.js.map +1 -1
- package/dist/Websites/generic/AutotagWebsite.d.ts +2 -2
- package/dist/Websites/generic/AutotagWebsite.d.ts.map +1 -1
- package/dist/Websites/generic/AutotagWebsite.js +2 -2
- package/dist/Websites/generic/AutotagWebsite.js.map +1 -1
- package/dist/index.d.ts +1 -0
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +1 -0
- package/dist/index.js.map +1 -1
- package/package.json +16 -8
|
@@ -1,30 +1,61 @@
|
|
|
1
1
|
import { UserInfo } from '@memberjunction/core';
|
|
2
|
-
import { AutotagBase } from
|
|
3
|
-
import { ContentSourceParams } from "../../Engine/index.js";
|
|
2
|
+
import { AutotagBase, AutotagProgressCallback } from '../../Core/index.js';
|
|
4
3
|
import { MJContentSourceEntity, MJContentItemEntity } from '@memberjunction/core-entities';
|
|
5
4
|
import { RSSItem } from './RSS.types.js';
|
|
5
|
+
/**
|
|
6
|
+
* Autotag provider for RSS and Atom feeds. Parses feed items, follows article
|
|
7
|
+
* links to fetch full page content via Cheerio, and creates ContentItems with
|
|
8
|
+
* the extracted article text rather than raw RSS metadata.
|
|
9
|
+
*
|
|
10
|
+
* Fixes:
|
|
11
|
+
* - Text capture: follows item.link to fetch full article text instead of
|
|
12
|
+
* storing JSON.stringify(RSSItem)
|
|
13
|
+
* - Item naming: uses the RSS item title for ContentItem.Name, and the RSS
|
|
14
|
+
* description for ContentItem.Description, instead of the source name
|
|
15
|
+
*/
|
|
6
16
|
export declare class AutotagRSSFeed extends AutotagBase {
    private contextUser;
    private engine;
    protected contentSourceTypeID: string;
    constructor();
    /**
     * Entry point for an RSS autotag run. Resolves the 'RSS Feed' content
     * source type, loads its content sources, collects new or modified feed
     * items and hands them to the engine for text extraction and LLM
     * processing. Errors raised while collecting items or while the engine
     * processes them are logged, not rethrown.
     *
     * @param contextUser - User context used for data access during the run.
     * @param onProgress - Optional progress callback forwarded to the engine.
     */
    Autotag(contextUser: UserInfo, onProgress?: AutotagProgressCallback): Promise<void>;
    /**
     * Collect new or modified ContentItems for each content source. A source
     * whose processing throws is logged and skipped; the others still run.
     *
     * @param contentSources - Content sources to scan.
     * @returns ContentItems gathered from the sources that were processed.
     */
    SetContentItemsToProcess(contentSources: MJContentSourceEntity[]): Promise<MJContentItemEntity[]>;
    /**
     * Process a single content source: parse the RSS feed, detect new/modified
     * items, fetch full article text, and create/update ContentItems.
     */
    private ProcessContentSource;
    /**
     * Process a single RSS feed item: fetch full article text, compute a
     * SHA-256 checksum of it, and create or update the ContentItem. Resolves
     * to null when no text could be obtained or the content is unchanged.
     */
    private ProcessSingleFeedItem;
    /**
     * Fetch the article text for an RSS item.
     *
     * Strategy:
     * 1. Inline feed content longer than 200 characters (trimmed) is used as-is
     * 2. Otherwise the item's link is fetched and the extracted page text is
     *    used when it is longer than 100 characters (trimmed)
     * 3. Otherwise any non-empty inline content is used
     * 4. Otherwise the RSS description (possibly an empty string)
     */
    private FetchArticleText;
    /**
     * Fetch a web page and return its text as extracted by the engine's
     * parseHTML. Resolves to an empty string when the response body is not a
     * string.
     */
    private FetchAndParseWebPage;
    /**
     * Parse an RSS/Atom feed URL and return structured items.
     * The content field is HTML-stripped via the engine's parseHTML.
     * Resolves to an empty array when the URL is unreachable or parsing fails.
     */
    ParseRSSFeed(url: string): Promise<RSSItem[]>;
    /**
     * Check if a URL is reachable via HTTP HEAD request.
     */
    private UrlIsValid;
    /**
     * Load existing ContentItems for this source, keyed by lowercase URL for upsert.
     */
    private LoadExistingContentItems;
}
|
|
30
61
|
//# sourceMappingURL=AutotagRSSFeed.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"AutotagRSSFeed.d.ts","sourceRoot":"","sources":["../../../src/RSSFeed/generic/AutotagRSSFeed.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,
|
|
1
|
+
{"version":3,"file":"AutotagRSSFeed.d.ts","sourceRoot":"","sources":["../../../src/RSSFeed/generic/AutotagRSSFeed.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAA0C,MAAM,sBAAsB,CAAC;AAExF,OAAO,EAAE,WAAW,EAAE,uBAAuB,EAAE,MAAM,YAAY,CAAC;AAElE,OAAO,EAAE,qBAAqB,EAAE,mBAAmB,EAAE,MAAM,+BAA+B,CAAC;AAC3F,OAAO,EAAE,OAAO,EAAE,MAAM,aAAa,CAAC;AAKtC;;;;;;;;;;GAUG;AACH,qBACa,cAAe,SAAQ,WAAW;IAC3C,OAAO,CAAC,WAAW,CAAY;IAC/B,OAAO,CAAC,MAAM,CAAqB;IACnC,SAAS,CAAC,mBAAmB,EAAG,MAAM,CAAC;;IAO1B,OAAO,CAAC,WAAW,EAAE,QAAQ,EAAE,UAAU,CAAC,EAAE,uBAAuB,GAAG,OAAO,CAAC,IAAI,CAAC;IA+BnF,wBAAwB,CAAC,cAAc,EAAE,qBAAqB,EAAE,GAAG,OAAO,CAAC,mBAAmB,EAAE,CAAC;IAgB9G;;;OAGG;YACW,oBAAoB;IA4ClC;;;OAGG;YACW,qBAAqB;IAmDnC;;;;;;;OAOG;YACW,gBAAgB;IA4B9B;;;OAGG;YACW,oBAAoB;IAgBlC;;;OAGG;IACU,YAAY,CAAC,GAAG,EAAE,MAAM,GAAG,OAAO,CAAC,OAAO,EAAE,CAAC;IAwC1D;;OAEG;YACW,UAAU;IASxB;;OAEG;YACW,wBAAwB;CAkBzC"}
|
|
@@ -7,166 +7,284 @@ var __decorate = (this && this.__decorate) || function (decorators, target, key,
|
|
|
7
7
|
var __metadata = (this && this.__metadata) || function (k, v) {
|
|
8
8
|
if (typeof Reflect === "object" && typeof Reflect.metadata === "function") return Reflect.metadata(k, v);
|
|
9
9
|
};
|
|
10
|
-
import { Metadata, RunView } from '@memberjunction/core';
|
|
10
|
+
import { Metadata, RunView, LogStatus, LogError } from '@memberjunction/core';
|
|
11
11
|
import { RegisterClass } from '@memberjunction/global';
|
|
12
|
-
import { AutotagBase } from
|
|
13
|
-
import { AutotagBaseEngine } from
|
|
12
|
+
import { AutotagBase } from '../../Core/index.js';
|
|
13
|
+
import { AutotagBaseEngine } from '../../Engine/index.js';
|
|
14
14
|
import { RSSItem } from './RSS.types.js';
|
|
15
15
|
import axios from 'axios';
|
|
16
16
|
import crypto from 'crypto';
|
|
17
17
|
import Parser from 'rss-parser';
|
|
18
|
-
|
|
19
|
-
|
|
18
|
+
/**
|
|
19
|
+
* Autotag provider for RSS and Atom feeds. Parses feed items, follows article
|
|
20
|
+
* links to fetch full page content via Cheerio, and creates ContentItems with
|
|
21
|
+
* the extracted article text rather than raw RSS metadata.
|
|
22
|
+
*
|
|
23
|
+
* Fixes:
|
|
24
|
+
* - Text capture: follows item.link to fetch full article text instead of
|
|
25
|
+
* storing JSON.stringify(RSSItem)
|
|
26
|
+
* - Item naming: uses the RSS item title for ContentItem.Name, and the RSS
|
|
27
|
+
* description for ContentItem.Description, instead of the source name
|
|
28
|
+
*/
|
|
20
29
|
let AutotagRSSFeed = class AutotagRSSFeed extends AutotagBase {
|
|
21
30
|
constructor() {
|
|
22
31
|
super();
|
|
23
32
|
this.engine = AutotagBaseEngine.Instance;
|
|
24
33
|
}
|
|
25
|
-
|
|
26
|
-
return this.contextUser;
|
|
27
|
-
}
|
|
28
|
-
/**
|
|
29
|
-
* Implemented abstract method from the AutotagBase class. that runs the entire autotagging process. This method is the entry point for the autotagging process.
|
|
30
|
-
* It initializes the connection, retrieves the content sources corresponding to the content source type, sets the content items that we want to process,
|
|
31
|
-
* extracts and processes the text, and sets the results in the database.
|
|
32
|
-
*/
|
|
33
|
-
async Autotag(contextUser) {
|
|
34
|
+
async Autotag(contextUser, onProgress) {
|
|
34
35
|
this.contextUser = contextUser;
|
|
35
36
|
this.contentSourceTypeID = this.engine.SetSubclassContentSourceType('RSS Feed');
|
|
37
|
+
LogStatus(`[RSS] Starting RSS autotag...`);
|
|
36
38
|
const contentSources = await this.engine.getAllContentSources(this.contextUser, this.contentSourceTypeID);
|
|
37
|
-
|
|
38
|
-
|
|
39
|
+
LogStatus(`[RSS] Found ${contentSources.length} RSS source(s)`);
|
|
40
|
+
let contentItemsToProcess;
|
|
41
|
+
try {
|
|
42
|
+
contentItemsToProcess = await this.SetContentItemsToProcess(contentSources);
|
|
43
|
+
LogStatus(`[RSS] SetContentItemsToProcess returned ${contentItemsToProcess.length} items`);
|
|
44
|
+
}
|
|
45
|
+
catch (e) {
|
|
46
|
+
const msg = e instanceof Error ? e.message : String(e);
|
|
47
|
+
LogError(`[RSS] SetContentItemsToProcess THREW: ${msg}`);
|
|
48
|
+
return;
|
|
49
|
+
}
|
|
50
|
+
if (contentItemsToProcess.length > 0) {
|
|
51
|
+
LogStatus(`[RSS] Calling ExtractTextAndProcessWithLLM with ${contentItemsToProcess.length} items...`);
|
|
52
|
+
try {
|
|
53
|
+
await this.engine.ExtractTextAndProcessWithLLM(contentItemsToProcess, this.contextUser, undefined, undefined, onProgress);
|
|
54
|
+
LogStatus(`[RSS] ExtractTextAndProcessWithLLM completed successfully`);
|
|
55
|
+
}
|
|
56
|
+
catch (e) {
|
|
57
|
+
const msg = e instanceof Error ? e.message : String(e);
|
|
58
|
+
LogError(`[RSS] ExtractTextAndProcessWithLLM THREW: ${msg}`);
|
|
59
|
+
}
|
|
60
|
+
}
|
|
61
|
+
else {
|
|
62
|
+
LogStatus('[RSS] No new or modified feed items to process');
|
|
63
|
+
}
|
|
39
64
|
}
|
|
40
|
-
/**
|
|
41
|
-
* Implemented abstract method from the AutotagBase class. Given a list of content sources, this method should return a list
|
|
42
|
-
* of content source items that have been modified or added after the most recent process run for that content source.
|
|
43
|
-
* @param contentSources - An array of content sources to check for modified or added content source items
|
|
44
|
-
* @returns - An array of content source items that have been modified or added after the most recent process run for that content source
|
|
45
|
-
*/
|
|
46
65
|
async SetContentItemsToProcess(contentSources) {
|
|
47
66
|
const contentItemsToProcess = [];
|
|
48
67
|
for (const contentSource of contentSources) {
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
// Override defaults with content source specific params
|
|
53
|
-
contentSourceParamsMap.forEach((value, key) => {
|
|
54
|
-
if (key in this) {
|
|
55
|
-
this[key] = value;
|
|
56
|
-
}
|
|
57
|
-
});
|
|
58
|
-
}
|
|
59
|
-
const contentSourceParams = {
|
|
60
|
-
contentSourceID: contentSource.ID,
|
|
61
|
-
name: contentSource.Name,
|
|
62
|
-
ContentTypeID: contentSource.ContentTypeID,
|
|
63
|
-
ContentFileTypeID: contentSource.ContentFileTypeID,
|
|
64
|
-
ContentSourceTypeID: contentSource.ContentSourceTypeID,
|
|
65
|
-
URL: contentSource.URL
|
|
66
|
-
};
|
|
67
|
-
const allRSSItems = await this.parseRSSFeed(contentSourceParams.URL);
|
|
68
|
-
const contentItems = await this.SetNewAndModifiedContentItems(allRSSItems, contentSourceParams);
|
|
69
|
-
if (contentItems && contentItems.length > 0) {
|
|
70
|
-
contentItemsToProcess.push(...contentItems);
|
|
68
|
+
try {
|
|
69
|
+
const items = await this.ProcessContentSource(contentSource);
|
|
70
|
+
contentItemsToProcess.push(...items);
|
|
71
71
|
}
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
72
|
+
catch (e) {
|
|
73
|
+
const msg = e instanceof Error ? e.message : String(e);
|
|
74
|
+
LogError(`AutotagRSSFeed: failed to process source "${contentSource.Name}": ${msg}`);
|
|
75
75
|
}
|
|
76
76
|
}
|
|
77
77
|
return contentItemsToProcess;
|
|
78
78
|
}
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
79
|
+
/**
|
|
80
|
+
* Process a single content source: parse the RSS feed, detect new/modified
|
|
81
|
+
* items, fetch full article text, and create/update ContentItems.
|
|
82
|
+
*/
|
|
83
|
+
async ProcessContentSource(contentSource) {
|
|
84
|
+
const contentSourceParams = {
|
|
85
|
+
contentSourceID: contentSource.ID,
|
|
86
|
+
name: contentSource.Name ?? '',
|
|
87
|
+
ContentTypeID: contentSource.ContentTypeID,
|
|
88
|
+
ContentFileTypeID: contentSource.ContentFileTypeID,
|
|
89
|
+
ContentSourceTypeID: contentSource.ContentSourceTypeID,
|
|
90
|
+
URL: contentSource.URL
|
|
91
|
+
};
|
|
92
|
+
LogStatus(`[RSS] Parsing feed "${contentSource.Name}" at ${contentSourceParams.URL}...`);
|
|
93
|
+
const allRSSItems = await this.ParseRSSFeed(contentSourceParams.URL);
|
|
94
|
+
if (allRSSItems.length === 0) {
|
|
95
|
+
LogStatus(`AutotagRSSFeed: no items in feed "${contentSource.Name}"`);
|
|
96
|
+
return [];
|
|
97
|
+
}
|
|
98
|
+
LogStatus(`[RSS] Parsed ${allRSSItems.length} items from "${contentSource.Name}"`);
|
|
99
|
+
// Load existing content items for upsert by URL
|
|
100
|
+
const existingItems = await this.LoadExistingContentItems(contentSourceParams.contentSourceID);
|
|
101
|
+
LogStatus(`[RSS] ${existingItems.size} existing items for "${contentSource.Name}"`);
|
|
102
|
+
const items = [];
|
|
103
|
+
for (let idx = 0; idx < allRSSItems.length; idx++) {
|
|
104
|
+
const rssItem = allRSSItems[idx];
|
|
105
|
+
try {
|
|
106
|
+
LogStatus(`[RSS] Processing item ${idx + 1}/${allRSSItems.length}: "${rssItem.title?.substring(0, 60) ?? 'untitled'}"...`);
|
|
107
|
+
const item = await this.ProcessSingleFeedItem(rssItem, contentSourceParams, existingItems);
|
|
108
|
+
if (item) {
|
|
109
|
+
items.push(item);
|
|
110
|
+
LogStatus(`[RSS] Item ${idx + 1} created/updated (text: ${item.Text?.length ?? 0} chars)`);
|
|
111
|
+
}
|
|
112
|
+
else {
|
|
113
|
+
LogStatus(`[RSS] Item ${idx + 1} skipped (unchanged or empty)`);
|
|
102
114
|
}
|
|
103
115
|
}
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
const contentItem = await md.GetEntityObject('MJ: Content Items', this.contextUser);
|
|
108
|
-
contentItem.ContentSourceID = contentSourceParams.contentSourceID;
|
|
109
|
-
contentItem.Name = contentSourceParams.name;
|
|
110
|
-
contentItem.Description = RSSContentItem.description || this.engine.GetContentItemDescription(contentSourceParams);
|
|
111
|
-
contentItem.ContentTypeID = contentSourceParams.ContentTypeID;
|
|
112
|
-
contentItem.ContentFileTypeID = contentSourceParams.ContentFileTypeID;
|
|
113
|
-
contentItem.ContentSourceTypeID = contentSourceParams.ContentSourceTypeID;
|
|
114
|
-
contentItem.Checksum = await this.getChecksumFromRSSItem(RSSContentItem, this.contextUser);
|
|
115
|
-
contentItem.URL = RSSContentItem.link || contentSourceParams.URL;
|
|
116
|
-
contentItem.Text = JSON.stringify(RSSContentItem);
|
|
117
|
-
await contentItem.Save();
|
|
118
|
-
contentItemsToProcess.push(contentItem); // Content item was added, add to list
|
|
116
|
+
catch (e) {
|
|
117
|
+
const msg = e instanceof Error ? e.message : String(e);
|
|
118
|
+
LogError(`[RSS] Item ${idx + 1} FAILED "${rssItem.title ?? rssItem.link}": ${msg}`);
|
|
119
119
|
}
|
|
120
120
|
}
|
|
121
|
-
|
|
121
|
+
LogStatus(`AutotagRSSFeed: ${items.length} new/modified items from "${contentSource.Name}"`);
|
|
122
|
+
return items;
|
|
122
123
|
}
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
124
|
+
/**
|
|
125
|
+
* Process a single RSS feed item: fetch full article text, compute checksum,
|
|
126
|
+
* create or update the ContentItem.
|
|
127
|
+
*/
|
|
128
|
+
async ProcessSingleFeedItem(rssItem, contentSourceParams, existingItems) {
|
|
129
|
+
// Fetch full article text from the link URL
|
|
130
|
+
const articleText = await this.FetchArticleText(rssItem);
|
|
131
|
+
if (!articleText || articleText.trim().length === 0) {
|
|
132
|
+
return null;
|
|
133
|
+
}
|
|
134
|
+
const checksum = crypto.createHash('sha256').update(articleText).digest('hex');
|
|
135
|
+
const itemUrl = rssItem.link ?? '';
|
|
136
|
+
const urlKey = itemUrl.toLowerCase();
|
|
137
|
+
// Check for existing content item (skip if unchanged, unless force reprocess)
|
|
138
|
+
const existing = existingItems.get(urlKey);
|
|
139
|
+
if (existing && existing.Checksum === checksum && !this.engine.ForceReprocess) {
|
|
140
|
+
return null; // Content unchanged
|
|
141
|
+
}
|
|
142
|
+
const md = new Metadata();
|
|
143
|
+
let contentItem;
|
|
144
|
+
if (existing) {
|
|
145
|
+
contentItem = existing;
|
|
146
|
+
}
|
|
147
|
+
else {
|
|
148
|
+
contentItem = await md.GetEntityObject('MJ: Content Items', this.contextUser);
|
|
149
|
+
contentItem.NewRecord();
|
|
150
|
+
contentItem.ContentSourceID = contentSourceParams.contentSourceID;
|
|
151
|
+
contentItem.ContentTypeID = contentSourceParams.ContentTypeID;
|
|
152
|
+
contentItem.ContentFileTypeID = contentSourceParams.ContentFileTypeID;
|
|
153
|
+
contentItem.ContentSourceTypeID = contentSourceParams.ContentSourceTypeID;
|
|
154
|
+
}
|
|
155
|
+
// Fix #6: Use RSS item title and description, not the source name
|
|
156
|
+
contentItem.Name = rssItem.title ?? contentSourceParams.name;
|
|
157
|
+
contentItem.Description = rssItem.description ?? this.engine.GetContentItemDescription(contentSourceParams);
|
|
158
|
+
contentItem.URL = itemUrl;
|
|
159
|
+
contentItem.Text = articleText;
|
|
160
|
+
contentItem.Checksum = checksum;
|
|
161
|
+
const saved = await contentItem.Save();
|
|
162
|
+
if (!saved) {
|
|
163
|
+
throw new Error(`Failed to save ContentItem for "${itemUrl}"`);
|
|
164
|
+
}
|
|
165
|
+
existingItems.set(urlKey, contentItem);
|
|
166
|
+
return contentItem;
|
|
167
|
+
}
|
|
168
|
+
/**
|
|
169
|
+
* Fetch the full article text for an RSS item.
|
|
170
|
+
*
|
|
171
|
+
* Strategy:
|
|
172
|
+
* 1. If the RSS item has inline content (content:encoded), use that
|
|
173
|
+
* 2. Otherwise, follow the item's link URL and extract text with Cheerio
|
|
174
|
+
* 3. Fall back to the RSS description if link fetching fails
|
|
175
|
+
*/
|
|
176
|
+
async FetchArticleText(rssItem) {
|
|
177
|
+
// 1. Prefer inline content if it's substantial (> 200 chars after HTML stripping)
|
|
178
|
+
if (rssItem.content && rssItem.content.trim().length > 200) {
|
|
179
|
+
return rssItem.content; // Already HTML-parsed by parseRSSFeed
|
|
180
|
+
}
|
|
181
|
+
// 2. Follow the link URL to get the full article
|
|
182
|
+
if (rssItem.link) {
|
|
183
|
+
try {
|
|
184
|
+
const fullText = await this.FetchAndParseWebPage(rssItem.link);
|
|
185
|
+
if (fullText && fullText.trim().length > 100) {
|
|
186
|
+
return fullText;
|
|
187
|
+
}
|
|
188
|
+
}
|
|
189
|
+
catch (e) {
|
|
190
|
+
const msg = e instanceof Error ? e.message : String(e);
|
|
191
|
+
LogStatus(`AutotagRSSFeed: failed to fetch article from "${rssItem.link}": ${msg}`);
|
|
192
|
+
}
|
|
193
|
+
}
|
|
194
|
+
// 3. Fall back to whatever content we have
|
|
195
|
+
if (rssItem.content && rssItem.content.trim().length > 0) {
|
|
196
|
+
return rssItem.content;
|
|
197
|
+
}
|
|
198
|
+
// 4. Last resort: use description (usually just a summary)
|
|
199
|
+
return rssItem.description ?? '';
|
|
200
|
+
}
|
|
201
|
+
/**
|
|
202
|
+
* Fetch a web page and extract its main text content using Cheerio.
|
|
203
|
+
* Strips navigation, headers, footers, scripts, and styles.
|
|
204
|
+
*/
|
|
205
|
+
async FetchAndParseWebPage(url) {
|
|
206
|
+
const response = await axios.get(url, {
|
|
207
|
+
timeout: 8000,
|
|
208
|
+
headers: {
|
|
209
|
+
'User-Agent': 'Mozilla/5.0 (compatible; MemberJunction/1.0)',
|
|
210
|
+
'Accept': 'text/html,application/xhtml+xml'
|
|
147
211
|
}
|
|
148
|
-
|
|
149
|
-
|
|
212
|
+
});
|
|
213
|
+
if (typeof response.data !== 'string') {
|
|
214
|
+
return '';
|
|
215
|
+
}
|
|
216
|
+
return this.engine.parseHTML(response.data);
|
|
217
|
+
}
|
|
218
|
+
/**
|
|
219
|
+
* Parse an RSS/Atom feed URL and return structured items.
|
|
220
|
+
* The content field is HTML-stripped via the engine's parseHTML.
|
|
221
|
+
*/
|
|
222
|
+
async ParseRSSFeed(url) {
|
|
223
|
+
if (!await this.UrlIsValid(url)) {
|
|
224
|
+
LogError(`AutotagRSSFeed: invalid feed URL: ${url}`);
|
|
225
|
+
return [];
|
|
226
|
+
}
|
|
227
|
+
try {
|
|
228
|
+
const parser = new Parser();
|
|
229
|
+
const feed = await parser.parseURL(url);
|
|
230
|
+
const items = [];
|
|
231
|
+
for (const item of feed.items) {
|
|
232
|
+
const rssItem = new RSSItem();
|
|
233
|
+
rssItem.title = item.title ?? '';
|
|
234
|
+
rssItem.link = item.link ?? '';
|
|
235
|
+
rssItem.description = item.contentSnippet ?? item.description ?? '';
|
|
236
|
+
rssItem.pubDate = item.pubDate ?? '';
|
|
237
|
+
rssItem.guid = item.guid ?? '';
|
|
238
|
+
rssItem.category = item.categories?.join(', ') ?? '';
|
|
239
|
+
rssItem.author = item.creator ?? item.author ?? '';
|
|
240
|
+
rssItem.comments = item['comments'] ?? '';
|
|
241
|
+
rssItem.source = item['source'] ?? '';
|
|
242
|
+
// Parse inline content (content:encoded or content)
|
|
243
|
+
const rawContent = item['content:encoded'] ?? item.content ?? '';
|
|
244
|
+
if (rawContent) {
|
|
245
|
+
rssItem.content = await this.engine.parseHTML(rawContent);
|
|
246
|
+
}
|
|
247
|
+
items.push(rssItem);
|
|
150
248
|
}
|
|
249
|
+
return items;
|
|
151
250
|
}
|
|
152
251
|
catch (error) {
|
|
153
|
-
|
|
252
|
+
const msg = error instanceof Error ? error.message : String(error);
|
|
253
|
+
LogError(`AutotagRSSFeed: error parsing feed "${url}": ${msg}`);
|
|
154
254
|
return [];
|
|
155
255
|
}
|
|
156
256
|
}
|
|
157
|
-
|
|
257
|
+
/**
|
|
258
|
+
* Check if a URL is reachable via HTTP HEAD request.
|
|
259
|
+
*/
|
|
260
|
+
async UrlIsValid(url) {
|
|
158
261
|
try {
|
|
159
|
-
const response = await axios.head(url);
|
|
160
|
-
return response.status
|
|
262
|
+
const response = await axios.head(url, { timeout: 10000 });
|
|
263
|
+
return response.status >= 200 && response.status < 400;
|
|
161
264
|
}
|
|
162
|
-
catch
|
|
163
|
-
console.error(`Invalid URL: ${url}`);
|
|
265
|
+
catch {
|
|
164
266
|
return false;
|
|
165
267
|
}
|
|
166
268
|
}
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
269
|
+
/**
|
|
270
|
+
* Load existing ContentItems for this source, keyed by lowercase URL for upsert.
|
|
271
|
+
*/
|
|
272
|
+
async LoadExistingContentItems(contentSourceID) {
|
|
273
|
+
const rv = new RunView();
|
|
274
|
+
const result = await rv.RunView({
|
|
275
|
+
EntityName: 'MJ: Content Items',
|
|
276
|
+
ExtraFilter: `ContentSourceID='${contentSourceID}'`,
|
|
277
|
+
ResultType: 'entity_object'
|
|
278
|
+
}, this.contextUser);
|
|
279
|
+
const map = new Map();
|
|
280
|
+
if (result.Success) {
|
|
281
|
+
for (const ci of result.Results) {
|
|
282
|
+
if (ci.URL) {
|
|
283
|
+
map.set(ci.URL.toLowerCase(), ci);
|
|
284
|
+
}
|
|
285
|
+
}
|
|
286
|
+
}
|
|
287
|
+
return map;
|
|
170
288
|
}
|
|
171
289
|
};
|
|
172
290
|
AutotagRSSFeed = __decorate([
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"AutotagRSSFeed.js","sourceRoot":"","sources":["../../../src/RSSFeed/generic/AutotagRSSFeed.ts"],"names":[],"mappings":";;;;;;;;;AAAA,OAAO,EAAY,QAAQ,EAAE,OAAO,EAAE,MAAM,sBAAsB,CAAC;
|
|
1
|
+
{"version":3,"file":"AutotagRSSFeed.js","sourceRoot":"","sources":["../../../src/RSSFeed/generic/AutotagRSSFeed.ts"],"names":[],"mappings":";;;;;;;;;AAAA,OAAO,EAAY,QAAQ,EAAE,OAAO,EAAE,SAAS,EAAE,QAAQ,EAAE,MAAM,sBAAsB,CAAC;AACxF,OAAO,EAAE,aAAa,EAAiB,MAAM,wBAAwB,CAAC;AACtE,OAAO,EAAE,WAAW,EAA2B,MAAM,YAAY,CAAC;AAClE,OAAO,EAAE,iBAAiB,EAAuB,MAAM,cAAc,CAAC;AAEtE,OAAO,EAAE,OAAO,EAAE,MAAM,aAAa,CAAC;AACtC,OAAO,KAAK,MAAM,OAAO,CAAC;AAC1B,OAAO,MAAM,MAAM,QAAQ,CAAC;AAC5B,OAAO,MAAM,MAAM,YAAY,CAAC;AAEhC;;;;;;;;;;GAUG;AAEI,IAAM,cAAc,GAApB,MAAM,cAAe,SAAQ,WAAW;IAK3C;QACI,KAAK,EAAE,CAAC;QACR,IAAI,CAAC,MAAM,GAAG,iBAAiB,CAAC,QAAQ,CAAC;IAC7C,CAAC;IAEM,KAAK,CAAC,OAAO,CAAC,WAAqB,EAAE,UAAoC;QAC5E,IAAI,CAAC,WAAW,GAAG,WAAW,CAAC;QAC/B,IAAI,CAAC,mBAAmB,GAAG,IAAI,CAAC,MAAM,CAAC,4BAA4B,CAAC,UAAU,CAAC,CAAC;QAChF,SAAS,CAAC,+BAA+B,CAAC,CAAC;QAC3C,MAAM,cAAc,GAAG,MAAM,IAAI,CAAC,MAAM,CAAC,oBAAoB,CAAC,IAAI,CAAC,WAAW,EAAE,IAAI,CAAC,mBAAmB,CAAC,CAAC;QAC1G,SAAS,CAAC,eAAe,cAAc,CAAC,MAAM,gBAAgB,CAAC,CAAC;QAEhE,IAAI,qBAA4C,CAAC;QACjD,IAAI,CAAC;YACD,qBAAqB,GAAG,MAAM,IAAI,CAAC,wBAAwB,CAAC,cAAc,CAAC,CAAC;YAC5E,SAAS,CAAC,2CAA2C,qBAAqB,CAAC,MAAM,QAAQ,CAAC,CAAC;QAC/F,CAAC;QAAC,OAAO,CAAC,EAAE,CAAC;YACT,MAAM,GAAG,GAAG,CAAC,YAAY,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC;YACvD,QAAQ,CAAC,yCAAyC,GAAG,EAAE,CAAC,CAAC;YACzD,OAAO;QACX,CAAC;QAED,IAAI,qBAAqB,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACnC,SAAS,CAAC,mDAAmD,qBAAqB,CAAC,MAAM,WAAW,CAAC,CAAC;YACtG,IAAI,CAAC;gBACD,MAAM,IAAI,CAAC,MAAM,CAAC,4BAA4B,CAAC,qBAAqB,EAAE,IAAI,CAAC,WAAW,EAAE,SAAS,EAAE,SAAS,EAAE,UAAU,CAAC,CAAC;gBAC1H,SAAS,CAAC,2DAA2D,CAAC,CAAC;YAC3E,CAAC;YAAC,OAAO,CAAC,EAAE,CAAC;gBACT,MAAM,GAAG,GAAG,CAAC,YAAY,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC;gBACvD,QAAQ,CAAC,6CAA6C,GAAG,EAAE,CAAC,CAAC;YACjE,CAAC;QACL,CAAC;aAAM,CAAC;YACJ,SAAS,CAAC,gDAAgD,CAAC,CAAC;QAChE,CAAC;IACL,CAAC;IAEM,KAAK,CAAC,wBAAwB,CAAC,cAAuC;QACzE,MAAM,qBAAqB,GAA0B,EAAE,CAAC;QAExD,KAAK,MAAM,aAAa,IAAI,cAAc,EAAE,CAAC;YACzC,IAAI,CAAC;gBACD,MAAM,K
AAK,GAAG,MAAM,IAAI,CAAC,oBAAoB,CAAC,aAAa,CAAC,CAAC;gBAC7D,qBAAqB,CAAC,IAAI,CAAC,GAAG,KAAK,CAAC,CAAC;YACzC,CAAC;YAAC,OAAO,CAAC,EAAE,CAAC;gBACT,MAAM,GAAG,GAAG,CAAC,YAAY,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC;gBACvD,QAAQ,CAAC,6CAA6C,aAAa,CAAC,IAAI,MAAM,GAAG,EAAE,CAAC,CAAC;YACzF,CAAC;QACL,CAAC;QAED,OAAO,qBAAqB,CAAC;IACjC,CAAC;IAED;;;OAGG;IACK,KAAK,CAAC,oBAAoB,CAAC,aAAoC;QACnE,MAAM,mBAAmB,GAAwB;YAC7C,eAAe,EAAE,aAAa,CAAC,EAAE;YACjC,IAAI,EAAE,aAAa,CAAC,IAAI,IAAI,EAAE;YAC9B,aAAa,EAAE,aAAa,CAAC,aAAa;YAC1C,iBAAiB,EAAE,aAAa,CAAC,iBAAiB;YAClD,mBAAmB,EAAE,aAAa,CAAC,mBAAmB;YACtD,GAAG,EAAE,aAAa,CAAC,GAAG;SACzB,CAAC;QAEF,SAAS,CAAC,uBAAuB,aAAa,CAAC,IAAI,QAAQ,mBAAmB,CAAC,GAAG,KAAK,CAAC,CAAC;QACzF,MAAM,WAAW,GAAG,MAAM,IAAI,CAAC,YAAY,CAAC,mBAAmB,CAAC,GAAG,CAAC,CAAC;QACrE,IAAI,WAAW,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YAC3B,SAAS,CAAC,qCAAqC,aAAa,CAAC,IAAI,GAAG,CAAC,CAAC;YACtE,OAAO,EAAE,CAAC;QACd,CAAC;QACD,SAAS,CAAC,gBAAgB,WAAW,CAAC,MAAM,gBAAgB,aAAa,CAAC,IAAI,GAAG,CAAC,CAAC;QAEnF,gDAAgD;QAChD,MAAM,aAAa,GAAG,MAAM,IAAI,CAAC,wBAAwB,CAAC,mBAAmB,CAAC,eAAe,CAAC,CAAC;QAC/F,SAAS,CAAC,SAAS,aAAa,CAAC,IAAI,wBAAwB,aAAa,CAAC,IAAI,GAAG,CAAC,CAAC;QAEpF,MAAM,KAAK,GAA0B,EAAE,CAAC;QACxC,KAAK,IAAI,GAAG,GAAG,CAAC,EAAE,GAAG,GAAG,WAAW,CAAC,MAAM,EAAE,GAAG,EAAE,EAAE,CAAC;YAChD,MAAM,OAAO,GAAG,WAAW,CAAC,GAAG,CAAC,CAAC;YACjC,IAAI,CAAC;gBACD,SAAS,CAAC,yBAAyB,GAAG,GAAG,CAAC,IAAI,WAAW,CAAC,MAAM,MAAM,OAAO,CAAC,KAAK,EAAE,SAAS,CAAC,CAAC,EAAE,EAAE,CAAC,IAAI,UAAU,MAAM,CAAC,CAAC;gBAC3H,MAAM,IAAI,GAAG,MAAM,IAAI,CAAC,qBAAqB,CAAC,OAAO,EAAE,mBAAmB,EAAE,aAAa,CAAC,CAAC;gBAC3F,IAAI,IAAI,EAAE,CAAC;oBACP,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;oBACjB,SAAS,CAAC,cAAc,GAAG,GAAG,CAAC,2BAA2B,IAAI,CAAC,IAAI,EAAE,MAAM,IAAI,CAAC,SAAS,CAAC,CAAC;gBAC/F,CAAC;qBAAM,CAAC;oBACJ,SAAS,CAAC,cAAc,GAAG,GAAG,CAAC,+BAA+B,CAAC,CAAC;gBACpE,CAAC;YACL,CAAC;YAAC,OAAO,CAAC,EAAE,CAAC;gBACT,MAAM,GAAG,GAAG,CAAC,YAAY,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC;gBACvD,QAAQ,CAAC,cAAc,GAAG,GAAG,CAAC,YAAY,OAAO,CA
AC,KAAK,IAAI,OAAO,CAAC,IAAI,MAAM,GAAG,EAAE,CAAC,CAAC;YACxF,CAAC;QACL,CAAC;QAED,SAAS,CAAC,mBAAmB,KAAK,CAAC,MAAM,6BAA6B,aAAa,CAAC,IAAI,GAAG,CAAC,CAAC;QAC7F,OAAO,KAAK,CAAC;IACjB,CAAC;IAED;;;OAGG;IACK,KAAK,CAAC,qBAAqB,CAC/B,OAAgB,EAChB,mBAAwC,EACxC,aAA+C;QAE/C,4CAA4C;QAC5C,MAAM,WAAW,GAAG,MAAM,IAAI,CAAC,gBAAgB,CAAC,OAAO,CAAC,CAAC;QACzD,IAAI,CAAC,WAAW,IAAI,WAAW,CAAC,IAAI,EAAE,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YAClD,OAAO,IAAI,CAAC;QAChB,CAAC;QAED,MAAM,QAAQ,GAAG,MAAM,CAAC,UAAU,CAAC,QAAQ,CAAC,CAAC,MAAM,CAAC,WAAW,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC;QAC/E,MAAM,OAAO,GAAG,OAAO,CAAC,IAAI,IAAI,EAAE,CAAC;QACnC,MAAM,MAAM,GAAG,OAAO,CAAC,WAAW,EAAE,CAAC;QAErC,8EAA8E;QAC9E,MAAM,QAAQ,GAAG,aAAa,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC;QAC3C,IAAI,QAAQ,IAAI,QAAQ,CAAC,QAAQ,KAAK,QAAQ,IAAI,CAAC,IAAI,CAAC,MAAM,CAAC,cAAc,EAAE,CAAC;YAC5E,OAAO,IAAI,CAAC,CAAC,oBAAoB;QACrC,CAAC;QAED,MAAM,EAAE,GAAG,IAAI,QAAQ,EAAE,CAAC;QAC1B,IAAI,WAAgC,CAAC;QAErC,IAAI,QAAQ,EAAE,CAAC;YACX,WAAW,GAAG,QAAQ,CAAC;QAC3B,CAAC;aAAM,CAAC;YACJ,WAAW,GAAG,MAAM,EAAE,CAAC,eAAe,CAAsB,mBAAmB,EAAE,IAAI,CAAC,WAAW,CAAC,CAAC;YACnG,WAAW,CAAC,SAAS,EAAE,CAAC;YACxB,WAAW,CAAC,eAAe,GAAG,mBAAmB,CAAC,eAAe,CAAC;YAClE,WAAW,CAAC,aAAa,GAAG,mBAAmB,CAAC,aAAa,CAAC;YAC9D,WAAW,CAAC,iBAAiB,GAAG,mBAAmB,CAAC,iBAAiB,CAAC;YACtE,WAAW,CAAC,mBAAmB,GAAG,mBAAmB,CAAC,mBAAmB,CAAC;QAC9E,CAAC;QAED,kEAAkE;QAClE,WAAW,CAAC,IAAI,GAAG,OAAO,CAAC,KAAK,IAAI,mBAAmB,CAAC,IAAI,CAAC;QAC7D,WAAW,CAAC,WAAW,GAAG,OAAO,CAAC,WAAW,IAAI,IAAI,CAAC,MAAM,CAAC,yBAAyB,CAAC,mBAAmB,CAAC,CAAC;QAC5G,WAAW,CAAC,GAAG,GAAG,OAAO,CAAC;QAC1B,WAAW,CAAC,IAAI,GAAG,WAAW,CAAC;QAC/B,WAAW,CAAC,QAAQ,GAAG,QAAQ,CAAC;QAEhC,MAAM,KAAK,GAAG,MAAM,WAAW,CAAC,IAAI,EAAE,CAAC;QACvC,IAAI,CAAC,KAAK,EAAE,CAAC;YACT,MAAM,IAAI,KAAK,CAAC,mCAAmC,OAAO,GAAG,CAAC,CAAC;QACnE,CAAC;QAED,aAAa,CAAC,GAAG,CAAC,MAAM,EAAE,WAAW,CAAC,CAAC;QACvC,OAAO,WAAW,CAAC;IACvB,CAAC;IAED;;;;;;;OAOG;IACK,KAAK,CAAC,gBAAgB,CAAC,OAAgB;QAC3C,kFAAkF;QAClF,IAAI,OAAO,CAAC,OAAO,IAAI,OAAO,CAAC,OAAO,CAAC,IAAI,EAAE,CAAC,MAAM,GAAG,GAAG,EAAE,CAAC;YACzD,OAAO,OAAO,CAAC,OAAO,CAAC,CAAC,sCAAsC
;QAClE,CAAC;QAED,iDAAiD;QACjD,IAAI,OAAO,CAAC,IAAI,EAAE,CAAC;YACf,IAAI,CAAC;gBACD,MAAM,QAAQ,GAAG,MAAM,IAAI,CAAC,oBAAoB,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC;gBAC/D,IAAI,QAAQ,IAAI,QAAQ,CAAC,IAAI,EAAE,CAAC,MAAM,GAAG,GAAG,EAAE,CAAC;oBAC3C,OAAO,QAAQ,CAAC;gBACpB,CAAC;YACL,CAAC;YAAC,OAAO,CAAC,EAAE,CAAC;gBACT,MAAM,GAAG,GAAG,CAAC,YAAY,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC;gBACvD,SAAS,CAAC,iDAAiD,OAAO,CAAC,IAAI,MAAM,GAAG,EAAE,CAAC,CAAC;YACxF,CAAC;QACL,CAAC;QAED,2CAA2C;QAC3C,IAAI,OAAO,CAAC,OAAO,IAAI,OAAO,CAAC,OAAO,CAAC,IAAI,EAAE,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACvD,OAAO,OAAO,CAAC,OAAO,CAAC;QAC3B,CAAC;QAED,2DAA2D;QAC3D,OAAO,OAAO,CAAC,WAAW,IAAI,EAAE,CAAC;IACrC,CAAC;IAED;;;OAGG;IACK,KAAK,CAAC,oBAAoB,CAAC,GAAW;QAC1C,MAAM,QAAQ,GAAG,MAAM,KAAK,CAAC,GAAG,CAAC,GAAG,EAAE;YAClC,OAAO,EAAE,IAAI;YACb,OAAO,EAAE;gBACL,YAAY,EAAE,8CAA8C;gBAC5D,QAAQ,EAAE,iCAAiC;aAC9C;SACJ,CAAC,CAAC;QAEH,IAAI,OAAO,QAAQ,CAAC,IAAI,KAAK,QAAQ,EAAE,CAAC;YACpC,OAAO,EAAE,CAAC;QACd,CAAC;QAED,OAAO,IAAI,CAAC,MAAM,CAAC,SAAS,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAC;IAChD,CAAC;IAED;;;OAGG;IACI,KAAK,CAAC,YAAY,CAAC,GAAW;QACjC,IAAI,CAAC,MAAM,IAAI,CAAC,UAAU,CAAC,GAAG,CAAC,EAAE,CAAC;YAC9B,QAAQ,CAAC,qCAAqC,GAAG,EAAE,CAAC,CAAC;YACrD,OAAO,EAAE,CAAC;QACd,CAAC;QAED,IAAI,CAAC;YACD,MAAM,MAAM,GAAG,IAAI,MAAM,EAAE,CAAC;YAC5B,MAAM,IAAI,GAAG,MAAM,MAAM,CAAC,QAAQ,CAAC,GAAG,CAAC,CAAC;YAExC,MAAM,KAAK,GAAc,EAAE,CAAC;YAC5B,KAAK,MAAM,IAAI,IAAI,IAAI,CAAC,KAAK,EAAE,CAAC;gBAC5B,MAAM,OAAO,GAAG,IAAI,OAAO,EAAE,CAAC;gBAC9B,OAAO,CAAC,KAAK,GAAG,IAAI,CAAC,KAAK,IAAI,EAAE,CAAC;gBACjC,OAAO,CAAC,IAAI,GAAG,IAAI,CAAC,IAAI,IAAI,EAAE,CAAC;gBAC/B,OAAO,CAAC,WAAW,GAAG,IAAI,CAAC,cAAc,IAAI,IAAI,CAAC,WAAW,IAAI,EAAE,CAAC;gBACpE,OAAO,CAAC,OAAO,GAAG,IAAI,CAAC,OAAO,IAAI,EAAE,CAAC;gBACrC,OAAO,CAAC,IAAI,GAAG,IAAI,CAAC,IAAI,IAAI,EAAE,CAAC;gBAC/B,OAAO,CAAC,QAAQ,GAAG,IAAI,CAAC,UAAU,EAAE,IAAI,CAAC,IAAI,CAAC,IAAI,EAAE,CAAC;gBACrD,OAAO,CAAC,MAAM,GAAG,IAAI,CAAC,OAAO,IAAI,IAAI,CAAC,MAAM,IAAI,EAAE,CAAC;gBACnD,OAAO,CAAC,QAAQ,GAAI,IAAgC,CAAC,UAAU,CAAW,IAAI,EAAE,CAAC;gBACjF
,OAAO,CAAC,MAAM,GAAI,IAAgC,CAAC,QAAQ,CAAW,IAAI,EAAE,CAAC;gBAE7E,oDAAoD;gBACpD,MAAM,UAAU,GAAG,IAAI,CAAC,iBAAiB,CAAC,IAAI,IAAI,CAAC,OAAO,IAAI,EAAE,CAAC;gBACjE,IAAI,UAAU,EAAE,CAAC;oBACb,OAAO,CAAC,OAAO,GAAG,MAAM,IAAI,CAAC,MAAM,CAAC,SAAS,CAAC,UAAU,CAAC,CAAC;gBAC9D,CAAC;gBAED,KAAK,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;YACxB,CAAC;YAED,OAAO,KAAK,CAAC;QACjB,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACb,MAAM,GAAG,GAAG,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC;YACnE,QAAQ,CAAC,uCAAuC,GAAG,MAAM,GAAG,EAAE,CAAC,CAAC;YAChE,OAAO,EAAE,CAAC;QACd,CAAC;IACL,CAAC;IAED;;OAEG;IACK,KAAK,CAAC,UAAU,CAAC,GAAW;QAChC,IAAI,CAAC;YACD,MAAM,QAAQ,GAAG,MAAM,KAAK,CAAC,IAAI,CAAC,GAAG,EAAE,EAAE,OAAO,EAAE,KAAK,EAAE,CAAC,CAAC;YAC3D,OAAO,QAAQ,CAAC,MAAM,IAAI,GAAG,IAAI,QAAQ,CAAC,MAAM,GAAG,GAAG,CAAC;QAC3D,CAAC;QAAC,MAAM,CAAC;YACL,OAAO,KAAK,CAAC;QACjB,CAAC;IACL,CAAC;IAED;;OAEG;IACK,KAAK,CAAC,wBAAwB,CAAC,eAAuB;QAC1D,MAAM,EAAE,GAAG,IAAI,OAAO,EAAE,CAAC;QACzB,MAAM,MAAM,GAAG,MAAM,EAAE,CAAC,OAAO,CAAsB;YACjD,UAAU,EAAE,mBAAmB;YAC/B,WAAW,EAAE,oBAAoB,eAAe,GAAG;YACnD,UAAU,EAAE,eAAe;SAC9B,EAAE,IAAI,CAAC,WAAW,CAAC,CAAC;QAErB,MAAM,GAAG,GAAG,IAAI,GAAG,EAA+B,CAAC;QACnD,IAAI,MAAM,CAAC,OAAO,EAAE,CAAC;YACjB,KAAK,MAAM,EAAE,IAAI,MAAM,CAAC,OAAO,EAAE,CAAC;gBAC9B,IAAI,EAAE,CAAC,GAAG,EAAE,CAAC;oBACT,GAAG,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,CAAC,WAAW,EAAE,EAAE,EAAE,CAAC,CAAC;gBACtC,CAAC;YACL,CAAC;QACL,CAAC;QACD,OAAO,GAAG,CAAC;IACf,CAAC;CACJ,CAAA;AArSY,cAAc;IAD1B,aAAa,CAAC,WAAW,EAAE,gBAAgB,CAAC;;GAChC,cAAc,CAqS1B"}
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { AutotagBase } from '../../Core/index.js';
|
|
1
|
+
import { AutotagBase, AutotagProgressCallback } from '../../Core/index.js';
|
|
2
2
|
import { ContentSourceParams } from '../../Engine/index.js';
|
|
3
3
|
import { UserInfo } from '@memberjunction/core';
|
|
4
4
|
import { MJContentSourceEntity, MJContentItemEntity } from '@memberjunction/core-entities';
|
|
@@ -20,7 +20,7 @@ export declare class AutotagWebsite extends AutotagBase {
|
|
|
20
20
|
* It initializes the connection, retrieves the content sources corresponding to the content source type, sets the content items that we want to process,
|
|
21
21
|
* extracts and processes the text, and sets the results in the database.
|
|
22
22
|
*/
|
|
23
|
-
Autotag(contextUser: UserInfo): Promise<void>;
|
|
23
|
+
Autotag(contextUser: UserInfo, onProgress?: AutotagProgressCallback): Promise<void>;
|
|
24
24
|
/**
|
|
25
25
|
* Given a content source, retrieve all content items associated with the content sources.
|
|
26
26
|
* The content items are then processed to determine if they have been modified since the last time they were processed or if they are new content items.
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"AutotagWebsite.d.ts","sourceRoot":"","sources":["../../../src/Websites/generic/AutotagWebsite.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,WAAW,EAAE,MAAM,YAAY,CAAC;
|
|
1
|
+
{"version":3,"file":"AutotagWebsite.d.ts","sourceRoot":"","sources":["../../../src/Websites/generic/AutotagWebsite.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,WAAW,EAAE,uBAAuB,EAAE,MAAM,YAAY,CAAC;AAClE,OAAO,EAAqB,mBAAmB,EAAE,MAAM,cAAc,CAAC;AAEtE,OAAO,EAAE,QAAQ,EAAqB,MAAM,sBAAsB,CAAC;AACnE,OAAO,EAAE,qBAAqB,EAAE,mBAAmB,EAAE,MAAM,+BAA+B,CAAC;AAC3F,OAAO,KAAK,OAAO,MAAM,SAAS,CAAC;AAMnC,qBACa,cAAe,SAAQ,WAAW;IAC3C,OAAO,CAAC,WAAW,CAAW;IAC9B,OAAO,CAAC,MAAM,CAAoB;IAClC,SAAS,CAAC,mBAAmB,EAAE,MAAM,CAAA;IACrC,SAAS,CAAC,+BAA+B,EAAE,OAAO,CAAC;IACnD,SAAS,CAAC,4BAA4B,EAAE,OAAO,CAAC;IAChD,SAAS,CAAC,QAAQ,EAAE,MAAM,CAAC;IAC3B,SAAS,CAAC,OAAO,EAAE,MAAM,CAAC;IAC1B,SAAS,CAAC,UAAU,EAAE,MAAM,CAAC;IAC7B,SAAS,CAAC,WAAW,EAAE,GAAG,CAAC,MAAM,CAAC,CAAC;;IAQnC,SAAS,CAAC,cAAc,IAAI,QAAQ;IAIpC;;;;OAIG;IACU,OAAO,CAAC,WAAW,EAAE,QAAQ,EAAE,UAAU,CAAC,EAAE,uBAAuB,GAAG,OAAO,CAAC,IAAI,CAAC;IAShG;;;;;OAKG;IACU,wBAAwB,CAAC,cAAc,EAAE,qBAAqB,EAAE,GAAG,OAAO,CAAC,mBAAmB,EAAE,CAAC;IAoD9G;;;;;;;;OAQG;cACa,6BAA6B,CAAC,gBAAgB,EAAE,MAAM,EAAE,EAAE,mBAAmB,EAAE,mBAAmB,EAAE,WAAW,EAAE,QAAQ,GAAG,OAAO,CAAC,mBAAmB,EAAE,CAAC;IAsE7J,gBAAgB,CAAC,GAAG,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC;IAKpD,qBAAqB,CAAC,OAAO,EAAE,GAAG,EAAE,CAAC,EAAE,OAAO,CAAC,UAAU,GAAG,MAAM;IAgBzE;;;;OAIG;IACU,YAAY,CAAC,GAAG,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC;IAavD;;;;;;OAMG;cACa,4BAA4B,CAAC,GAAG,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,EAAE,CAAC;IAa5G;;;;;;OAMG;cACa,gBAAgB,CAAC,GAAG,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC;IAmC7E;;;;OAIG;IACH,SAAS,CAAC,eAAe,CAAC,GAAG,EAAE,MAAM,GAAG,OAAO;IAW/C,SAAS,CAAC,WAAW,CAAC,GAAG,EAAE,MAAM,GAAG,MAAM;IAW1C,SAAS,CAAC,WAAW,CAAC,GAAG,EAAE,MAAM,GAAG,MAAM;cAa1B,UAAU,CAAC,GAAG,EAAE,MAAM,GAAG,OAAO,CAAC,OAAO,CAAC;IAWzD;;;;;;;OAOG;cACa,kBAAkB,CAAC,GAAG,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,EAAE,UAAU,EAAE,MAAM,EAAE,WAAW,EAAE,GAAG,CAAC,MAAM,CAAC,EAAE,KAAK,EAAE,MAAM,GAAG,OAAO,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC;cA+CnI,KAAK,CAAC,EAAE,EAAE,MAAM;CAGnC"}
|
|
@@ -30,12 +30,12 @@ let AutotagWebsite = class AutotagWebsite extends AutotagBase {
|
|
|
30
30
|
* It initializes the connection, retrieves the content sources corresponding to the content source type, sets the content items that we want to process,
|
|
31
31
|
* extracts and processes the text, and sets the results in the database.
|
|
32
32
|
*/
|
|
33
|
-
async Autotag(contextUser) {
|
|
33
|
+
async Autotag(contextUser, onProgress) {
|
|
34
34
|
this.contextUser = contextUser;
|
|
35
35
|
this.contentSourceTypeID = this.engine.SetSubclassContentSourceType('Website');
|
|
36
36
|
const contentSources = await this.engine.getAllContentSources(this.contextUser, this.contentSourceTypeID);
|
|
37
37
|
const contentItemsToProcess = await this.SetContentItemsToProcess(contentSources);
|
|
38
|
-
await this.engine.ExtractTextAndProcessWithLLM(contentItemsToProcess, this.contextUser);
|
|
38
|
+
await this.engine.ExtractTextAndProcessWithLLM(contentItemsToProcess, this.contextUser, undefined, undefined, onProgress);
|
|
39
39
|
}
|
|
40
40
|
/**
|
|
41
41
|
* Given a content source, retrieve all content items associated with the content sources.
|