@memberjunction/content-autotagging 3.3.0 → 4.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (108) hide show
  1. package/dist/CloudStorage/generic/CloudStorageBase.d.ts +28 -0
  2. package/dist/CloudStorage/generic/CloudStorageBase.d.ts.map +1 -0
  3. package/dist/CloudStorage/generic/CloudStorageBase.js +38 -0
  4. package/dist/CloudStorage/generic/CloudStorageBase.js.map +1 -0
  5. package/dist/CloudStorage/index.d.ts +3 -0
  6. package/dist/CloudStorage/index.d.ts.map +1 -0
  7. package/dist/CloudStorage/index.js +3 -0
  8. package/dist/CloudStorage/index.js.map +1 -0
  9. package/dist/CloudStorage/providers/AutotagAzureBlob.d.ts +20 -0
  10. package/dist/CloudStorage/providers/AutotagAzureBlob.d.ts.map +1 -0
  11. package/dist/CloudStorage/providers/AutotagAzureBlob.js +86 -0
  12. package/dist/CloudStorage/providers/AutotagAzureBlob.js.map +1 -0
  13. package/dist/Core/generic/AutotagBase.d.ts +7 -0
  14. package/dist/Core/generic/AutotagBase.d.ts.map +1 -0
  15. package/dist/Core/generic/AutotagBase.js +3 -0
  16. package/dist/Core/generic/AutotagBase.js.map +1 -0
  17. package/dist/Core/index.d.ts +2 -0
  18. package/dist/Core/index.d.ts.map +1 -0
  19. package/dist/Core/index.js +2 -0
  20. package/dist/Core/index.js.map +1 -0
  21. package/dist/Engine/generic/AutotagBaseEngine.d.ts +131 -0
  22. package/dist/Engine/generic/AutotagBaseEngine.d.ts.map +1 -0
  23. package/dist/Engine/generic/AutotagBaseEngine.js +620 -0
  24. package/dist/Engine/generic/AutotagBaseEngine.js.map +1 -0
  25. package/dist/Engine/generic/content.types.d.ts +32 -0
  26. package/dist/Engine/generic/content.types.d.ts.map +1 -0
  27. package/dist/Engine/generic/content.types.js +7 -0
  28. package/dist/Engine/generic/content.types.js.map +1 -0
  29. package/dist/Engine/generic/process.types.d.ts +30 -0
  30. package/dist/Engine/generic/process.types.d.ts.map +1 -0
  31. package/dist/Engine/generic/process.types.js +7 -0
  32. package/dist/Engine/generic/process.types.js.map +1 -0
  33. package/dist/Engine/index.d.ts +4 -0
  34. package/dist/Engine/index.d.ts.map +1 -0
  35. package/dist/Engine/index.js +4 -0
  36. package/dist/Engine/index.js.map +1 -0
  37. package/dist/Entity/generic/AutotagEntity.d.ts +19 -0
  38. package/dist/Entity/generic/AutotagEntity.d.ts.map +1 -0
  39. package/dist/Entity/generic/AutotagEntity.js +127 -0
  40. package/dist/Entity/generic/AutotagEntity.js.map +1 -0
  41. package/dist/Entity/index.d.ts +2 -0
  42. package/dist/Entity/index.d.ts.map +1 -0
  43. package/dist/Entity/index.js +2 -0
  44. package/dist/Entity/index.js.map +1 -0
  45. package/dist/LocalFileSystem/generic/AutotagLocalFileSystem.d.ts +39 -0
  46. package/dist/LocalFileSystem/generic/AutotagLocalFileSystem.d.ts.map +1 -0
  47. package/dist/LocalFileSystem/generic/AutotagLocalFileSystem.js +171 -0
  48. package/dist/LocalFileSystem/generic/AutotagLocalFileSystem.js.map +1 -0
  49. package/dist/LocalFileSystem/index.d.ts +2 -0
  50. package/dist/LocalFileSystem/index.d.ts.map +1 -0
  51. package/dist/LocalFileSystem/index.js +2 -0
  52. package/dist/LocalFileSystem/index.js.map +1 -0
  53. package/dist/RSSFeed/generic/AutotagRSSFeed.d.ts +30 -0
  54. package/dist/RSSFeed/generic/AutotagRSSFeed.d.ts.map +1 -0
  55. package/dist/RSSFeed/generic/AutotagRSSFeed.js +177 -0
  56. package/dist/RSSFeed/generic/AutotagRSSFeed.js.map +1 -0
  57. package/dist/RSSFeed/generic/RSS.types.d.ts +13 -0
  58. package/dist/RSSFeed/generic/RSS.types.d.ts.map +1 -0
  59. package/dist/RSSFeed/generic/RSS.types.js +3 -0
  60. package/dist/RSSFeed/generic/RSS.types.js.map +1 -0
  61. package/dist/RSSFeed/index.d.ts +3 -0
  62. package/dist/RSSFeed/index.d.ts.map +1 -0
  63. package/dist/RSSFeed/index.js +3 -0
  64. package/dist/RSSFeed/index.js.map +1 -0
  65. package/dist/Websites/generic/AutotagWebsite.d.ts +85 -0
  66. package/dist/Websites/generic/AutotagWebsite.d.ts.map +1 -0
  67. package/dist/Websites/generic/AutotagWebsite.js +355 -0
  68. package/dist/Websites/generic/AutotagWebsite.js.map +1 -0
  69. package/dist/Websites/index.d.ts +2 -0
  70. package/dist/Websites/index.d.ts.map +1 -0
  71. package/dist/Websites/index.js +2 -0
  72. package/dist/Websites/index.js.map +1 -0
  73. package/dist/index.d.ts +7 -0
  74. package/dist/index.d.ts.map +1 -0
  75. package/dist/index.js +7 -0
  76. package/dist/index.js.map +1 -0
  77. package/dist/src/CloudStorage/generic/CloudStorageBase.d.ts +3 -3
  78. package/dist/src/CloudStorage/generic/CloudStorageBase.js +2 -2
  79. package/dist/src/CloudStorage/index.d.ts +2 -2
  80. package/dist/src/CloudStorage/index.js +2 -2
  81. package/dist/src/CloudStorage/providers/AutotagAzureBlob.d.ts +2 -2
  82. package/dist/src/CloudStorage/providers/AutotagAzureBlob.js +1 -1
  83. package/dist/src/Core/index.d.ts +1 -1
  84. package/dist/src/Core/index.js +1 -1
  85. package/dist/src/Engine/generic/AutotagBaseEngine.d.ts +2 -2
  86. package/dist/src/Engine/generic/AutotagBaseEngine.js +2 -2
  87. package/dist/src/Engine/index.d.ts +3 -3
  88. package/dist/src/Engine/index.js +3 -3
  89. package/dist/src/Entity/generic/AutotagEntity.d.ts +2 -2
  90. package/dist/src/Entity/generic/AutotagEntity.js +2 -2
  91. package/dist/src/Entity/index.d.ts +1 -1
  92. package/dist/src/Entity/index.js +1 -1
  93. package/dist/src/LocalFileSystem/generic/AutotagLocalFileSystem.d.ts +2 -2
  94. package/dist/src/LocalFileSystem/generic/AutotagLocalFileSystem.js +2 -2
  95. package/dist/src/LocalFileSystem/index.d.ts +1 -1
  96. package/dist/src/LocalFileSystem/index.js +1 -1
  97. package/dist/src/RSSFeed/generic/AutotagRSSFeed.d.ts +3 -3
  98. package/dist/src/RSSFeed/generic/AutotagRSSFeed.js +3 -3
  99. package/dist/src/RSSFeed/index.d.ts +2 -2
  100. package/dist/src/RSSFeed/index.js +2 -2
  101. package/dist/src/Websites/generic/AutotagWebsite.d.ts +2 -2
  102. package/dist/src/Websites/generic/AutotagWebsite.js +2 -2
  103. package/dist/src/Websites/index.d.ts +1 -1
  104. package/dist/src/Websites/index.js +1 -1
  105. package/dist/src/index.d.ts +6 -6
  106. package/dist/src/index.js +6 -6
  107. package/dist/tsconfig.tsbuildinfo +1 -1
  108. package/package.json +21 -20
@@ -0,0 +1,177 @@
1
// Decorator-application helper; its shape matches what the TypeScript compiler emits (presumably generated — avoid hand edits).
// An already-defined `this.__decorate` is reused when present; otherwise the fallback function below is used.
+ var __decorate = (this && this.__decorate) || function (decorators, target, key, desc) {
2
// c = number of arguments actually passed. r = running result: `target` when fewer than 3 arguments were given,
// otherwise the property descriptor (looked up on `target` when `desc` is exactly null). d = current decorator.
+ var c = arguments.length, r = c < 3 ? target : desc === null ? desc = Object.getOwnPropertyDescriptor(target, key) : desc, d;
3
// Delegate to Reflect.decorate when that function is available.
+ if (typeof Reflect === "object" && typeof Reflect.decorate === "function") r = Reflect.decorate(decorators, target, key, desc);
4
// Otherwise walk the decorators from last to first; a truthy return value from a decorator replaces r.
+ else for (var i = decorators.length - 1; i >= 0; i--) if (d = decorators[i]) r = (c < 3 ? d(r) : c > 3 ? d(target, key, r) : d(target, key)) || r;
5
// With more than 3 arguments and a truthy r, r is written back as the descriptor of target[key]; r is returned in every case.
+ return c > 3 && r && Object.defineProperty(target, key, r), r;
6
+ };
7
// Metadata helper; reuses an existing `this.__metadata` when present, otherwise uses the fallback below.
+ var __metadata = (this && this.__metadata) || function (k, v) {
8
// Returns Reflect.metadata(k, v) when that function exists; otherwise falls through and returns undefined.
+ if (typeof Reflect === "object" && typeof Reflect.metadata === "function") return Reflect.metadata(k, v);
9
+ };
10
+ import { Metadata, RunView } from '@memberjunction/core';
11
+ import { RegisterClass } from '@memberjunction/global';
12
+ import { AutotagBase } from "../../Core/index.js";
13
+ import { AutotagBaseEngine } from "../../Engine/index.js";
14
+ import { RSSItem } from './RSS.types.js';
15
+ import axios from 'axios';
16
+ import crypto from 'crypto';
17
+ import Parser from 'rss-parser';
18
+ import dotenv from 'dotenv';
19
+ dotenv.config({ quiet: true });
20
/**
 * Autotag provider for content sources of type 'RSS Feed'.
 *
 * For each such content source the feed is downloaded and parsed, every feed entry is matched
 * against the stored 'Content Items' rows, new or changed entries are saved, and the resulting
 * content items are handed to the engine for text extraction and LLM processing.
 */
let AutotagRSSFeed = class AutotagRSSFeed extends AutotagBase {
    constructor() {
        super();
        // Engine instance exposed by AutotagBaseEngine.Instance; used for all lookups below.
        this.engine = AutotagBaseEngine.Instance;
    }

    /**
     * @returns The user context stored by the most recent Autotag() call (undefined before that).
     */
    getContextUser() {
        return this.contextUser;
    }

    /**
     * Implemented abstract method from the AutotagBase class; entry point of the autotagging run.
     * Resolves the 'RSS Feed' content source type, loads its content sources, determines which
     * content items are new or modified, and passes those to the engine for processing.
     * @param contextUser - User context forwarded to every engine and data call.
     * @returns Promise that resolves once the engine has processed the items.
     */
    async Autotag(contextUser) {
        this.contextUser = contextUser;
        this.contentSourceTypeID = await this.engine.setSubclassContentSourceType('RSS Feed', this.contextUser);
        const contentSources = await this.engine.getAllContentSources(this.contextUser, this.contentSourceTypeID);
        const contentItemsToProcess = await this.SetContentItemsToProcess(contentSources);
        await this.engine.ExtractTextAndProcessWithLLM(contentItemsToProcess, this.contextUser);
    }

    /**
     * Implemented abstract method from the AutotagBase class. Given a list of content sources,
     * returns the content items that were added or modified in their feeds.
     * @param contentSources - Content sources to check; each one's URL is parsed as an RSS feed.
     * @returns Array of content items that were created or updated during this call.
     */
    async SetContentItemsToProcess(contentSources) {
        const contentItemsToProcess = [];
        for (const contentSource of contentSources) {
            // If content source parameters were provided, set them. Otherwise, use the default values.
            const contentSourceParamsMap = await this.engine.getContentSourceParams(contentSource, this.contextUser);
            if (contentSourceParamsMap) {
                // Only keys that already exist on this instance are overridden.
                contentSourceParamsMap.forEach((value, key) => {
                    if (key in this) {
                        this[key] = value;
                    }
                });
            }
            const contentSourceParams = {
                contentSourceID: contentSource.ID,
                name: contentSource.Name,
                ContentTypeID: contentSource.ContentTypeID,
                ContentFileTypeID: contentSource.ContentFileTypeID,
                ContentSourceTypeID: contentSource.ContentSourceTypeID,
                URL: contentSource.URL
            };
            const allRSSItems = await this.parseRSSFeed(contentSourceParams.URL);
            const contentItems = await this.SetNewAndModifiedContentItems(allRSSItems, contentSourceParams);
            if (contentItems && contentItems.length > 0) {
                contentItemsToProcess.push(...contentItems);
            }
            else {
                // No content items found to process
                console.log(`No content items found to process for content source: ${contentSource.Get('Name')}`);
            }
        }
        return contentItemsToProcess;
    }

    /**
     * Matches each parsed feed entry against the stored 'Content Items' of the content source
     * (by URL or Description). Existing rows whose checksum differs are reloaded, updated and
     * saved; entries without a match are created. Unchanged entries are skipped.
     * @param allRSSItems - Parsed feed entries (see parseRSSFeed).
     * @param contentSourceParams - IDs, name and URL of the owning content source.
     * @returns Array of the content items that were created or updated.
     */
    async SetNewAndModifiedContentItems(allRSSItems, contentSourceParams) {
        // Feed text is untrusted: double every single quote so a value cannot terminate the
        // quoted literal it is embedded in within the ExtraFilter expression.
        const quote = (value) => String(value).replace(/'/g, "''");
        const contentItemsToProcess = [];
        for (const RSSContentItem of allRSSItems) {
            const rv = new RunView();
            // According to the RSS spec, all items must contain either a title or a description.
            // NOTE(review): an empty link/description still produces a `= ''` comparison here — confirm that is intended.
            const results = await rv.RunView({
                EntityName: 'Content Items',
                ExtraFilter: `ContentSourceID = '${quote(contentSourceParams.contentSourceID)}' AND (URL = '${quote(RSSContentItem.link)}' OR Description = '${quote(RSSContentItem.description)}')`,
                ResultType: 'entity_object',
            }, this.contextUser);
            if (results.Success && results.Results.length) {
                const contentItemResult = results.Results[0];
                // This content item already exists, check the last hash to see if it has been modified
                const lastStoredHash = contentItemResult.Checksum;
                const newHash = await this.getChecksumFromRSSItem(RSSContentItem, this.contextUser);
                if (lastStoredHash !== newHash) {
                    // This content item has been modified
                    const md = new Metadata();
                    const contentItem = await md.GetEntityObject('Content Items', this.contextUser);
                    // The load must finish before fields are changed and saved; otherwise Save()
                    // can race with the still-pending load.
                    await contentItem.Load(contentItemResult.ID);
                    contentItem.Checksum = newHash;
                    contentItem.Text = JSON.stringify(RSSContentItem);
                    await contentItem.Save();
                    contentItemsToProcess.push(contentItem); // Content item was modified, add to list
                }
            }
            else {
                // This content item does not exist, add it
                const md = new Metadata();
                const contentItem = await md.GetEntityObject('Content Items', this.contextUser);
                contentItem.ContentSourceID = contentSourceParams.contentSourceID;
                contentItem.Name = contentSourceParams.name;
                contentItem.Description = RSSContentItem.description || await this.engine.getContentItemDescription(contentSourceParams, this.contextUser);
                contentItem.ContentTypeID = contentSourceParams.ContentTypeID;
                contentItem.ContentFileTypeID = contentSourceParams.ContentFileTypeID;
                contentItem.ContentSourceTypeID = contentSourceParams.ContentSourceTypeID;
                contentItem.Checksum = await this.getChecksumFromRSSItem(RSSContentItem, this.contextUser);
                contentItem.URL = RSSContentItem.link || contentSourceParams.URL;
                contentItem.Text = JSON.stringify(RSSContentItem);
                await contentItem.Save();
                contentItemsToProcess.push(contentItem); // Content item was added, add to list
            }
        }
        return contentItemsToProcess;
    }

    /**
     * Downloads and parses the RSS feed at `url`.
     * @param url - Feed URL; it is first checked with urlIsValid().
     * @returns Array of RSSItem objects in feed order, with every field defaulted to '' when
     *          absent and `content` passed through engine.parseHTML. Returns an empty array
     *          (after logging) when the URL is invalid or fetching/parsing fails.
     */
    async parseRSSFeed(url) {
        try {
            if (!(await this.urlIsValid(url))) {
                throw new Error(`Invalid URL: ${url}`);
            }
            const parser = new Parser();
            const feed = await parser.parseURL(url);
            // Map each item to an RSSItem object. The mapping awaits parseHTML, so every mapped
            // promise is awaited here: an async callback inside forEach would let this method
            // return before the items were added and would leak rejections past the try/catch.
            return await Promise.all(feed.items.map(async (item) => {
                const rssItem = new RSSItem();
                rssItem.title = item.title ?? '';
                rssItem.link = item.link ?? '';
                rssItem.description = item.description ?? '';
                rssItem.pubDate = item.pubDate ?? '';
                rssItem.guid = item.guid ?? '';
                rssItem.category = item.category ?? '';
                const content = item['content:encoded'] ?? item['content'] ?? '';
                rssItem.content = await this.engine.parseHTML(content);
                rssItem.author = item.author ?? '';
                rssItem.comments = item.comments ?? '';
                rssItem.source = item.source ?? '';
                return rssItem;
            }));
        }
        catch (error) {
            console.error('Error fetching RSS feed:', error);
            return [];
        }
    }

    /**
     * Issues an HTTP HEAD request against `url`.
     * @param url - URL to probe.
     * @returns true only when the response status is exactly 200; false (after logging) when the request throws.
     */
    async urlIsValid(url) {
        try {
            const response = await axios.head(url);
            return response.status === 200;
        }
        catch (e) {
            console.error(`Invalid URL: ${url}`);
            return false;
        }
    }

    /**
     * Computes the change-detection checksum of a feed entry.
     * @param RSSContentItem - Feed entry; its JSON.stringify output is hashed.
     * @param contextUser - Not used by this implementation.
     * @returns Hex-encoded SHA-256 digest of the serialized entry.
     */
    async getChecksumFromRSSItem(RSSContentItem, contextUser) {
        const hash = crypto.createHash('sha256').update(JSON.stringify(RSSContentItem)).digest('hex');
        return hash;
    }
};
172
// Applies the RegisterClass(AutotagBase, 'AutotagRSSFeed') decorator and the "design:paramtypes" metadata
// (an empty constructor parameter list) to the class, rebinding AutotagRSSFeed to whatever __decorate returns.
+ AutotagRSSFeed = __decorate([
173
+ RegisterClass(AutotagBase, 'AutotagRSSFeed'),
174
+ __metadata("design:paramtypes", [])
175
+ ], AutotagRSSFeed);
176
// Named export of the (decorated) class.
+ export { AutotagRSSFeed };
177
+ //# sourceMappingURL=AutotagRSSFeed.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"AutotagRSSFeed.js","sourceRoot":"","sources":["../../../src/RSSFeed/generic/AutotagRSSFeed.ts"],"names":[],"mappings":";;;;;;;;;AAAA,OAAO,EAAY,QAAQ,EAAE,OAAO,EAAE,MAAM,sBAAsB,CAAC;AACnE,OAAO,EAAE,aAAa,EAAE,MAAM,wBAAwB,CAAC;AACvD,OAAO,EAAE,WAAW,EAAE,MAAM,YAAY,CAAC;AACzC,OAAO,EAAE,iBAAiB,EAAuB,MAAM,cAAc,CAAC;AAEtE,OAAO,EAAE,OAAO,EAAE,MAAM,aAAa,CAAC;AACtC,OAAO,KAAK,MAAM,OAAO,CAAA;AACzB,OAAO,MAAM,MAAM,QAAQ,CAAA;AAC3B,OAAO,MAAM,MAAM,YAAY,CAAA;AAC/B,OAAO,MAAM,MAAM,QAAQ,CAAC;AAC5B,MAAM,CAAC,MAAM,CAAC,EAAE,KAAK,EAAE,IAAI,EAAE,CAAC,CAAA;AAGvB,IAAM,cAAc,GAApB,MAAM,cAAe,SAAQ,WAAW;IAK3C;QACI,KAAK,EAAE,CAAC;QACR,IAAI,CAAC,MAAM,GAAG,iBAAiB,CAAC,QAAQ,CAAC;IAC7C,CAAC;IAES,cAAc;QACpB,OAAO,IAAI,CAAC,WAAW,CAAC;IAC5B,CAAC;IAED;;;;OAIG;IACI,KAAK,CAAC,OAAO,CAAC,WAAqB;QACtC,IAAI,CAAC,WAAW,GAAG,WAAW,CAAC;QAC/B,IAAI,CAAC,mBAAmB,GAAG,MAAM,IAAI,CAAC,MAAM,CAAC,4BAA4B,CAAC,UAAU,EAAE,IAAI,CAAC,WAAW,CAAC,CAAC;QACxG,MAAM,cAAc,GAAG,MAAM,IAAI,CAAC,MAAM,CAAC,oBAAoB,CAAC,IAAI,CAAC,WAAW,EAAE,IAAI,CAAC,mBAAmB,CAAC,CAAC;QAC1G,MAAM,qBAAqB,GAAG,MAAM,IAAI,CAAC,wBAAwB,CAAC,cAAc,CAAC,CAAC;QAClF,MAAM,IAAI,CAAC,MAAM,CAAC,4BAA4B,CAAC,qBAAqB,EAAE,IAAI,CAAC,WAAW,CAAC,CAAC;IAC5F,CAAC;IAED;;;;;OAKG;IACI,KAAK,CAAC,wBAAwB,CAAC,cAAqC;QACvE,MAAM,qBAAqB,GAAwB,EAAE,CAAA;QACrD,KAAK,MAAM,aAAa,IAAI,cAAc,EAAE,CAAC;YAEzC,2FAA2F;YAC3F,MAAM,sBAAsB,GAAG,MAAM,IAAI,CAAC,MAAM,CAAC,sBAAsB,CAAC,aAAa,EAAE,IAAI,CAAC,WAAW,CAAC,CAAC;YACzG,IAAI,sBAAsB,EAAE,CAAC;gBACzB,wDAAwD;gBACxD,sBAAsB,CAAC,OAAO,CAAC,CAAC,KAAK,EAAE,GAAG,EAAE,EAAE;oBAC1C,IAAI,GAAG,IAAI,IAAI,EAAE,CAAC;wBACb,IAAY,CAAC,GAAG,CAAC,GAAG,KAAK,CAAC;oBAC/B,CAAC;gBACL,CAAC,CAAC,CAAA;YACN,CAAC;YAED,MAAM,mBAAmB,GAAwB;gBAC7C,eAAe,EAAE,aAAa,CAAC,EAAE;gBACjC,IAAI,EAAE,aAAa,CAAC,IAAI;gBACxB,aAAa,EAAE,aAAa,CAAC,aAAa;gBAC1C,iBAAiB,EAAE,aAAa,CAAC,iBAAiB;gBAClD,mBAAmB,EAAE,aAAa,CAAC,mBAAmB;gBACtD,GAAG,EAAE,aAAa,CAAC,GAAG;aACzB,CAAA;YAED,MAAM,WAAW,GAAc,MAAM,IAAI,CAAC,YAAY,CAAC,mBAAmB,CAAC,GAAG,CAAC,CAAC;YAEhF,MAAM,YAAY,GAAwB,MAAM,IAAI,CAAC,6BAA6B,CAAC,WAAW,EAAE,mBAAmB,
CAAC,CAAA;YAEpH,IAAI,YAAY,IAAI,YAAY,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;gBAC1C,qBAAqB,CAAC,IAAI,CAAC,GAAG,YAAY,CAAC,CAAC;YAChD,CAAC;iBACI,CAAC;gBACF,oCAAoC;gBACpC,OAAO,CAAC,GAAG,CAAC,yDAAyD,aAAa,CAAC,GAAG,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC;YACtG,CAAC;QACL,CAAC;QACD,OAAO,qBAAqB,CAAA;IAChC,CAAC;IAEM,KAAK,CAAC,6BAA6B,CAAC,WAAsB,EAAE,mBAAwC;QACvG,MAAM,qBAAqB,GAAwB,EAAE,CAAC;QACtD,KAAK,MAAM,cAAc,IAAI,WAAW,EAAE,CAAC;YACvC,MAAM,EAAE,GAAG,IAAI,OAAO,EAAE,CAAC;YACzB,MAAM,OAAO,GAAG,MAAM,EAAE,CAAC,OAAO,CAAC;gBAC7B,UAAU,EAAE,eAAe;gBAC3B,WAAW,EAAE,sBAAsB,mBAAmB,CAAC,eAAe,iBAAiB,cAAc,CAAC,IAAI,uBAAuB,cAAc,CAAC,WAAW,IAAI,EAAE,qFAAqF;gBACtP,UAAU,EAAE,eAAe;aAC9B,EAAE,IAAI,CAAC,WAAW,CAAC,CAAA;YAEpB,IAAI,OAAO,CAAC,OAAO,IAAI,OAAO,CAAC,OAAO,CAAC,MAAM,EAAE,CAAC;gBAC5C,MAAM,iBAAiB,GAAuB,OAAO,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC;gBACjE,uFAAuF;gBACvF,MAAM,cAAc,GAAW,iBAAiB,CAAC,QAAQ,CAAA;gBACzD,MAAM,OAAO,GAAW,MAAM,IAAI,CAAC,sBAAsB,CAAC,cAAc,EAAE,IAAI,CAAC,WAAW,CAAC,CAAA;gBAE3F,IAAI,cAAc,KAAK,OAAO,EAAE,CAAC;oBAC7B,sCAAsC;oBACtC,MAAM,EAAE,GAAG,IAAI,QAAQ,EAAE,CAAC;oBAC1B,MAAM,WAAW,GAAG,MAAM,EAAE,CAAC,eAAe,CAAoB,eAAe,EAAE,IAAI,CAAC,WAAW,CAAC,CAAC;oBACnG,WAAW,CAAC,IAAI,CAAC,iBAAiB,CAAC,EAAE,CAAC,CAAC;oBACvC,WAAW,CAAC,QAAQ,GAAG,OAAO,CAAA;oBAC9B,WAAW,CAAC,IAAI,GAAG,IAAI,CAAC,SAAS,CAAC,cAAc,CAAC,CAAA;oBAEjD,MAAM,WAAW,CAAC,IAAI,EAAE,CAAC;oBACzB,qBAAqB,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC,CAAC,yCAAyC;gBACtF,CAAC;YACL,CAAC;iBACI,CAAC;gBACF,2CAA2C;gBAC3C,MAAM,EAAE,GAAG,IAAI,QAAQ,EAAE,CAAC;gBAC1B,MAAM,WAAW,GAAG,MAAM,EAAE,CAAC,eAAe,CAAoB,eAAe,EAAE,IAAI,CAAC,WAAW,CAAC,CAAC;gBACnG,WAAW,CAAC,eAAe,GAAG,mBAAmB,CAAC,eAAe,CAAA;gBACjE,WAAW,CAAC,IAAI,GAAG,mBAAmB,CAAC,IAAI,CAAA;gBAC3C,WAAW,CAAC,WAAW,GAAG,cAAc,CAAC,WAAW,IAAI,MAAM,IAAI,CAAC,MAAM,CAAC,yBAAyB,CAAC,mBAAmB,EAAE,IAAI,CAAC,WAAW,CAAC,CAAA;gBAC1I,WAAW,CAAC,aAAa,GAAG,mBAAmB,CAAC,aAAa,CAAA;gBAC7D,WAAW,CAAC,iBAAiB,GAAG,mBAAmB,CAAC,iBAAiB,CAAA;gBACrE,WAAW,CAAC,mBAAmB,GAAG,mBAAmB,CAAC,mBAAmB,CAAA;gBACzE,WAAW,CAAC,QAAQ,GAAG,MAAM,IAAI,CAAC,sBAAsB,CAAC,cAAc,EAAE,IAAI,CAAC,WAAW,CAAC,CAAA;gBAC
1F,WAAW,CAAC,GAAG,GAAG,cAAc,CAAC,IAAI,IAAI,mBAAmB,CAAC,GAAG,CAAA;gBAChE,WAAW,CAAC,IAAI,GAAG,IAAI,CAAC,SAAS,CAAC,cAAc,CAAC,CAAA;gBAEjD,MAAM,WAAW,CAAC,IAAI,EAAE,CAAC;gBACzB,qBAAqB,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC,CAAC,sCAAsC;YAEnF,CAAC;QACL,CAAC;QACD,OAAO,qBAAqB,CAAA;IAChC,CAAC;IAEM,KAAK,CAAC,YAAY,CAAC,GAAW;QACjC,IAAI,CAAC;YACD,IAAG,MAAM,IAAI,CAAC,UAAU,CAAC,GAAG,CAAC,EAAE,CAAC;gBAC5B,MAAM,QAAQ,GAAc,EAAE,CAAA;gBAC9B,MAAM,MAAM,GAAG,IAAI,MAAM,EAAE,CAAC;gBAC5B,MAAM,IAAI,GAAG,MAAM,MAAM,CAAC,QAAQ,CAAC,GAAG,CAAC,CAAC;gBACxC,MAAM,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC;gBAEzB,sEAAsE;gBACtE,KAAK,CAAC,OAAO,CAAC,KAAK,EAAE,IAAS,EAAE,EAAE;oBAC9B,MAAM,OAAO,GAAG,IAAI,OAAO,EAAE,CAAC;oBAC9B,OAAO,CAAC,KAAK,GAAG,IAAI,CAAC,KAAK,IAAI,EAAE,CAAC;oBACjC,OAAO,CAAC,IAAI,GAAG,IAAI,CAAC,IAAI,IAAI,EAAE,CAAC;oBAC/B,OAAO,CAAC,WAAW,GAAG,IAAI,CAAC,WAAW,IAAI,EAAE,CAAC;oBAC7C,OAAO,CAAC,OAAO,GAAG,IAAI,CAAC,OAAO,IAAI,EAAE,CAAC;oBACrC,OAAO,CAAC,IAAI,GAAG,IAAI,CAAC,IAAI,IAAI,EAAE,CAAC;oBAC/B,OAAO,CAAC,QAAQ,GAAG,IAAI,CAAC,QAAQ,IAAI,EAAE,CAAC;oBACvC,MAAM,OAAO,GAAG,IAAI,CAAC,iBAAiB,CAAC,IAAI,IAAI,CAAC,SAAS,CAAC,IAAI,EAAE,CAAC;oBACjE,OAAO,CAAC,OAAO,GAAG,MAAM,IAAI,CAAC,MAAM,CAAC,SAAS,CAAC,OAAO,CAAC,CAAC;oBACvD,OAAO,CAAC,MAAM,GAAG,IAAI,CAAC,MAAM,IAAI,EAAE,CAAC;oBACnC,OAAO,CAAC,QAAQ,GAAG,IAAI,CAAC,QAAQ,IAAI,EAAE,CAAC;oBACvC,OAAO,CAAC,MAAM,GAAG,IAAI,CAAC,MAAM,IAAI,EAAE,CAAC;oBACnC,QAAQ,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;gBAC3B,CAAC,CAAC,CAAC;gBAEH,OAAO,QAAQ,CAAA;YACnB,CAAC;iBACI,CAAC;gBACF,MAAM,IAAI,KAAK,CAAC,gBAAgB,GAAG,EAAE,CAAC,CAAC;YAC3C,CAAC;QACL,CAAC;QACD,OAAO,KAAK,EAAE,CAAC;YACX,OAAO,CAAC,KAAK,CAAC,0BAA0B,EAAE,KAAK,CAAC,CAAC;YACjD,OAAO,EAAE,CAAC;QACZ,CAAC;IACP,CAAC;IAES,KAAK,CAAC,UAAU,CAAC,GAAW;QAClC,IAAI,CAAC;YACD,MAAM,QAAQ,GAAG,MAAM,KAAK,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;YACvC,OAAO,QAAQ,CAAC,MAAM,KAAK,GAAG,CAAC;QACnC,CAAC;QACD,OAAO,CAAC,EAAE,CAAC;YACP,OAAO,CAAC,KAAK,CAAC,gBAAgB,GAAG,EAAE,CAAC,CAAC;YACrC,OAAO,KAAK,CAAC;QACjB,CAAC;IACL,CAAC;IAEM,KAAK,CAAC,sBAAsB,CAAC,cAAuB,EAAE,WAAqB;QAC9E,MAAM,IAAI,GAAG,MAAM,CAAC,UAAU,CAAC,QAAQ
,CAAC,CAAC,MAAM,CAAC,IAAI,CAAC,SAAS,CAAC,cAAc,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CAAA;QAC7F,OAAO,IAAI,CAAA;IACf,CAAC;CACJ,CAAA;AA9KY,cAAc;IAD1B,aAAa,CAAC,WAAW,EAAE,gBAAgB,CAAC;;GAChC,cAAc,CA8K1B"}
@@ -0,0 +1,13 @@
1
// Data holder for a single RSS feed entry. Every field is an optional string.
// AutotagRSSFeed.parseRSSFeed assigns each field from the parsed feed item, using '' when the item lacks it.
+ export declare class RSSItem {
2
+ title?: string;
3
// Used by AutotagRSSFeed as the URL of the stored content item when non-empty.
+ link?: string;
4
+ description?: string;
5
+ pubDate?: string;
6
+ guid?: string;
7
+ category?: string;
8
// Filled by parseRSSFeed with the result of engine.parseHTML over the item's 'content:encoded' or 'content' value.
+ content?: string;
9
+ author?: string;
10
+ comments?: string;
11
+ source?: string;
12
+ }
13
+ //# sourceMappingURL=RSS.types.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"RSS.types.d.ts","sourceRoot":"","sources":["../../../src/RSSFeed/generic/RSS.types.ts"],"names":[],"mappings":"AAAA,qBAAa,OAAO;IAChB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,MAAM,CAAC,EAAE,MAAM,CAAC;CACnB"}
@@ -0,0 +1,3 @@
1
// Runtime form of RSSItem: the class body is empty, so instances start with no own fields;
// the fields listed in the typings exist only once a caller assigns them.
+ export class RSSItem {
2
+ }
3
+ //# sourceMappingURL=RSS.types.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"RSS.types.js","sourceRoot":"","sources":["../../../src/RSSFeed/generic/RSS.types.ts"],"names":[],"mappings":"AAAA,MAAM,OAAO,OAAO;CAWnB"}
@@ -0,0 +1,3 @@
1
+ export * from './generic/RSS.types.js';
2
+ export * from './generic/AutotagRSSFeed.js';
3
+ //# sourceMappingURL=index.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/RSSFeed/index.ts"],"names":[],"mappings":"AAAA,cAAc,qBAAqB,CAAA;AACnC,cAAc,0BAA0B,CAAA"}
@@ -0,0 +1,3 @@
1
+ export * from './generic/RSS.types.js';
2
+ export * from './generic/AutotagRSSFeed.js';
3
+ //# sourceMappingURL=index.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/RSSFeed/index.ts"],"names":[],"mappings":"AAAA,cAAc,qBAAqB,CAAA;AACnC,cAAc,0BAA0B,CAAA"}
@@ -0,0 +1,85 @@
1
+ import { AutotagBase } from '../../Core/index.js';
2
+ import { ContentSourceParams } from '../../Engine/index.js';
3
+ import { UserInfo } from '@memberjunction/core';
4
+ import { ContentSourceEntity, ContentItemEntity } from '@memberjunction/core-entities';
5
+ import * as cheerio from 'cheerio';
6
// Type declarations for the website autotag provider (an AutotagBase subclass). Only signatures are visible here;
// behavioral notes below restate the existing descriptions and are hedged where the implementation is not shown.
+ export declare class AutotagWebsite extends AutotagBase {
7
+ private contextUser;
8
+ private engine;
9
+ protected contentSourceTypeID: string;
10
+ protected CrawlOtherSitesInTopLevelDomain: boolean;
11
+ protected CrawlSitesInLowerLevelDomain: boolean;
12
+ protected MaxDepth: number;
13
+ protected RootURL: string;
14
+ protected URLPattern: string;
15
+ protected visitedURLs: Set<string>;
16
+ constructor();
17
+ protected getContextUser(): UserInfo;
18
+ /**
19
+ * Implements the abstract method from the AutotagBase class that runs the entire autotagging process; this method is the entry point for that process.
20
+ * It initializes the connection, retrieves the content sources corresponding to the content source type, sets the content items that we want to process,
21
+ * extracts and processes the text, and sets the results in the database.
+ * @param contextUser - UserInfo for the run.
+ * @returns Promise that resolves with no value.
22
+ */
23
+ Autotag(contextUser: UserInfo): Promise<void>;
24
+ /**
25
+ * Given a list of content sources, retrieve all content items associated with those content sources.
26
+ * The content items are then processed to determine if they have been modified since the last time they were processed or if they are new content items.
27
+ * @param contentSources - The ContentSourceEntity objects to retrieve content items for.
28
+ * @returns Promise resolving to an array of ContentItemEntity objects (presumably only the new or modified ones — confirm against the implementation).
29
+ */
30
+ SetContentItemsToProcess(contentSources: ContentSourceEntity[]): Promise<ContentItemEntity[]>;
31
+ /**
32
+ * Given a list of content item links, check if the content item already exists in the database.
33
+ * If the content item exists, check if the content item has been modified since the last time it was processed.
34
+ * If the content item does not exist, create a new content item and add it to the list of content items to process.
35
+ * @param contentItemLinks - Links (strings) of the candidate content items.
36
+ * @param contentSourceParams - ContentSourceParams value; presumably describes the content source the links belong to (TODO confirm).
37
+ * @param contextUser - UserInfo passed in by the caller.
38
+ * @returns Promise resolving to an array of ContentItemEntity objects to process.
39
+ */
40
+ protected SetNewAndModifiedContentItems(contentItemLinks: string[], contentSourceParams: ContentSourceParams, contextUser: UserInfo): Promise<ContentItemEntity[]>;
41
+ fetchPageContent(url: string): Promise<string>;
42
+ getTextWithLineBreaks(element: any, $: cheerio.CheerioAPI): string;
43
+ /**
44
+ * Given a URL, this function extracts text from a webpage.
45
+ * @param url - URL of the webpage.
46
+ * @returns Promise resolving to the text extracted from the webpage.
47
+ */
48
+ parseWebPage(url: string): Promise<string>;
49
+ /**
50
+ * Given a root URL that corresponds to a content source, retrieve all the links in accordance with the crawl settings.
51
+ * If the crawl settings are set to crawl other sites in the top level domain, then all links in the top level domain will be retrieved.
52
+ * If the crawl settings are set to crawl sites in lower level domains, then the function is recursively called to retrieve all links in the lower level domains.
53
+ * @param url - URL to start from.
+ * @param rootURL - Root URL of the content source.
+ * @param regex - RegExp; presumably used to filter the links that are kept (TODO confirm).
54
+ * @returns Promise resolving to an array of link strings.
55
+ */
56
+ protected getAllLinksFromContentSource(url: string, rootURL: string, regex: RegExp): Promise<string[]>;
57
+ /**
58
+ * For a given URL, retrieves all other links at that top level domain.
59
+ * @param url - URL to inspect.
60
+ * @param rootURL - Root URL of the content source.
61
+ * NOTE(review): this signature has no visitedURLs parameter; results presumably accumulate in the protected visitedURLs field — confirm.
62
+ * @returns Promise that resolves with no value.
63
+ */
64
+ protected getTopLevelLinks(url: string, rootURL: string): Promise<void>;
65
+ /**
66
+ * Simple check to see if the URL is at the highest level domain.
67
+ * @param url - URL to check.
68
+ * @returns boolean result of the check.
69
+ */
70
+ protected isHighestDomain(url: string): boolean;
71
+ protected getBasePath(url: string): string;
72
+ protected getPathName(url: string): string;
73
+ protected urlIsValid(url: string): Promise<boolean>;
74
+ /**
75
+ * For a given URL, retrieves all links at lower level domains up to the specified crawl depth.
76
+ * @param url - URL to start from.
77
+ * @param rootURL - Root URL of the content source.
78
+ * @param crawlDepth - Crawl depth limit (number).
79
+ * @param scrapedURLs - Set of URL strings; presumably the URLs collected so far (TODO confirm).
+ * @param regex - RegExp; presumably used to filter the links that are kept (TODO confirm).
80
+ * @returns Promise resolving to a Set of URL strings.
81
+ */
82
+ protected getLowerLevelLinks(url: string, rootURL: string, crawlDepth: number, scrapedURLs: Set<string>, regex: RegExp): Promise<Set<string>>;
83
+ protected delay(ms: number): Promise<unknown>;
84
+ }
85
+ //# sourceMappingURL=AutotagWebsite.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"AutotagWebsite.d.ts","sourceRoot":"","sources":["../../../src/Websites/generic/AutotagWebsite.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,WAAW,EAAE,MAAM,YAAY,CAAC;AACzC,OAAO,EAAqB,mBAAmB,EAAE,MAAM,cAAc,CAAC;AAEtE,OAAO,EAAE,QAAQ,EAAqB,MAAM,sBAAsB,CAAC;AACnE,OAAO,EAAE,mBAAmB,EAAE,iBAAiB,EAAE,MAAM,+BAA+B,CAAC;AACvF,OAAO,KAAK,OAAO,MAAM,SAAS,CAAC;AAMnC,qBACa,cAAe,SAAQ,WAAW;IAC3C,OAAO,CAAC,WAAW,CAAW;IAC9B,OAAO,CAAC,MAAM,CAAoB;IAClC,SAAS,CAAC,mBAAmB,EAAE,MAAM,CAAA;IACrC,SAAS,CAAC,+BAA+B,EAAE,OAAO,CAAC;IACnD,SAAS,CAAC,4BAA4B,EAAE,OAAO,CAAC;IAChD,SAAS,CAAC,QAAQ,EAAE,MAAM,CAAC;IAC3B,SAAS,CAAC,OAAO,EAAE,MAAM,CAAC;IAC1B,SAAS,CAAC,UAAU,EAAE,MAAM,CAAC;IAC7B,SAAS,CAAC,WAAW,EAAE,GAAG,CAAC,MAAM,CAAC,CAAC;;IAQnC,SAAS,CAAC,cAAc,IAAI,QAAQ;IAIpC;;;;OAIG;IACU,OAAO,CAAC,WAAW,EAAE,QAAQ,GAAG,OAAO,CAAC,IAAI,CAAC;IAS1D;;;;;OAKG;IACU,wBAAwB,CAAC,cAAc,EAAE,mBAAmB,EAAE,GAAG,OAAO,CAAC,iBAAiB,EAAE,CAAC;IAoD1G;;;;;;;;OAQG;cACa,6BAA6B,CAAC,gBAAgB,EAAE,MAAM,EAAE,EAAE,mBAAmB,EAAE,mBAAmB,EAAE,WAAW,EAAE,QAAQ,GAAG,OAAO,CAAC,iBAAiB,EAAE,CAAC;IAsE3J,gBAAgB,CAAC,GAAG,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC;IAKpD,qBAAqB,CAAC,OAAO,EAAE,GAAG,EAAE,CAAC,EAAE,OAAO,CAAC,UAAU,GAAG,MAAM;IAgBzE;;;;OAIG;IACU,YAAY,CAAC,GAAG,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC;IAavD;;;;;;OAMG;cACa,4BAA4B,CAAC,GAAG,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,EAAE,CAAC;IAa5G;;;;;;OAMG;cACa,gBAAgB,CAAC,GAAG,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC;IAmC7E;;;;OAIG;IACH,SAAS,CAAC,eAAe,CAAC,GAAG,EAAE,MAAM,GAAG,OAAO;IAW/C,SAAS,CAAC,WAAW,CAAC,GAAG,EAAE,MAAM,GAAG,MAAM;IAW1C,SAAS,CAAC,WAAW,CAAC,GAAG,EAAE,MAAM,GAAG,MAAM;cAa1B,UAAU,CAAC,GAAG,EAAE,MAAM,GAAG,OAAO,CAAC,OAAO,CAAC;IAWzD;;;;;;;OAOG;cACa,kBAAkB,CAAC,GAAG,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,EAAE,UAAU,EAAE,MAAM,EAAE,WAAW,EAAE,GAAG,CAAC,MAAM,CAAC,EAAE,KAAK,EAAE,MAAM,GAAG,OAAO,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC;cA+CnI,KAAK,CAAC,EAAE,EAAE,MAAM;CAGnC"}