@sockethub/platform-feeds 3.0.0-alpha.3 → 4.0.0-alpha.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/index.ts ADDED
@@ -0,0 +1,281 @@
1
+ /**
2
+ * This is a platform for Sockethub implementing Atom/RSS fetching functionality.
3
+ *
4
+ * Developed by Nick Jennings (https://github.com/silverbucket)
5
+ *
6
+ * Sockethub is licensed under the LGPLv3.
7
+ * See the LICENSE file for details.
8
+ *
9
+ * The latest version of this module can be found here:
10
+ * git://github.com/sockethub/sockethub.git
11
+ *
12
+ * For more information about Sockethub visit http://sockethub.org/.
13
+ *
14
+ * This program is distributed in the hope that it will be useful,
15
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
16
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
17
+ */
18
+
19
+ import htmlTags from "html-tags";
20
+ import getPodcastFromFeed, { type Episode, type Meta } from "podparse";
21
+
22
+ import type {
23
+ ActivityStream,
24
+ Logger,
25
+ PlatformCallback,
26
+ PlatformConfig,
27
+ PlatformInterface,
28
+ PlatformSchemaStruct,
29
+ PlatformSession,
30
+ } from "@sockethub/schemas";
31
+
32
+ import PlatformSchema from "./schema.js";
33
+ import {
34
+ ASFeedType,
35
+ ASObjectType,
36
+ type PlatformFeedsActivityActor,
37
+ type PlatformFeedsActivityObject,
38
+ type PlatformFeedsActivityStream,
39
+ } from "./types.js";
40
+
41
+ const MAX_NOTE_LENGTH = 256;
42
+
43
// Cheap first pass: an HTML doctype, or an opening <html>, <body> or custom
// <x-...> element tag.
const basic = /\s?<!doctype html>|(<html\b[^>]*>|<body\b[^>]*>|<x-[^>]+>)+/i;

// Exhaustive pass: an opening tag for any element name listed by `html-tags`.
const full = new RegExp(
    htmlTags.map((tag) => `<${tag}\\b[^>]*>`).join("|"),
    "i",
);

/**
 * Heuristically decide whether a string contains HTML markup.
 *
 * Only the first 1000 characters (after trimming surrounding whitespace) are
 * inspected, so that very large feed bodies do not have to be scanned by the
 * large alternation in `full`.
 *
 * @param s - the text to inspect
 * @returns `true` when an HTML doctype or a known opening tag is found in the
 * inspected prefix, `false` otherwise
 */
function isHtml(s: string): boolean {
    // limit it to a reasonable length to improve performance.
    const limitedString = s.trim().slice(0, 1000);
    // Both patterns must run against the truncated string; testing `full`
    // against the whole input would defeat the purpose of the limit.
    return basic.test(limitedString) || full.test(limitedString);
}
54
+
55
+ /**
56
+ * Class: Feeds
57
+ *
58
+ * Handles all actions related to fetching feeds.
59
+ *
60
+ * Current supported feed types:
61
+ *
62
+ * - RSS (1 & 2)
63
+ *
64
+ * - Atom
65
+ *
66
+ * Uses the `podparse` module to parse the fetched feeds.
67
+ *
68
+ * https://github.com/Tombarr/podcast-feed-parser
69
+ *
70
+ */
71
export default class Feeds implements PlatformInterface {
    id: string;
    debug: Logger;
    config: PlatformConfig = {
        persist: false,
        // Upper bound, in milliseconds, applied to each feed request.
        connectTimeoutMs: 5000,
    };

    /**
     * @constructor
     * @param session - a unique session object for this platform instance
     */
    constructor(session: PlatformSession) {
        this.debug = session.debug;
    }

    /** The schema describing the messages this platform accepts. */
    get schema(): PlatformSchemaStruct {
        return PlatformSchema;
    }

    /**
     * Fetch the feed located at `job.actor.id`. Upon completion, it will send
     * back a response to the original request with a complete ActivityStreams
     * Collection containing all feed items and metadata.
     *
     * Every entry found in the feed is returned; no limit or filtering is
     * applied. When the feed has no entries (or no usable name) the collection
     * summary is "Unknown Feed".
     *
     * @param job - Activity streams object containing job data with actor.id as feed URL
     * @param done - Callback function that receives (error, ASCollection)
     *
     * @example
     * Request:
     *  {
     *    context: "feeds",
     *    type: "fetch",
     *    actor: {
     *      id: 'http://blog.example.com/rss',
     *      type: "feed"
     *    }
     *  }
     *
     *
     *  // Example of the resulting JSON AS Object:
     *
     *  {
     *    context: 'feeds',
     *    type: 'collection',
     *    summary: 'Best Feed Inc.',
     *    totalItems: 10,
     *    items: [
     *      {
     *        context: 'feeds',
     *        type: 'post',
     *        actor: {
     *          type: 'feed',
     *          name: 'Best Feed Inc.',
     *          id: 'http://blog.example.com/rss',
     *          description: 'Where the best feed comes to be the best',
     *          image: {
     *            width: '144',
     *            height: '144',
     *            url: 'http://blog.example.com/images/bestfeed.jpg',
     *          },
     *          link: 'http://blog.example.com',
     *          categories: ['best', 'feed', 'animals'],
     *          language: 'en',
     *          author: 'John Doe'
     *        },
     *        object: {
     *          id: "http://blog.example.com/articles/about-stuff",
     *          type: 'article',
     *          title: 'About stuff...',
     *          url: "http://blog.example.com/articles/about-stuff",
     *          published: "2013-05-28T12:00:00.000Z",
     *          datenum: 1369742400000,
     *          brief: "Brief synopsis of stuff...",
     *          content: "Once upon a time...",
     *          contentType: "text",
     *          media: [
     *            {
     *              length: '13908973',
     *              type: 'audio/mpeg',
     *              url: 'http://blog.example.com/media/thing.mpg'
     *            }
     *          ],
     *          tags: ['foo', 'bar']
     *        }
     *      },
     *      ...
     *    ]
     *  }
     *
     */
    fetch(job: ActivityStream, done: PlatformCallback) {
        // ready to execute job
        this.fetchFeed(job.actor.id, job.id)
            .then((results) => {
                return done(null, {
                    id: job.id || null,
                    context: "feeds",
                    type: "collection",
                    // All items share the same actor, so the first one names the feed.
                    summary: results[0]?.actor?.name || "Unknown Feed",
                    totalItems: results.length,
                    items: results,
                });
            })
            .catch(done);
    }

    /**
     * Cleanup method called when platform instance is being shut down.
     * Currently, no cleanup required for feeds platform.
     *
     * @param done - Callback function to signal completion
     */
    cleanup(done: PlatformCallback) {
        done();
    }

    /**
     * Download the document at `url` and return its body as text.
     *
     * The request is aborted after `config.connectTimeoutMs` milliseconds
     * when that setting is truthy.
     *
     * @param url - the address to request
     * @returns the response body
     * @throws Error when the server answers with a non-2xx status, and
     * whatever the global `fetch` rejects with (network failure, timeout)
     */
    private async makeRequest(url: string): Promise<string> {
        // Explicitly typed so that a signal can be attached below.
        const opts: { signal?: AbortSignal } = {};
        if (this.config.connectTimeoutMs) {
            opts.signal = AbortSignal.timeout(this.config.connectTimeoutMs);
        }
        // Unqualified `fetch` is the global HTTP client, not this class's method.
        const res = await fetch(url, opts);
        if (!res.ok) {
            // Do not hand an error page to the feed parser.
            throw new Error(
                `failed to fetch ${url}: HTTP ${res.status} ${res.statusText}`,
            );
        }
        return await res.text();
    }

    /**
     * Fetches the articles from a feed, adding them to an array for
     * processing.
     *
     * @param url - address of the feed; also used as the actor id
     * @param id - id of the originating job, copied onto every activity
     * @returns one "post" activity per entry found in the feed
     */
    private async fetchFeed(
        url: string,
        id: string,
    ): Promise<Array<PlatformFeedsActivityStream>> {
        this.debug(`fetching ${url}`);
        const res = await this.makeRequest(url);
        const feed = getPodcastFromFeed(res);
        const actor = buildFeedChannel(url, feed.meta);
        const articles: Array<PlatformFeedsActivityStream> = [];

        for (const episode of feed.episodes) {
            const item = episode as FeedItem;
            // The assertion above does not add data: make the feed-level
            // metadata available so entries without their own link can fall
            // back to the feed's link.
            if (!item.meta) {
                item.meta = feed.meta;
            }
            const article = buildFeedStruct(actor);
            article.id = id;
            article.object = buildFeedItem(item);
            articles.push(article);
        }
        this.debug(`fetched ${articles.length} articles`);
        return articles;
    }
}
228
+
229
/**
 * A parsed feed entry as consumed by `buildFeedItem`.
 *
 * These properties are layered on top of podparse's `Episode`; values reach
 * this shape through a type assertion in `fetchFeed`.
 * NOTE(review): presumably not every field is populated for every feed —
 * confirm against podparse's actual output.
 */
interface FeedItem extends Episode {
    /** Feed-level metadata; its `link` is the fallback for entries without one. */
    meta: Meta;
    /** Compared against `pubDate` to decide whether `updated` is reported. */
    date: string;
    /** Reported as the activity object's `tags`. */
    categories: string[];
    /** Reported as the activity object's `media`. */
    media: unknown[];
    /** Reported as the activity object's `source`. */
    source: string;
}
236
+
237
/**
 * Convert one parsed feed entry into the `object` of a "post" activity.
 *
 * Missing optional data is tolerated: an absent description is treated as
 * empty text when classifying the entry, an absent or unparseable
 * publication date yields a `datenum` of 0, and absent feed metadata simply
 * means there is no feed link to fall back on.
 *
 * @param item - the parsed feed entry
 * @returns the activity object describing the entry
 */
function buildFeedItem(item: FeedItem): PlatformFeedsActivityObject {
    // Used for classification only; `content` still reports the raw value.
    const description = item.description || "";
    // Milliseconds since the epoch, or 0 when there is no usable date.
    const dateNum = item.pubDate
        ? Date.parse(item.pubDate.toString()) || 0
        : 0;
    // Entries without their own link fall back to the feed's link.
    const feedLink = item.meta?.link;
    return {
        type:
            description.length > MAX_NOTE_LENGTH
                ? ASObjectType.ARTICLE
                : ASObjectType.NOTE,
        title: item.title,
        // Entries sharing the feed link are told apart by their timestamp.
        id: item.link || `${feedLink ?? ""}#${dateNum}`,
        // A summary identical to the description carries no extra information.
        brief: item.description === item.summary ? undefined : item.summary,
        content: item.description,
        contentType: isHtml(description) ? "html" : "text",
        url: item.link || feedLink,
        published: item.pubDate,
        // Only reported when it differs from the publication date.
        updated: item.pubDate === item.date ? undefined : item.date,
        datenum: dateNum,
        tags: item.categories,
        media: item.media,
        source: item.source,
    };
}
258
+
259
/**
 * Create the envelope of a "post" activity attributed to the given feed.
 * The caller is expected to fill in `id` and `object` afterwards.
 *
 * @param actor - the feed the activity originates from
 * @returns a new activity carrying only `context`, `actor` and `type`
 */
function buildFeedStruct(
    actor: PlatformFeedsActivityActor,
): PlatformFeedsActivityStream {
    const activity: PlatformFeedsActivityStream = {
        context: ASFeedType.FEEDS,
        actor,
        type: "post",
    };
    return activity;
}
268
+
269
/**
 * Describe a feed as an ActivityStreams actor.
 *
 * @param url - the address the feed was fetched from; becomes the actor id
 * @param meta - feed-level metadata produced by the parser
 * @returns the actor; empty or missing metadata values are reported as
 * `undefined` (or an empty list for `categories`)
 */
function buildFeedChannel(url: string, meta: Meta): PlatformFeedsActivityActor {
    // The feed's own link when it has one, otherwise the fetch address.
    const link = meta.link || url;
    return {
        id: url,
        type: ASFeedType.FEED_CHANNEL,
        // Prefer the title, then the feed link, then the fetch address.
        name: meta.title || link,
        link,
        description: meta.description || undefined,
        image: meta.image || undefined,
        categories: meta.category || [],
        language: meta.language || undefined,
        author: meta.author || undefined,
    };
}
package/src/schema.ts ADDED
@@ -0,0 +1,22 @@
1
+ import packageJSON from "../package.json" with { type: "json" };
2
+
3
// JSON-schema fragment for messages addressed to this platform: `type` is
// mandatory and must be "fetch"; `object`, when given, must match one of the
// two referenced feed-parameter definitions.
const messages = {
    required: ["type"],
    properties: {
        type: {
            type: "string",
            enum: ["fetch"],
        },
        object: {
            type: "object",
            oneOf: [
                { $ref: "#/definitions/objectTypes/feed-parameters-date" },
                { $ref: "#/definitions/objectTypes/feed-parameters-url" },
            ],
        },
    },
};

// Platform schema: name, version (taken from package.json) and the message
// schema above.
export default {
    name: "feeds",
    version: packageJSON.version,
    messages,
};
package/src/types.ts ADDED
@@ -0,0 +1,53 @@
1
+ import type { Author } from "podparse";
2
+
3
+ import type {
4
+ ActivityActor,
5
+ ActivityObject,
6
+ ActivityStream,
7
+ } from "@sockethub/schemas";
8
+
9
/** String constants identifying the feeds platform and its actors. */
export enum ASFeedType {
    /** `type` of an actor that represents a feed. */
    FEED_CHANNEL = "feed",
    /** `context` of every activity produced by this platform. */
    FEEDS = "feeds",
}
13
+
14
/** Kinds of activity object a feed entry can be reported as. */
export enum ASObjectType {
    /** A long-form entry. */
    ARTICLE = "article",
    /** A short entry. */
    NOTE = "note",
}
18
+
19
/**
 * The actor of a feeds activity: the feed itself.
 *
 * NOTE(review): `buildFeedChannel` in index.ts assigns `undefined` to
 * `description`, `image`, `language` and `author` when the feed does not
 * provide them, so consumers should not assume those are always present.
 */
export interface PlatformFeedsActivityActor extends ActivityActor {
    type: ASFeedType.FEED_CHANNEL;
    /** Display name of the feed. */
    name: string;
    /** Address the feed was fetched from. */
    id: string;
    /** Link published by the feed. */
    link: string;
    description: string;
    image: unknown;
    categories: string[];
    language: string;
    author: Author;
}
30
+
31
/** An activity emitted by the feeds platform for a single feed entry. */
export interface PlatformFeedsActivityStream extends ActivityStream {
    /** Id of the job that produced this activity, when known. */
    id?: string;
    context: ASFeedType.FEEDS;
    /** The feed the entry belongs to. */
    actor: PlatformFeedsActivityActor;
    type: string;
    /** The entry itself. */
    object?: PlatformFeedsActivityObject;
}
38
+
39
/**
 * A single feed entry expressed as an activity object.
 *
 * NOTE(review): `buildFeedItem` in index.ts assigns `undefined` to `brief`
 * and `updated` in some cases (see the notes on those fields), so consumers
 * should not assume they are always present.
 */
export interface PlatformFeedsActivityObject extends ActivityObject {
    type: ASObjectType;
    title: string;
    id: string;
    /** Entry summary; left undefined when identical to `content`. */
    brief: string;
    content: string;
    /** "html" or "text", as detected from the content. */
    contentType: string;
    url: string;
    published: string;
    /** Left undefined when identical to `published`. */
    updated: string;
    /** `published` as parsed by `Date.parse`, or 0 when it cannot be parsed. */
    datenum: number;
    tags: string[];
    media: unknown[];
    source: string;
}
package/API.md DELETED
@@ -1,197 +0,0 @@
1
- # Classes
2
-
3
- <dl>
4
- <dt><a href="#Feeds">Feeds</a></dt>
5
- <dd><p>Class: Feeds</p>
6
- <p>Handles all actions related to fetching feeds.</p>
7
- <p>Current supported feed types:</p>
8
- <ul>
9
- <li><p>RSS (1 &amp; 2)</p>
10
- </li>
11
- <li><p>Atom</p>
12
- </li>
13
- </ul>
14
- <p>Uses the <code>node-feedparser</code> module as a base tool fetching feeds.</p>
15
- <p><a href="https://github.com/danmactough/node-feedparser">https://github.com/danmactough/node-feedparser</a></p>
16
- </dd>
17
- </dl>
18
-
19
- # Constants
20
-
21
- <dl>
22
- <dt><a href="#FeedParser">FeedParser</a></dt>
23
- <dd><p>This is a platform for Sockethub implementing Atom/RSS fetching functionality.</p>
24
- <p>Developed by Nick Jennings (<a href="https://github.com/silverbucket">https://github.com/silverbucket</a>)</p>
25
- <p>Sockethub is licensed under the LGPLv3.
26
- See the LICENSE file for details.</p>
27
- <p>The latest version of this module can be found here:
28
- git://github.com/sockethub/sockethub.git</p>
29
- <p>For more information about Sockethub visit <a href="http://sockethub.org/">http://sockethub.org/</a>.</p>
30
- <p>This program is distributed in the hope that it will be useful,
31
- but WITHOUT ANY WARRANTY; without even the implied warranty of
32
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.</p>
33
- </dd>
34
- </dl>
35
-
36
- <a name="Feeds"></a>
37
-
38
- # Feeds
39
- Class: Feeds
40
-
41
- Handles all actions related to fetching feeds.
42
-
43
- Current supported feed types:
44
-
45
- - RSS (1 & 2)
46
-
47
- - Atom
48
-
49
- Uses the `node-feedparser` module as a base tool fetching feeds.
50
-
51
- https://github.com/danmactough/node-feedparser
52
-
53
- **Kind**: global class
54
-
55
- * [Feeds](#Feeds)
56
- * [new Feeds(cfg)](#new_Feeds_new)
57
- * [.fetch(job, cb)](#Feeds+fetch)
58
-
59
- <a name="new_Feeds_new"></a>
60
-
61
- ## new Feeds(cfg)
62
- <table>
63
- <thead>
64
- <tr>
65
- <th>Param</th><th>Type</th><th>Description</th>
66
- </tr>
67
- </thead>
68
- <tbody>
69
- <tr>
70
- <td>cfg</td><td><code>object</code></td><td><p>a unique config object for this instance</p>
71
- </td>
72
- </tr> </tbody>
73
- </table>
74
-
75
- <a name="Feeds+fetch"></a>
76
-
77
- ## feeds.fetch(job, cb)
78
- Function: fetch
79
-
80
- Fetches feeds from specified source. Upon completion it will send back a
81
- response to the original request with a complete list of URLs in the feed
82
- and total count.
83
-
84
- **Kind**: instance method of [<code>Feeds</code>](#Feeds)
85
- <table>
86
- <thead>
87
- <tr>
88
- <th>Param</th><th>Type</th><th>Description</th>
89
- </tr>
90
- </thead>
91
- <tbody>
92
- <tr>
93
- <td>job</td><td><code>object</code></td><td><p>Activity streams object containing job data.</p>
94
- </td>
95
- </tr><tr>
96
- <td>cb</td><td><code>object</code></td><td></td>
97
- </tr> </tbody>
98
- </table>
99
-
100
- **Example**
101
- ```js
102
- {
103
- context: "feeds",
104
- type: "fetch",
105
- actor: {
106
- id: 'https://dogfeed.com/user/nick@silverbucket',
107
- type: "person",
108
- name: "nick@silverbucket.net"
109
- },
110
- target: {
111
- id: 'http://blog.example.com/rss',
112
- type: "feed"
113
- },
114
- object: {
115
- type: "parameters",
116
- limit: 10, // default 10
117
- property: 'date'
118
- after: 'Tue Nov 26 2013 02:11:59 GMT+0100 (CET)',
119
-
120
- // ... OR ...
121
-
122
- property: 'link',
123
- after: 'http://www.news.com/articles/man-eats-car',
124
- }
125
- }
126
-
127
-
128
- // Without any parameters specified, the platform will return most
129
- // recent 10 articles fetched from the feed.
130
-
131
- // Example of the resulting JSON AS Object:
132
-
133
- {
134
- context: 'feeds',
135
- type: 'post',
136
- actor: {
137
- type: 'feed',
138
- name: 'Best Feed Inc.',
139
- id: 'http://blog.example.com/rss',
140
- description: 'Where the best feed comes to be the best',
141
- image: {
142
- width: '144',
143
- height: '144',
144
- url: 'http://example.com/images/bestfeed.jpg',
145
- }
146
- favicon: 'http://example.com/favicon.ico',
147
- categories: ['best', 'feed', 'aminals'],
148
- language: 'en',
149
- author: 'John Doe'
150
- },
151
- target: {
152
- id: 'https://dogfeed.com/user/nick@silverbucket',
153
- type: "person",
154
- name: "nick@silverbucket.net"
155
- },
156
- object: {
157
- id: "http://example.com/articles/about-stuff"
158
- type: 'post',
159
- title: 'About stuff...',
160
- url: "http://example.com/articles/about-stuff"
161
- date: "2013-05-28T12:00:00.000Z",
162
- datenum: 1369742400000,
163
- brief_html: "Brief synopsis of stuff...",
164
- brief_text: "Brief synopsis of stuff...",
165
- html: "Once upon a time...",
166
- text: "Once upon a time..."
167
- media: [
168
- {
169
- length: '13908973',
170
- type: 'audio/mpeg',
171
- url: 'http://example.com/media/thing.mpg'
172
- }
173
- ]
174
- tags: ['foo', 'bar']
175
- }
176
- }
177
- ```
178
- <a name="FeedParser"></a>
179
-
180
- # FeedParser
181
- This is a platform for Sockethub implementing Atom/RSS fetching functionality.
182
-
183
- Developed by Nick Jennings (https://github.com/silverbucket)
184
-
185
- Sockethub is licensed under the LGPLv3.
186
- See the LICENSE file for details.
187
-
188
- The latest version of this module can be found here:
189
- git://github.com/sockethub/sockethub.git
190
-
191
- For more information about Sockethub visit http://sockethub.org/.
192
-
193
- This program is distributed in the hope that it will be useful,
194
- but WITHOUT ANY WARRANTY; without even the implied warranty of
195
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
196
-
197
- **Kind**: global constant