substack-feed-api 1.1.2 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +233 -55
- package/dist/substackFeedApi.js +10051 -3372
- package/dist/substackFeedApi.umd.cjs +8 -11
- package/index.d.ts +55 -95
- package/package.json +12 -10
package/README.md
CHANGED
|
@@ -1,103 +1,281 @@
|
|
|
1
1
|
# Substack Feed API
|
|
2
2
|
|
|
3
|
-
|
|
3
|
+
`substack-feed-api` is a small TypeScript utility for turning RSS XML into typed objects using Cheerio, with first-class support for Substack and Goodreads feeds.
|
|
4
4
|
|
|
5
5
|
## Features
|
|
6
6
|
|
|
7
|
-
- **
|
|
8
|
-
-
|
|
9
|
-
-
|
|
10
|
-
-
|
|
7
|
+
- **Type-safe** mapping from RSS XML to your own TypeScript types via generic selector maps.
|
|
8
|
+
- Built-in helpers for Substack posts and Goodreads bookshelf RSS feeds (including shelves / reading status).
|
|
9
|
+
- Uses Cheerio in XML mode, so it works well with namespaced tags like `content:encoded`.
|
|
10
|
+
- Graceful error handling: failures are logged to `console.error` and a configurable fallback is returned instead of throwing.
|
|
11
11
|
|
|
12
|
-
##
|
|
12
|
+
## Installation
|
|
13
13
|
|
|
14
|
-
|
|
14
|
+
```bash
|
|
15
|
+
npm install substack-feed-api
|
|
16
|
+
# or
|
|
17
|
+
yarn add substack-feed-api
|
|
18
|
+
# or
|
|
19
|
+
pnpm add substack-feed-api
|
|
20
|
+
```
|
|
15
21
|
|
|
16
|
-
|
|
17
|
-
- npm (latest version)
|
|
22
|
+
## Quick Start
|
|
18
23
|
|
|
19
|
-
###
|
|
24
|
+
### Parsing Substack RSS
|
|
20
25
|
|
|
21
|
-
|
|
26
|
+
Substack exposes a standard RSS 2.0 feed with a `<channel>` and multiple `<item>` entries; each item contains fields like `<title>`, `<description>`, `<link>`, `<pubDate>`, and `<content:encoded>` for the HTML body.
|
|
22
27
|
|
|
23
|
-
|
|
28
|
+
```ts
|
|
29
|
+
import { parseSubstackRss, SubstackItem } from 'substack-feed-api';
|
|
24
30
|
|
|
25
|
-
|
|
26
|
-
|
|
31
|
+
const xml = await fetch('https://example.substack.com/feed').then(r => r.text());
|
|
32
|
+
|
|
33
|
+
const posts: SubstackItem[] = parseSubstackRss(xml);
|
|
34
|
+
|
|
35
|
+
// Example item
|
|
36
|
+
// {
|
|
37
|
+
// title: 'Both Not Half by Jassa Ahluwalia',
|
|
38
|
+
// description: 'A Humorous Journey Through Identity, Yet Lacking Cohesion',
|
|
39
|
+
// link: 'https://…',
|
|
40
|
+
// pubDate: 'Sun, 06 Oct 2024 15:35:17 GMT',
|
|
41
|
+
// content: '<p>Jassa Ahluwalia\'s <strong>Both Not Half</strong>…'
|
|
42
|
+
// }
|
|
27
43
|
```
|
|
28
44
|
|
|
29
|
-
|
|
45
|
+
You can override any selector if your feed schema differs:
|
|
30
46
|
|
|
31
|
-
```
|
|
32
|
-
|
|
47
|
+
```ts
|
|
48
|
+
const postsCustom = parseSubstackRss(xml, {
|
|
49
|
+
selectors: {
|
|
50
|
+
// use <description> as content
|
|
51
|
+
content: 'description',
|
|
52
|
+
},
|
|
53
|
+
});
|
|
33
54
|
```
|
|
34
55
|
|
|
35
|
-
###
|
|
56
|
+
### Parsing Goodreads Bookshelf RSS
|
|
36
57
|
|
|
37
|
-
|
|
58
|
+
Goodreads’ “bookshelf” RSS feed exposes many book-related tags per `<item>` (e.g. `<title>`, `<book_description>`, `<book_large_image_url>`, `<author_name>`, `<user_shelves>`).
|
|
38
59
|
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
60
|
+
The library exposes a Goodreads-specific helper that returns a higher-level `GoodreadsReadingState`:
|
|
61
|
+
|
|
62
|
+
```ts
|
|
63
|
+
import {
|
|
64
|
+
parseGoodreadsRss,
|
|
65
|
+
GoodreadsReadingState,
|
|
66
|
+
} from 'substack-feed-api';
|
|
67
|
+
|
|
68
|
+
const xml = await fetch('<goodreads-list-rss-url>').then(r => r.text());
|
|
69
|
+
|
|
70
|
+
const states: GoodreadsReadingState[] = parseGoodreadsRss(xml);
|
|
71
|
+
|
|
72
|
+
// Example shape:
|
|
73
|
+
// {
|
|
74
|
+
// status: 'WANTS_TO_READ' | 'IS_READING' | 'FINISHED',
|
|
75
|
+
// book: {
|
|
76
|
+
// title: 'Malice (Detective Kaga, #1)',
|
|
77
|
+
// description: 'Acclaimed bestselling novelist Kunihiko Hidaka is found brutally murdered…',
|
|
78
|
+
// cover: 'https://i.gr-assets.com/.../20613611._SY475_.jpg',
|
|
79
|
+
// authors: [{ name: 'Keigo Higashino' }]
|
|
80
|
+
// }
|
|
81
|
+
// }
|
|
47
82
|
```
|
|
48
83
|
|
|
49
|
-
|
|
50
|
-
This function will be called with the parsed feed data as an argument:
|
|
84
|
+
By default, the Goodreads parser derives status from `user_shelves` (e.g. `to-read`, `currently-reading`, `read`).
|
|
51
85
|
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
86
|
+
You can still adjust selectors if Goodreads ever changes tag names:
|
|
87
|
+
|
|
88
|
+
```ts
|
|
89
|
+
const customStates = parseGoodreadsRss(xml, {
|
|
90
|
+
selectors: {
|
|
91
|
+
// Example: use medium image instead of large
|
|
92
|
+
cover: 'book_medium_image_url',
|
|
93
|
+
},
|
|
55
94
|
});
|
|
56
95
|
```
|
|
57
96
|
|
|
58
|
-
|
|
59
|
-
- `getFeedByLink`: Fetch a specific feed by its link.
|
|
60
|
-
- `getPosts`: Get all posts from a feed.
|
|
97
|
+
## API
|
|
61
98
|
|
|
62
|
-
|
|
99
|
+
### `parseRssItems` – Generic Core
|
|
63
100
|
|
|
64
|
-
|
|
101
|
+
```ts
|
|
102
|
+
function parseRssItems<TRaw extends Record<string, string>>(
|
|
103
|
+
xml: string,
|
|
104
|
+
options?: {
|
|
105
|
+
itemSelector?: string;
|
|
106
|
+
selectors?: Partial<Record<keyof TRaw, string>>;
|
|
107
|
+
fallback?: TRaw[];
|
|
108
|
+
}
|
|
109
|
+
): TRaw[];
|
|
110
|
+
```
|
|
65
111
|
|
|
66
|
-
|
|
112
|
+
- `xml`: Full RSS XML string.
|
|
113
|
+
- `itemSelector`: CSS selector for each RSS item node, default `'channel > item'`.
|
|
114
|
+
- `selectors`: Map from property name → CSS selector **relative to each item node**.
|
|
115
|
+
- `fallback`: Array to return if parsing fails (e.g., malformed XML); defaults to `[]`. The error is logged to `console.error` but not thrown.
|
|
67
116
|
|
|
68
|
-
|
|
117
|
+
Example of minimal generic usage:
|
|
69
118
|
|
|
70
|
-
```
|
|
71
|
-
|
|
119
|
+
```ts
|
|
120
|
+
type MinimalItem = {
|
|
121
|
+
title: string;
|
|
122
|
+
link: string;
|
|
123
|
+
};
|
|
124
|
+
|
|
125
|
+
const items = parseRssItems<MinimalItem>(xml, {
|
|
126
|
+
selectors: {
|
|
127
|
+
title: 'title',
|
|
128
|
+
link: 'link',
|
|
129
|
+
},
|
|
130
|
+
});
|
|
131
|
+
```
|
|
132
|
+
|
|
133
|
+
### `parseSubstackRss`
|
|
134
|
+
|
|
135
|
+
```ts
|
|
136
|
+
type SubstackItem = {
|
|
137
|
+
title: string;
|
|
138
|
+
description: string;
|
|
139
|
+
link: string;
|
|
140
|
+
pubDate: string;
|
|
141
|
+
content: string;
|
|
142
|
+
};
|
|
143
|
+
|
|
144
|
+
function parseSubstackRss(
|
|
145
|
+
xml: string,
|
|
146
|
+
options?: {
|
|
147
|
+
itemSelector?: string;
|
|
148
|
+
selectors?: Partial<Record<keyof SubstackItem, string>>;
|
|
149
|
+
fallback?: SubstackItem[];
|
|
150
|
+
}
|
|
151
|
+
): SubstackItem[];
|
|
72
152
|
```
|
|
73
153
|
|
|
74
|
-
|
|
154
|
+
Default selectors (overridable):
|
|
75
155
|
|
|
76
|
-
```
|
|
77
|
-
|
|
156
|
+
```ts
|
|
157
|
+
{
|
|
158
|
+
title: 'title',
|
|
159
|
+
description: 'description',
|
|
160
|
+
link: 'link',
|
|
161
|
+
pubDate: 'pubDate',
|
|
162
|
+
content: 'content\\:encoded',
|
|
163
|
+
}
|
|
78
164
|
```
|
|
79
165
|
|
|
80
|
-
|
|
166
|
+
This matches typical Substack feeds, which use `content:encoded` for the full HTML article body.
|
|
167
|
+
|
|
168
|
+
### `parseGoodreadsRss`
|
|
169
|
+
|
|
170
|
+
```ts
|
|
171
|
+
type BookAuthor = { name: string };
|
|
172
|
+
|
|
173
|
+
type GoodreadsBook = {
|
|
174
|
+
title: string;
|
|
175
|
+
description: string;
|
|
176
|
+
cover: string;
|
|
177
|
+
authors?: BookAuthor[];
|
|
178
|
+
};
|
|
179
|
+
|
|
180
|
+
type GoodreadsReadingStatus = 'IS_READING' | 'FINISHED' | 'WANTS_TO_READ';
|
|
181
|
+
|
|
182
|
+
type GoodreadsReadingState = {
|
|
183
|
+
book: GoodreadsBook;
|
|
184
|
+
status: GoodreadsReadingStatus;
|
|
185
|
+
};
|
|
186
|
+
|
|
187
|
+
function parseGoodreadsRss(
|
|
188
|
+
xml: string,
|
|
189
|
+
options?: {
|
|
190
|
+
itemSelector?: string;
|
|
191
|
+
selectors?: Partial<{
|
|
192
|
+
title: string;
|
|
193
|
+
description: string;
|
|
194
|
+
cover: string;
|
|
195
|
+
author: string;
|
|
196
|
+
shelves: string;
|
|
197
|
+
}>;
|
|
198
|
+
fallback?: GoodreadsReadingState[]; // via raw fallback mapping
|
|
199
|
+
}
|
|
200
|
+
): GoodreadsReadingState[];
|
|
201
|
+
```
|
|
81
202
|
|
|
82
|
-
|
|
83
|
-
|
|
203
|
+
Default Goodreads selectors map RSS tags to an internal flat type:
|
|
204
|
+
|
|
205
|
+
```ts
|
|
206
|
+
{
|
|
207
|
+
title: 'title',
|
|
208
|
+
description: 'book_description',
|
|
209
|
+
cover: 'book_large_image_url',
|
|
210
|
+
author: 'author_name',
|
|
211
|
+
shelves: 'user_shelves',
|
|
212
|
+
}
|
|
84
213
|
```
|
|
85
214
|
|
|
86
|
-
|
|
215
|
+
The parser then:
|
|
87
216
|
|
|
88
|
-
|
|
217
|
+
- Builds a flat raw record from each `<item>`.
|
|
218
|
+
- Maps `shelves` to a `GoodreadsReadingStatus` (e.g., `currently-reading` → `IS_READING`, `read` → `FINISHED`, otherwise `WANTS_TO_READ`).
|
|
219
|
+
- Wraps book information into `GoodreadsBook` and `BookAuthor`.
|
|
89
220
|
|
|
90
|
-
|
|
91
|
-
|
|
221
|
+
## Error Handling
|
|
222
|
+
|
|
223
|
+
All parsing functions follow the same pattern:
|
|
224
|
+
|
|
225
|
+
- Wrap parsing and traversal in a `try/catch`.
|
|
226
|
+
- On error, log a concise entry to `console.error` with context (selectors, item selector).
|
|
227
|
+
- Return the provided `fallback` (default `[]`) instead of throwing.
|
|
228
|
+
|
|
229
|
+
Example:
|
|
230
|
+
|
|
231
|
+
```ts
|
|
232
|
+
const items = parseSubstackRss('<invalid-xml>', {
|
|
233
|
+
fallback: [],
|
|
234
|
+
}); // returns [], logs an error, does not crash your app
|
|
92
235
|
```
|
|
93
236
|
|
|
94
|
-
This
|
|
237
|
+
This makes the library safe to use in background jobs, CLI tools, or edge handlers where a single bad feed should not bring down the entire process.
|
|
95
238
|
|
|
96
|
-
##
|
|
239
|
+
## Extending for Other Feeds
|
|
97
240
|
|
|
98
|
-
|
|
241
|
+
To support another RSS feed type, you generally:
|
|
242
|
+
|
|
243
|
+
1. Define a flat `TRaw` type that contains only string fields.
|
|
244
|
+
2. Call `parseRssItems<TRaw>` with a selector map that matches the feed’s tags.
|
|
245
|
+
3. Map `TRaw` to your domain model in a small wrapper, similar to `parseGoodreadsRss`.
|
|
246
|
+
|
|
247
|
+
Example skeleton:
|
|
248
|
+
|
|
249
|
+
```ts
|
|
250
|
+
type MyFeedRaw = {
|
|
251
|
+
title: string;
|
|
252
|
+
summary: string;
|
|
253
|
+
link: string;
|
|
254
|
+
};
|
|
255
|
+
|
|
256
|
+
type MyFeedItem = {
|
|
257
|
+
title: string;
|
|
258
|
+
summary: string;
|
|
259
|
+
url: string;
|
|
260
|
+
};
|
|
261
|
+
|
|
262
|
+
function parseMyFeed(xml: string): MyFeedItem[] {
|
|
263
|
+
const raw = parseRssItems<MyFeedRaw>(xml, {
|
|
264
|
+
selectors: {
|
|
265
|
+
title: 'title',
|
|
266
|
+
summary: 'summary',
|
|
267
|
+
link: 'link',
|
|
268
|
+
},
|
|
269
|
+
});
|
|
270
|
+
|
|
271
|
+
return raw.map(r => ({
|
|
272
|
+
title: r.title,
|
|
273
|
+
summary: r.summary,
|
|
274
|
+
url: r.link,
|
|
275
|
+
}));
|
|
276
|
+
}
|
|
277
|
+
```
|
|
99
278
|
|
|
100
279
|
## License
|
|
101
280
|
|
|
102
281
|
This project is licensed under the MIT License.
|
|
103
|
-
```
|