@miso.ai/server-feed 0.6.3-beta.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/cli/index.js +40 -0
- package/package.json +24 -0
- package/src/index.js +2 -0
- package/src/stream/date-filter.js +25 -0
- package/src/stream/feed.js +18 -0
- package/src/stream/index.js +3 -0
- package/src/stream/transform.js +61 -0
- package/src/version.js +1 -0
package/cli/index.js
ADDED
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
import { yargs, stream } from '@miso.ai/server-commons';
|
|
3
|
+
import { version, feedStreams } from '../src/index.js';
|
|
4
|
+
|
|
5
|
+
// Declare the command-line interface: global options plus a single catch-all
// command whose handler is `run`.
yargs.build((cli) => {
  cli
    // Let options also be supplied through MISO_FEED-prefixed environment variables.
    .env('MISO_FEED')
    .option('after', {
      alias: 'a',
      describe: 'Only include records after this time',
    })
    .option('update', {
      alias: 'u',
      describe: 'Only include records modified in given duration (3h, 2d, etc.)',
    })
    .option('transform', {
      alias: 't',
      // Previously undocumented, so `--help` gave no hint of what the flag does.
      describe: 'Transform feed items into article records',
      type: 'boolean',
      default: false,
    })
    .option('debug', {
      type: 'boolean',
      default: false,
    })
    // `debug` stays functional but is left out of the help output.
    .hide('debug')
    .command({
      command: '*',
      description: 'Parse items from feed content',
      handler: run,
    })
    .version(version);
});
|
|
33
|
+
|
|
34
|
+
/**
 * Command handler: pipes standard input through the feed streams built from
 * the parsed CLI options and into an output stream.
 *
 * @param {object} options - Parsed command-line options, forwarded to `feedStreams`.
 * @returns {Promise<void>} Settles when `stream.pipeline` settles.
 */
async function run(options) {
  const stages = [
    process.stdin,
    ...feedStreams(options),
    new stream.OutputStream(),
  ];
  await stream.pipeline(...stages);
}
|
package/package.json
ADDED
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "@miso.ai/server-feed",
|
|
3
|
+
"description": "Miso RSS/Atom feed data tools",
|
|
4
|
+
"type": "module",
|
|
5
|
+
"main": "src/index.js",
|
|
6
|
+
"bin": {
|
|
7
|
+
"miso-feed": "cli/index.js"
|
|
8
|
+
},
|
|
9
|
+
"publishConfig": {
|
|
10
|
+
"access": "public"
|
|
11
|
+
},
|
|
12
|
+
"scripts": {},
|
|
13
|
+
"repository": "MisoAI/miso-server-js-sdk",
|
|
14
|
+
"license": "MIT",
|
|
15
|
+
"contributors": [
|
|
16
|
+
"simonpai <simon.pai@askmiso.com>"
|
|
17
|
+
],
|
|
18
|
+
"dependencies": {
|
|
19
|
+
"@miso.ai/server-commons": "0.6.3-beta.10",
|
|
20
|
+
"feedparser": "^2.2.10",
|
|
21
|
+
"node-fetch": "^3.3.2"
|
|
22
|
+
},
|
|
23
|
+
"version": "0.6.3-beta.10"
|
|
24
|
+
}
|
package/src/index.js
ADDED
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
import { Transform } from 'stream';
|
|
2
|
+
|
|
3
|
+
/**
 * Object-mode transform stream that drops records whose `date` is older than
 * a threshold and passes every other record through unchanged.
 */
export default class DateFilterStream extends Transform {

  /**
   * @param {number} threshold - Cutoff compared against `Date.parse(record.date)`;
   *   presumably epoch milliseconds, since that is what `Date.parse` returns.
   */
  constructor(threshold) {
    super({ objectMode: true });
    this._threshold = threshold;
  }

  /**
   * Filters a single record.
   *
   * The stream callback must be invoked for every record: Node's Transform
   * does not await a promise returned from `_transform`, so without the call
   * the stream stalls after the first record.
   *
   * @param {object} record - Record carrying a `date` property.
   * @param {string} _ - Encoding; unused in object mode.
   * @param {Function} done - Stream callback signalling the record was handled.
   */
  _transform(record, _, done) {
    let timestamp;
    try {
      timestamp = Date.parse(record.date);
    } catch (err) {
      // A date that cannot even be converted is logged and the record dropped.
      console.error(err);
      done();
      return;
    }
    if (timestamp < this._threshold) {
      // Older than the cutoff: skip it. An unparseable date yields NaN, which
      // never compares as smaller, so such records are kept.
      done();
      return;
    }
    this.push(record);
    done();
  }

}
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
import { parseDuration, startOfDate } from '@miso.ai/server-commons';
|
|
2
|
+
import FeedParser from 'feedparser';
|
|
3
|
+
import DateFilterStream from './date-filter.js';
|
|
4
|
+
import ArticleTransformStream from './transform.js';
|
|
5
|
+
|
|
6
|
+
/**
 * Builds the ordered list of streams that turn raw feed content into records.
 *
 * The list always starts with a FeedParser. A DateFilterStream follows when a
 * truthy threshold can be derived from `update` or `after`, and an
 * ArticleTransformStream follows when `transform` is truthy.
 *
 * @param {object} [options]
 * @param {object} [options.parse] - Passed as-is to the FeedParser constructor.
 * @param {*} [options.after] - Passed to `startOfDate` when `update` is falsy.
 * @param {*} [options.update] - Duration handed to `parseDuration`; when truthy
 *   the threshold is "now minus that duration" and `after` is ignored.
 * @param {boolean} [options.transform] - Whether to append the article transform.
 * @returns {Array} Streams in pipeline order.
 */
export default function feedStreams({ parse, after, update, transform } = {}) {
  let cutoff;
  if (update) {
    cutoff = Date.now() - parseDuration(update);
  } else {
    cutoff = startOfDate(after);
  }
  return [
    new FeedParser(parse),
    ...(cutoff ? [new DateFilterStream(cutoff)] : []),
    ...(transform ? [new ArticleTransformStream()] : []),
  ];
}
|
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
import { trimObj } from '@miso.ai/server-commons';
|
|
2
|
+
import { Transform } from 'stream';
|
|
3
|
+
|
|
4
|
+
/**
 * Object-mode transform stream that maps parsed feed items onto article
 * records (`type: 'article'`, with `guid` used as `product_id`).
 */
export default class ArticleTransformStream extends Transform {

  /**
   * @param {object} [options]
   * @param {number} [options.after] - Optional cutoff compared against
   *   `Date.parse(item.date)`; items older than it are skipped. Presumably
   *   epoch milliseconds, since that is what `Date.parse` returns.
   */
  constructor({ after } = {}) {
    super({ objectMode: true });
    this._after = after;
  }

  /**
   * Converts one feed item into an article record.
   *
   * The stream callback must be invoked for every item: Node's Transform does
   * not await a promise returned from `_transform`, so without the call the
   * stream stalls after the first item.
   *
   * @param {object} item - Feed item; only the destructured fields are read.
   * @param {string} _ - Encoding; unused in object mode.
   * @param {Function} done - Stream callback signalling the item was handled.
   */
  _transform({
    title,
    description,
    summary,
    date,
    pubdate,
    link,
    origlink,
    guid,
    image,
    author,
    categories,
  }, _, done) {
    if (this._after !== undefined) {
      let timestamp;
      try {
        timestamp = Date.parse(date);
      } catch (err) {
        console.error(err);
        done();
        return;
      }
      if (timestamp < this._after) {
        // Skip the item instead of calling this.end(): ending the writable
        // side from inside _transform makes the upstream's next write fail.
        done();
        return;
      }
    }
    let article;
    try {
      article = trimObj({
        product_id: guid,
        type: 'article',
        title: title || undefined,
        updated_at: date || undefined,
        published_at: pubdate || undefined,
        created_at: pubdate || undefined,
        // Each category is wrapped in its own single-element array.
        categories: categories && categories.map(c => [c]) || [],
        // Prefer the original link when the feed provides one.
        url: origlink || link || undefined,
        cover_image: image && image.url || undefined,
        authors: author ? [author] : [],
        html: description || undefined,
        custom_attributes: trimObj({
          summary: summary || undefined,
        }),
      });
    } catch (err) {
      // Surface mapping failures as a stream error rather than a thrown exception.
      done(err);
      return;
    }
    this.push(article);
    done();
  }

}
|
package/src/version.js
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
// Version string of this package, exposed as the module's default export.
export default '0.6.3-beta.10';
|