@miso.ai/server-feed 0.6.3-beta.2 → 0.6.3-beta.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/cli/index.js +16 -5
- package/package.json +2 -2
- package/src/index.js +2 -1
- package/src/stream/date-filter.js +25 -0
- package/src/stream/feed.js +16 -0
- package/src/stream/index.js +4 -0
- package/src/{stream.js → stream/raw.js} +1 -1
- package/src/stream/transform.js +61 -0
- package/src/version.js +1 -1
package/cli/index.js
CHANGED
|
@@ -1,7 +1,6 @@
|
|
|
1
1
|
#!/usr/bin/env node
|
|
2
2
|
import { yargs, stream } from '@miso.ai/server-commons';
|
|
3
|
-
import version from '../src/version.js';
|
|
4
|
-
import { stream as createFeedStream } from '../src/index.js';
|
|
3
|
+
import { version, feedStream } from '../src/index.js';
|
|
5
4
|
|
|
6
5
|
yargs.build(yargs => {
|
|
7
6
|
yargs
|
|
@@ -10,9 +9,21 @@ yargs.build(yargs => {
|
|
|
10
9
|
type: 'string',
|
|
11
10
|
})
|
|
12
11
|
.option('auth', {
|
|
13
|
-
alias: 'u',
|
|
14
12
|
type: 'string',
|
|
15
13
|
})
|
|
14
|
+
.option('after', {
|
|
15
|
+
alias: 'a',
|
|
16
|
+
describe: 'Only include records after this time',
|
|
17
|
+
})
|
|
18
|
+
.option('update', {
|
|
19
|
+
alias: 'u',
|
|
20
|
+
describe: 'Only include records modified in given duration (3h, 2d, etc.)',
|
|
21
|
+
})
|
|
22
|
+
.option('transform', {
|
|
23
|
+
alias: 't',
|
|
24
|
+
type: 'boolean',
|
|
25
|
+
default: false,
|
|
26
|
+
})
|
|
16
27
|
.option('debug', {
|
|
17
28
|
type: 'boolean',
|
|
18
29
|
default: false,
|
|
@@ -26,9 +37,9 @@ yargs.build(yargs => {
|
|
|
26
37
|
.version(version);
|
|
27
38
|
});
|
|
28
39
|
|
|
29
|
-
async function run({ url, auth } = {}) {
|
|
40
|
+
async function run({ url, auth, after, update, transform } = {}) {
|
|
30
41
|
await stream.pipeline(
|
|
31
|
-
await
|
|
42
|
+
await feedStream(url, { fetch: { auth }, after, update, transform }),
|
|
32
43
|
new stream.OutputStream(),
|
|
33
44
|
);
|
|
34
45
|
}
|
package/package.json
CHANGED
|
@@ -16,9 +16,9 @@
|
|
|
16
16
|
"simonpai <simon.pai@askmiso.com>"
|
|
17
17
|
],
|
|
18
18
|
"dependencies": {
|
|
19
|
-
"@miso.ai/server-commons": "0.6.3-beta.2",
|
|
19
|
+
"@miso.ai/server-commons": "0.6.3-beta.4",
|
|
20
20
|
"feedparser": "^2.2.10",
|
|
21
21
|
"node-fetch": "^3.3.2"
|
|
22
22
|
},
|
|
23
|
-
"version": "0.6.3-beta.2"
|
|
23
|
+
"version": "0.6.3-beta.4"
|
|
24
24
|
}
|
package/src/index.js
CHANGED
|
@@ -1 +1,2 @@
|
|
|
1
|
-
export
|
|
1
|
+
export * from './stream/index.js';
|
|
2
|
+
export { default as version } from './version.js';
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
import { Transform } from 'stream';
|
|
2
|
+
|
|
3
|
+
/**
 * Object-mode transform that forwards feed records until it meets one dated
 * before the threshold, then ends its output and ignores everything after.
 *
 * NOTE(review): stopping at the first old record presumes the feed lists its
 * newest entries first — confirm against the feeds this is used with.
 */
export default class DateFilterStream extends Transform {

  /**
   * @param {number} threshold - Cutoff compared against `Date.parse(record.date)`,
   *   i.e. expected to be epoch milliseconds.
   */
  constructor(threshold) {
    super({ objectMode: true });
    this._threshold = threshold;
    // Set once the cutoff is reached; later records are acknowledged but dropped.
    this._done = false;
  }

  /**
   * Forwards `record` unless the cutoff has been reached.
   *
   * @param {object} record - Feed item; only its `date` property is read.
   * @param {string} _ - Encoding, unused in object mode.
   * @param {Function} next - Transform callback. It is always invoked exactly
   *   once: without it Node never completes the write and the stream stalls
   *   after the first record.
   */
  _transform(record, _, next) {
    if (this._done) {
      next();
      return;
    }

    let stale;
    try {
      // Date.parse yields NaN for unparseable dates; NaN < threshold is false,
      // so such records pass through (same as the original comparison).
      stale = Date.parse(record.date) < this._threshold;
    } catch (err) {
      console.error(err);
      stale = true;
    }

    if (stale) {
      // End only the readable side. Calling this.end() here would close the
      // writable side while upstream may still be piping into it.
      this._done = true;
      this.push(null);
    } else {
      this.push(record);
    }
    next();
  }

}
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
import { parseDuration, startOfDate } from '@miso.ai/server-commons';
|
|
2
|
+
import rawFeedStream from './raw.js';
|
|
3
|
+
import DateFilterStream from './date-filter.js';
|
|
4
|
+
import ArticleTransformStream from './transform.js';
|
|
5
|
+
|
|
6
|
+
/**
 * Builds the feed record stream for `url`, optionally chaining a date filter
 * and an article transform after the raw feed stream.
 *
 * @param {string} url - Feed URL handed to `rawFeedStream`.
 * @param {object} [options]
 * @param {object} [options.fetch] - Forwarded to `rawFeedStream` as `fetch`.
 * @param {object} [options.parse] - Forwarded to `rawFeedStream` as `parse`.
 * @param {*} [options.after] - Passed to `startOfDate` when `update` is not given.
 * @param {string} [options.update] - Passed to `parseDuration`; takes precedence over `after`.
 * @param {boolean} [options.transform] - When truthy, pipes through `ArticleTransformStream`.
 * @returns {Promise<object>} The last stream in the chain.
 */
export default async function feedStream(url, { fetch, parse, after, update, transform } = {}) {
  const source = await rawFeedStream(url, { fetch, parse });

  // NOTE(review): presumably parseDuration returns milliseconds and
  // startOfDate returns an epoch timestamp (falsy when `after` is absent) — confirm.
  let cutoff;
  if (update) {
    cutoff = Date.now() - parseDuration(update);
  } else {
    cutoff = startOfDate(after);
  }

  const filtered = cutoff ? source.pipe(new DateFilterStream(cutoff)) : source;
  return transform ? filtered.pipe(new ArticleTransformStream()) : filtered;
}
|
|
@@ -2,7 +2,7 @@ import { Buffer } from 'buffer';
|
|
|
2
2
|
import fetch, { Headers } from 'node-fetch';
|
|
3
3
|
import FeedParser from 'feedparser';
|
|
4
4
|
|
|
5
|
-
export default async function
|
|
5
|
+
export default async function rawFeedStream(url, { fetch: fetchOptions, parse: parseOptions } = {}) {
|
|
6
6
|
const { status, body } = await fetch(url, buildFetchOptions(fetchOptions));
|
|
7
7
|
if (status !== 200) {
|
|
8
8
|
throw new Error(`Failed to fetch ${url}: ${status}`);
|
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
import { trimObj } from '@miso.ai/server-commons';
|
|
2
|
+
import { Transform } from 'stream';
|
|
3
|
+
|
|
4
|
+
/**
 * Object-mode transform that maps parsed feed items to article records
 * (`product_id`, `type: 'article'`, `title`, timestamps, `categories`, `url`,
 * `cover_image`, `authors`, `html`, `custom_attributes.summary`).
 *
 * When constructed with `after`, it also ends its output at the first item
 * dated before that cutoff and ignores everything that follows.
 */
export default class ArticleTransformStream extends Transform {

  /**
   * @param {object} [options]
   * @param {number} [options.after] - Optional cutoff compared against
   *   `Date.parse(date)`, i.e. expected to be epoch milliseconds.
   */
  constructor({ after } = {}) {
    super({ objectMode: true });
    this._after = after;
    // Set once the cutoff is reached; later items are acknowledged but dropped.
    this._done = false;
  }

  /**
   * Converts one feed item and pushes the resulting article record.
   *
   * @param {object} item - Feed item; the destructured properties are the only ones read.
   * @param {string} _ - Encoding, unused in object mode.
   * @param {Function} next - Transform callback. It is always invoked exactly
   *   once: without it Node never completes the write and the stream stalls
   *   after the first item.
   */
  _transform({
    title,
    description,
    summary,
    date,
    pubdate,
    link,
    origlink,
    guid,
    image,
    author,
    categories,
  }, _, next) {
    if (this._done) {
      next();
      return;
    }

    if (this._after !== undefined) {
      let stale;
      try {
        // NaN (unparseable date) compares false, so such items are kept.
        stale = Date.parse(date) < this._after;
      } catch (err) {
        console.error(err);
        stale = true;
      }
      if (stale) {
        // End only the readable side. Calling this.end() here would close the
        // writable side while upstream may still be piping into it.
        this._done = true;
        this.push(null);
        next();
        return;
      }
    }

    // Empty/falsy source values become undefined so trimObj can remove them.
    this.push(trimObj({
      product_id: guid,
      type: 'article',
      title: title || undefined,
      updated_at: date || undefined,
      published_at: pubdate || undefined,
      created_at: pubdate || undefined,
      // Each category is wrapped in its own single-element array.
      categories: (categories && categories.map((c) => [c])) || [],
      url: origlink || link || undefined,
      cover_image: (image && image.url) || undefined,
      authors: author ? [author] : [],
      html: description || undefined,
      custom_attributes: trimObj({
        summary: summary || undefined,
      }),
    }));
    next();
  }

}
|
package/src/version.js
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
export default '0.6.3-beta.2';
|
|
1
|
+
export default '0.6.3-beta.4';
|