@miso.ai/server-feed 0.6.3-beta.2 → 0.6.3-beta.20

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/cli/index.js CHANGED
@@ -1,17 +1,22 @@
1
1
  #!/usr/bin/env node
2
2
  import { yargs, stream } from '@miso.ai/server-commons';
3
- import version from '../src/version.js';
4
- import { stream as createFeedStream } from '../src/index.js';
3
+ import { version, feedStreams } from '../src/index.js';
5
4
 
6
5
  yargs.build(yargs => {
7
6
  yargs
8
7
  .env('MISO_FEED')
9
- .option('url', {
10
- type: 'string',
8
+ .option('after', {
9
+ alias: 'a',
10
+ describe: 'Only include records after this time',
11
11
  })
12
- .option('auth', {
12
+ .option('update', {
13
13
  alias: 'u',
14
- type: 'string',
14
+ describe: 'Only include records modified in given duration (3h, 2d, etc.)',
15
+ })
16
+ .option('transform', {
17
+ alias: 't',
18
+ type: 'boolean',
19
+ default: false,
15
20
  })
16
21
  .option('debug', {
17
22
  type: 'boolean',
@@ -19,16 +24,17 @@ yargs.build(yargs => {
19
24
  })
20
25
  .hide('debug')
21
26
  .command({
22
- command: '* [url]',
23
- description: 'Read items from feed',
27
+ command: '*',
28
+ description: 'Parse items from feed content',
24
29
  handler: run,
25
30
  })
26
31
  .version(version);
27
32
  });
28
33
 
29
- async function run({ url, auth } = {}) {
34
+ async function run(options) {
30
35
  await stream.pipeline(
31
- await createFeedStream(url, { fetch: { auth } }),
36
+ process.stdin,
37
+ ...feedStreams(options),
32
38
  new stream.OutputStream(),
33
39
  );
34
40
  }
package/package.json CHANGED
@@ -16,9 +16,9 @@
16
16
  "simonpai <simon.pai@askmiso.com>"
17
17
  ],
18
18
  "dependencies": {
19
- "@miso.ai/server-commons": "0.6.3-beta.2",
19
+ "@miso.ai/server-commons": "0.6.3-beta.20",
20
20
  "feedparser": "^2.2.10",
21
21
  "node-fetch": "^3.3.2"
22
22
  },
23
- "version": "0.6.3-beta.2"
23
+ "version": "0.6.3-beta.20"
24
24
  }
package/src/index.js CHANGED
@@ -1 +1,2 @@
1
- export { default as stream } from './stream.js';
1
+ export * from './stream/index.js';
2
+ export { default as version } from './version.js';
@@ -0,0 +1,22 @@
1
+ import { Transform } from 'stream';
2
+
3
/**
 * Object-mode transform that forwards only the records whose `date` is at or
 * after a given threshold. Records with a missing or unparseable date are
 * dropped, because their timestamp evaluates to NaN and fails the comparison.
 */
export default class DateFilterStream extends Transform {

  /**
   * @param {number} threshold - Lower bound (inclusive) compared against the
   *   record timestamp in epoch milliseconds.
   */
  constructor(threshold) {
    super({ objectMode: true });
    this._threshold = threshold;
  }

  /**
   * @param {object} record - Incoming item; only its `date` property is read.
   * @param {string} _ - Encoding (unused in object mode).
   * @param {Function} next - Transform callback, always invoked exactly once.
   */
  _transform(record, _, next) {
    try {
      const { date } = record;
      // Read Date instances directly: Date.parse(dateObject) goes through
      // toString(), which discards milliseconds and could push a record that is
      // actually newer than the threshold below it.
      const timestamp = date instanceof Date ? date.getTime() : Date.parse(date);
      if (timestamp >= this._threshold) {
        this.push(record);
      }
    } catch (err) {
      // Best effort: a single bad record is reported and skipped, not fatal.
      console.error(err);
    }
    next();
  }

}
@@ -0,0 +1,23 @@
1
+ import { parseDuration, startOfDate } from '@miso.ai/server-commons';
2
+ import FeedParser from 'feedparser';
3
+ import DateFilterStream from './date-filter.js';
4
+ import ArticleTransformStream from './transform.js';
5
+ import IdentityStream from './identity.js';
6
+
7
/**
 * Builds the ordered list of streams used to parse feed content.
 *
 * The list always starts with a FeedParser. A DateFilterStream is appended
 * when a truthy threshold is derived from `update` or `after`, and an
 * ArticleTransformStream is appended when `transform` is truthy. If neither
 * is added, an IdentityStream is appended instead.
 *
 * @param {object} [options]
 * @param {object} [options.parse] - Passed as-is to the FeedParser constructor.
 * @param {*} [options.after] - Handed to `startOfDate` when `update` is not set.
 * @param {*} [options.update] - Handed to `parseDuration`; the result is
 *   subtracted from the current time to obtain the threshold.
 * @param {boolean} [options.transform] - Whether to append the article transform.
 * @returns {Array} The streams, in pipeline order.
 */
export default function feedStreams({ parse, after, update, transform } = {}) {
  let threshold;
  if (update) {
    threshold = Date.now() - parseDuration(update);
  } else {
    threshold = startOfDate(after);
  }

  const parser = new FeedParser(parse);
  const stages = [];

  if (threshold) {
    stages.push(new DateFilterStream(threshold));
  }
  if (transform) {
    stages.push(new ArticleTransformStream());
  }
  if (stages.length === 0) {
    // because Duplex stream is not async iterable
    stages.push(new IdentityStream());
  }

  return [parser, ...stages];
}
@@ -0,0 +1,13 @@
1
+ import { Transform } from 'stream';
2
+
3
/**
 * Object-mode transform that hands every incoming record to the transform
 * callback unchanged.
 */
export default class IdentityStream extends Transform {

  constructor() {
    const streamOptions = { objectMode: true };
    super(streamOptions);
  }

  /**
   * @param {*} chunk - Incoming record, forwarded as-is.
   * @param {string} _encoding - Unused in object mode.
   * @param {Function} done - Transform callback; receives the record as output.
   */
  _transform(chunk, _encoding, done) {
    done(undefined, chunk);
  }

}
@@ -0,0 +1,3 @@
1
+ export { default as feedStreams } from './feed.js';
2
+ export { default as ArticleTransformStream } from './transform.js';
3
+ export { default as DateFilterStream } from './date-filter.js';
@@ -0,0 +1,62 @@
1
+ import { trimObj } from '@miso.ai/server-commons';
2
+ import { Transform } from 'stream';
3
+
4
/**
 * Object-mode transform that maps parsed feed items to article records
 * (`product_id`, `type: 'article'`, `title`, timestamps, `categories`, `url`,
 * `cover_image`, `authors`, `html`, `custom_attributes.summary`).
 *
 * When an `after` cutoff is given, the first item dated before the cutoff
 * stops the output: that item and every later item are discarded.
 */
export default class ArticleTransformStream extends Transform {

  /**
   * @param {object} [options]
   * @param {number} [options.after] - Cutoff compared against each item's
   *   timestamp; presumably epoch milliseconds — confirm against callers.
   */
  constructor({ after } = {}) {
    super({ objectMode: true });
    this._after = after;
    // Latches to true once an item older than the cutoff has been seen.
    this._cutoffReached = false;
  }

  /**
   * Tells whether `date` falls before the configured cutoff.
   * A failure while reading the date is logged and treated as "stop".
   *
   * @param {Date|string} date
   * @returns {boolean}
   */
  _isBeforeCutoff(date) {
    if (this._after === undefined) {
      return false;
    }
    try {
      // Read Date instances directly to avoid a lossy string round trip.
      const timestamp = date instanceof Date ? date.getTime() : Date.parse(date);
      return timestamp < this._after;
    } catch (err) {
      console.error(err);
      return true;
    }
  }

  /**
   * @param {object} item - Feed item; the fields destructured below are read.
   * @param {string} _ - Encoding (unused in object mode).
   * @param {Function} next - Transform callback, always invoked exactly once.
   */
  _transform(item, _, next) {
    // Ending the stream from inside _transform without calling `next` leaves
    // the write pending forever and makes later upstream writes fail with
    // "write after end". Instead, keep acknowledging chunks and drop them, so
    // the stream finishes normally when the input ends.
    if (this._cutoffReached || this._isBeforeCutoff(item.date)) {
      this._cutoffReached = true;
      next();
      return;
    }

    const {
      title,
      description,
      summary,
      date,
      pubdate,
      link,
      origlink,
      guid,
      image,
      author,
      categories,
    } = item;

    // NOTE(review): trimObj is a project helper; presumably it strips
    // undefined entries, which is why empty values are normalized to undefined.
    this.push(trimObj({
      product_id: guid,
      type: 'article',
      title: title || undefined,
      updated_at: date || undefined,
      published_at: pubdate || undefined,
      created_at: pubdate || undefined,
      // Each category becomes a single-element path.
      categories: categories ? categories.map(c => [c]) : [],
      url: origlink || link || undefined,
      cover_image: image && image.url || undefined,
      authors: author ? [author] : [],
      html: description || undefined,
      custom_attributes: trimObj({
        summary: summary || undefined,
      }),
    }));
    next();
  }

}
package/src/version.js CHANGED
@@ -1 +1 @@
1
- export default '0.6.3-beta.2';
1
+ export default '0.6.3-beta.20';
package/src/stream.js DELETED
@@ -1,28 +0,0 @@
1
- import { Buffer } from 'buffer';
2
- import fetch, { Headers } from 'node-fetch';
3
- import FeedParser from 'feedparser';
4
-
5
- export default async function stream(url, { fetch: fetchOptions, parse: parseOptions } = {}) {
6
- const { status, body } = await fetch(url, buildFetchOptions(fetchOptions));
7
- if (status !== 200) {
8
- throw new Error(`Failed to fetch ${url}: ${status}`);
9
- }
10
- return body.pipe(new FeedParser(parseOptions));
11
- }
12
-
13
- function buildFetchOptions({ headers, auth, ...options } = {}) {
14
- headers = new Headers(headers);
15
- if (auth) {
16
- if (typeof auth === 'object' && auth.username && auth.password) {
17
- auth = `${auth.username}:${auth.password}`;
18
- }
19
- if (typeof auth !== 'string') {
20
- throw new TypeError(`Invalid auth: must me a string or an object.`);
21
- }
22
- headers.set('Authorization', 'Basic ' + Buffer.from(auth).toString('base64'));
23
- }
24
- return {
25
- ...options,
26
- headers,
27
- };
28
- }