@miso.ai/server-wordpress 0.6.3-beta.2 → 0.6.3-beta.20

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,116 @@
1
+ import { createWriteStream } from 'fs';
2
+ import { access, mkdir } from 'fs/promises';
3
+ import { createGzip } from 'zlib';
4
+ import { startOfDate, endOfDate, stream } from '@miso.ai/server-commons';
5
+ import { WordPressClient } from '../src/index.js';
6
+ import { buildForEntities } from './utils.js';
7
+
8
/**
 * yargs builder of the `download` command.
 *
 * Hands the yargs instance to `buildForEntities` and returns whatever that
 * helper returns, so the command accepts the shared entity options.
 */
function build(yargs) {
  const configured = buildForEntities(yargs);
  return configured;
}
11
+
12
/**
 * Handler of the `download` command.
 *
 * Groups the site's posts into batches of whole years, walking from the year
 * of the newest post back to the year of the oldest one, and closes a batch as
 * soon as it holds at least `batchSize` records. Every batch is then streamed
 * into its own `*.jsonl.gz` file under `destination`.
 *
 * @param {object} [args]
 * @param {string} [args.destination='./data'] Output directory; created when it does not exist.
 * @param {number} [args.batchSize=30000] Record count at which a batch is closed.
 * @param {...*} [args.options] Everything else; passed to `WordPressClient` and to each count/stream request.
 * @returns {Promise<void>}
 */
async function run({
  destination = './data',
  batchSize = 30000,
  ...options
} = {}) {
  const client = new WordPressClient(options);

  // TODO: respect --after and --before
  const [firstPostYear, lastPostYear] = await client.posts.yearRange();

  // divide into batches (newest year first, so `end` is fixed before `start`)
  const batches = [];
  let endYear;
  let sum = 0;
  for (let year = lastPostYear; year >= firstPostYear; year--) {
    if (endYear === undefined) {
      endYear = year;
    }
    const after = startOfDate(year);
    const before = endOfDate(year);
    const count = await client.posts.count({ ...options, after, before });
    sum += count;
    if (sum >= batchSize) {
      batches.push({ start: year, end: endYear, records: sum });
      endYear = undefined;
      sum = 0;
    }
  }
  if (sum > 0) {
    batches.push({ start: firstPostYear, end: endYear, records: sum });
  }
  if (batches.length === 0) {
    console.log('No posts found.');
    return;
  }
  // Merge the last two batches if the last batch is too small. This needs at
  // least two batches. The last batch holds the OLDEST years, so merging it
  // extends the previous batch's `start` backwards, not its `end`.
  if (batches.length > 1 && batches[batches.length - 1].records < batchSize * 0.2) {
    const last = batches.pop();
    const previous = batches[batches.length - 1];
    previous.start = last.start;
    previous.records += last.records;
  }
  // counted after the merge so the progress output matches what is downloaded
  const batchCount = batches.length;
  console.log(`Divide into ${batchCount} batches:`);
  for (const { start, end, records } of batches) {
    console.log(`- ${start} -> ${end} (${records} records)`);
  }

  // mkdir -p (probed with access() first only to report a newly created directory)
  try {
    await access(destination);
  } catch (err) {
    if (err.code !== 'ENOENT') {
      throw err;
    }
    await mkdir(destination, { recursive: true });
    console.log(`Created directory ${destination}`);
  }

  // download, one batch at a time
  for (const [index, { start, end, records }] of batches.entries()) {
    const after = startOfDate(start);
    const before = endOfDate(end);
    const filename = `${options.transform ? 'miso' : 'wp'}-posts.${start}-${end}.jsonl.gz`;

    console.log(`[${index + 1} / ${batchCount}] Downloading ${filename}`);

    const startTime = Date.now();
    const sourceStream = await client.posts.stream({ ...options, after, before });

    await stream.pipeline(
      sourceStream,
      stream.stringify(),
      createGzip(),
      createWriteStream(`${destination}/${filename}`),
    );

    const elapsed = Date.now() - startTime;
    console.log(`[${index + 1} / ${batchCount}] Downloaded ${filename} (${records} records in ${formatDuration(elapsed)})`);
  }

  console.log('Done.');
}
96
+
97
/**
 * Renders a duration given in milliseconds as `XhYmZs`, `YmZs` or `Zs`,
 * dropping the leading units that are zero. Fractions of a second are
 * truncated by flooring.
 *
 * @param {number} duration Elapsed time in milliseconds.
 * @returns {string}
 */
function formatDuration(duration) {
  const totalSeconds = Math.floor(duration / 1000);
  const totalMinutes = Math.floor(totalSeconds / 60);
  const totalHours = Math.floor(totalMinutes / 60);

  const secondsPart = `${totalSeconds % 60}s`;
  if (totalHours > 0) {
    return `${totalHours}h${totalMinutes % 60}m${secondsPart}`;
  }
  return totalMinutes > 0 ? `${totalMinutes}m${secondsPart}` : `${totalSeconds}s`;
}
109
+
110
+ export default {
111
+ command: 'download',
112
+ aliases: ['down'],
113
+ desc: 'Download all posts and save as files.',
114
+ builder: build,
115
+ handler: run,
116
+ };
package/cli/entities.js CHANGED
@@ -1,62 +1,9 @@
1
1
  import { Transform } from 'stream';
2
+ import { pipeline } from 'stream/promises';
3
+ import split2 from 'split2';
2
4
  import { stream, parseDuration } from '@miso.ai/server-commons';
3
5
  import { WordPressClient } from '../src/index.js';
4
- import { normalizeOptions, normalizeTransform, parseDate } from './utils.js';
5
-
6
- export function buildForEntities(yargs) {
7
- // TODO: make them mutually exclusive
8
- return yargs
9
- .option('terms', {
10
- describe: 'Display terms associated with this type of resource',
11
- type: 'boolean',
12
- })
13
- .option('count', {
14
- alias: 'c',
15
- describe: 'Return the total number of records',
16
- type: 'boolean',
17
- })
18
- .option('date', {
19
- alias: 'd',
20
- describe: 'Only include records in this year/month/day',
21
- })
22
- .option('after', {
23
- alias: 'a',
24
- describe: 'Only include records after this time',
25
- })
26
- .option('before', {
27
- alias: 'b',
28
- describe: 'Only include records before this time',
29
- })
30
- .option('update', {
31
- alias: 'u',
32
- describe: 'Only include records modified in given duration (3h, 2d, etc.)',
33
- })
34
- .option('ids', {
35
- alias: 'include',
36
- describe: 'Specify post ids'
37
- })
38
- .option('fields', {
39
- describe: 'Specify which record fields are retrieved',
40
- type: 'array',
41
- coerce: yargs.coerceToArray,
42
- })
43
- .option('resolve', {
44
- alias: 'r',
45
- describe: 'Attach resolved entities (author, catagories) linked with the subjects',
46
- type: 'boolean',
47
- })
48
- .option('transform', {
49
- alias: 't',
50
- describe: 'Apply transform function to the entities',
51
- });
52
- /*
53
- .option('limit', {
54
- alias: 'n',
55
- describe: 'Limit the amount of records',
56
- type: 'number',
57
- })
58
- */
59
- }
6
+ import { normalizeOptions, buildForEntities } from './utils.js';
60
7
 
61
8
  function build(yargs) {
62
9
  return buildForEntities(yargs)
@@ -76,6 +23,12 @@ async function run({ subcmd, count, terms, update, name, ...options }) {
76
23
  case 'count':
77
24
  await runCount(client, name, options);
78
25
  return;
26
+ case 'absence':
27
+ await runPresence(client, name, { present: false });
28
+ return;
29
+ case 'presence':
30
+ await runPresence(client, name, { present: true });
31
+ return;
79
32
  }
80
33
  if (count) {
81
34
  await runCount(client, name, options);
@@ -99,20 +52,17 @@ export async function runTerms(client, name, options) {
99
52
  }
100
53
  }
101
54
 
102
- export async function runGet(client, name, { transform, ...options }) {
103
- await stream.pipelineToStdout(
104
- await client.entities(name).stream({
105
- ...options,
106
- transform: await normalizeTransform(transform),
107
- }),
108
- stream.stringify(),
55
+ export async function runGet(client, name, options) {
56
+ await pipeline(
57
+ await client.entities(name).stream(options),
58
+ new stream.OutputStream(),
109
59
  );
110
60
  }
111
61
 
112
62
  export async function runIds(client, name, { update, transform, resolve, fields, ...options }) {
113
63
  if (update) {
114
64
  await stream.pipeline(
115
- await buildUpdateStream(client, name, update, { ...options, fields: ['id', 'modified_gmt'] }),
65
+ await buildUpdateStream(client, name, update, { ...options, fields: ['id'] }),
116
66
  new Transform({
117
67
  objectMode: true,
118
68
  transform({ id }, _, callback) {
@@ -136,13 +86,23 @@ export async function runUpdate(client, name, update, options) {
136
86
  );
137
87
  }
138
88
 
89
/**
 * Pipes stdin through `split2()` and the entity's presence stream into a
 * non-object-mode `stream.OutputStream`.
 *
 * @param {object} client WordPress client; `client.entities(name).presence(options)` supplies the filter stage.
 * @param {string} name Entity (resource) name.
 * @param {object} options Passed unchanged to `presence()`, e.g. `{ present: true }`.
 * @returns {Promise<void>} Settles when `stream.pipeline` settles.
 */
export async function runPresence(client, name, options) {
  // stages are created in pipeline order
  const input = process.stdin;
  const lines = split2();
  const presenceFilter = client.entities(name).presence(options);
  const output = new stream.OutputStream({
    objectMode: false,
  });
  await stream.pipeline(input, lines, presenceFilter, output);
}
99
+
139
100
  async function buildUpdateStream(client, name, update, {
140
101
  date, after, before, orderBy, order, // strip off date filters and order criteria
141
102
  transform,
142
103
  ...options
143
104
  }) {
144
105
  // TODO: move the logic into client itself
145
- transform = await normalizeTransform(transform);
146
106
  const now = Date.now();
147
107
  update = parseDuration(update);
148
108
  const threshold = now - update;
@@ -156,6 +116,14 @@ async function buildUpdateStream(client, name, update, {
156
116
  after: threshold,
157
117
  }),
158
118
  // get recent modified, excluding ones already fetched
119
+ entities.stream({
120
+ ...options,
121
+ transform,
122
+ orderBy: 'modified',
123
+ modifiedAfter: threshold,
124
+ before: threshold,
125
+ }),
126
+ /*
159
127
  entities.stream({
160
128
  ...options,
161
129
  transform,
@@ -168,6 +136,7 @@ async function buildUpdateStream(client, name, update, {
168
136
  terminate: entity => parseDate(entity.modified_gmt) < threshold,
169
137
  },
170
138
  })
139
+ */
171
140
  ])
172
141
  );
173
142
  }
package/cli/index.js CHANGED
@@ -4,6 +4,8 @@ import version from '../src/version.js';
4
4
  import { profile, init } from './profile.js';
5
5
  import taxonomies from './taxonomies.js';
6
6
  import entities from './entities.js';
7
+ import summarize from './summarize.js';
8
+ import download from './download.js';
7
9
 
8
10
  yargs.build(yargs => {
9
11
  yargs
@@ -16,6 +18,9 @@ yargs.build(yargs => {
16
18
  alias: 'p',
17
19
  describe: 'Site profile file location',
18
20
  })
21
+ .option('auth', {
22
+ describe: 'Authentication string',
23
+ })
19
24
  .option('debug', {
20
25
  type: 'boolean',
21
26
  default: false,
@@ -23,6 +28,8 @@ yargs.build(yargs => {
23
28
  .hide('debug')
24
29
  .command(init)
25
30
  .command(profile)
31
+ .command(summarize)
32
+ .command(download)
26
33
  .command(taxonomies)
27
34
  .command(entities)
28
35
  .version(version);
@@ -0,0 +1,57 @@
1
+ import { startOfDate, endOfDate, getYear } from '@miso.ai/server-commons';
2
+ import { WordPressClient } from '../src/index.js';
3
+
4
/**
 * yargs builder of the `summarize` command.
 *
 * The command registers no options of its own, so the yargs instance is
 * returned untouched.
 */
function build(yargs) {
  // nothing command-specific to register
  return yargs;
}
7
+
8
/**
 * Handler of the `summarize` command.
 *
 * Prints the total number of posts, the dates of the first and last post, and
 * a table with the post count of every year between those two dates.
 *
 * @param {object} [options] Command arguments; passed to `WordPressClient` and to every count request.
 * @returns {Promise<void>}
 */
async function run({ ...options } = {}) {
  const client = new WordPressClient(options);
  const [total, [firstPostDate, lastPostDate]] = await Promise.all([
    client.posts.count(options),
    client.posts.dateRange(),
  ]);
  // The count column has to fit the header label as well as the widest
  // number, otherwise the header sticks out of the table for small sites.
  const countHeader = 'Posts';
  const countWidth = Math.max(countHeader.length, `${total}`.length);
  console.log();
  console.log(`Total posts: ${total}`);
  console.log(`First post at: ${firstPostDate}`);
  console.log(`Last post at: ${lastPostDate}`);

  // drill down by year
  console.log();
  const bar = `| ---- | ${'-'.repeat(countWidth)} |`;
  console.log(bar);
  console.log(`| Year | ${countHeader.padStart(countWidth)} |`);
  console.log(bar);
  for (let year = getYear(firstPostDate), lastYear = getYear(lastPostDate); year <= lastYear; year++) {
    const after = startOfDate(year);
    const before = endOfDate(year);
    const count = await client.posts.count({ ...options, after, before });
    console.log(`| ${year} | ${`${count}`.padStart(countWidth)} |`);
  }
  console.log(bar);
}
34
+
35
/**
 * Prints a two-dimensional array as left-aligned, space-separated columns,
 * one `console.log` call per row.
 *
 * Every cell is stringified with `str()`. Column widths are taken over all
 * rows, so rows of different lengths are aligned as well; an empty array
 * prints nothing.
 *
 * @param {Array<Array<*>>} arr Rows of cells.
 */
function printTable(arr) {
  const rows = arr.map((row) => row.map(str));
  // widest row decides the number of columns (0 for an empty table)
  const columnCount = Math.max(0, ...rows.map((row) => row.length));
  const colWidths = Array.from(
    { length: columnCount },
    (_, i) => Math.max(...rows.map((row) => (row[i] || '').length)),
  );
  for (const row of rows) {
    console.log(row.map((v, i) => rightPad(v, colWidths[i])).join(' '));
  }
}

/**
 * Stringifies a table cell; `undefined` is shown as `--`.
 */
function str(value) {
  return value === undefined ? '--' : `${value}`;
}

/**
 * Pads `text` with trailing spaces up to `length` characters.
 */
function rightPad(text = '', length) {
  return text.padEnd(length);
}
50
+
51
+ export default {
52
+ command: 'summarize',
53
+ aliases: ['sum'],
54
+ desc: 'Print out a summary of the WordPress site',
55
+ builder: build,
56
+ handler: run,
57
+ };
package/cli/utils.js CHANGED
@@ -1,24 +1,68 @@
1
- import { join } from 'path';
2
1
  import { startOfDate, endOfDate } from '@miso.ai/server-commons';
3
2
 
4
- const PWD = process.env.PWD;
5
-
6
- export function normalizeOptions({ date, after, before, ids, ...options }) {
3
+ export function normalizeOptions({ date, after, before, ids, include, ...options }) {
4
+ // TODO: should be able to turn this off, as it's covered by helper
7
5
  [after, before] = [startOfDate(date || after), endOfDate(date || before)];
6
+ // TODO: rely on yargs to coerce to array
8
7
  ids = ids ? `${ids}`.split(',').map(s => s.trim()) : ids;
9
8
  return { ...options, after, before, ids };
10
9
  }
11
10
 
12
- export async function normalizeTransform(transform) {
13
- if (typeof transform === 'string') {
14
- if (transform === 'default' || transform === 'legacy') {
15
- return transform;
16
- }
17
- return (await import(join(PWD, transform))).default;
18
- }
19
- return !!transform;
20
- }
21
-
11
+ /*
22
12
  export function parseDate(value) {
23
13
  return Date.parse(`${value}Z`);
24
14
  }
15
+ */
16
+
17
/**
 * Registers the options shared by the entity-oriented commands on a yargs
 * instance and returns the result of the option chain.
 *
 * `yargs.coerceToArray` is used as the coercion of `--fields`, so the given
 * object must provide it.
 *
 * @param {object} yargs yargs instance to configure.
 * @returns {object} The value returned by the last `.option()` call.
 */
export function buildForEntities(yargs) {
  // TODO: make them mutually exclusive
  return yargs
    .option('terms', {
      describe: 'Display terms associated with this type of resource',
      type: 'boolean',
    })
    .option('count', {
      alias: 'c',
      describe: 'Return the total number of records',
      type: 'boolean',
    })
    .option('date', {
      alias: 'd',
      describe: 'Only include records in this year/month/day',
    })
    .option('after', {
      alias: 'a',
      describe: 'Only include records after this time',
    })
    .option('before', {
      alias: 'b',
      describe: 'Only include records before this time',
    })
    .option('update', {
      alias: 'u',
      describe: 'Only include records modified in given duration (3h, 2d, etc.)',
    })
    .option('ids', {
      alias: 'include',
      describe: 'Specify post ids',
    })
    .option('fields', {
      describe: 'Specify which record fields are retrieved',
      type: 'array',
      coerce: yargs.coerceToArray,
    })
    .option('resolve', {
      alias: 'r',
      describe: 'Attach resolved entities (author, categories) linked with the subjects',
      type: 'boolean',
    })
    .option('transform', {
      alias: 't',
      describe: 'Apply transform function to the entities',
    })
    .option('limit', {
      alias: 'n',
      describe: 'Limit the amount of records',
      type: 'number',
    });
}
package/package.json CHANGED
@@ -17,9 +17,9 @@
17
17
  "simonpai <simon.pai@askmiso.com>"
18
18
  ],
19
19
  "dependencies": {
20
- "@miso.ai/server-commons": "0.6.3-beta.2",
21
- "axios": "^0.27.2",
20
+ "@miso.ai/server-commons": "0.6.3-beta.20",
21
+ "axios": "^1.6.2",
22
22
  "axios-retry": "^3.3.1"
23
23
  },
24
- "version": "0.6.3-beta.2"
24
+ "version": "0.6.3-beta.20"
25
25
  }
package/src/client.js CHANGED
@@ -70,7 +70,7 @@ export default class WordPressClient {
70
70
 
71
71
  }
72
72
 
73
- const SITE_PROFILE_PROPS = ['site', 'utcOffset'];
73
+ const SITE_PROFILE_PROPS = ['site', 'utcOffset', 'resources', 'defaults'];
74
74
 
75
75
  class SiteProfile {
76
76
 
@@ -2,7 +2,7 @@ import { asArray, Resolution } from '@miso.ai/server-commons';
2
2
 
3
3
  export default class EntityIndex {
4
4
 
5
- constructor(entities, { process, value } = {}) {
5
+ constructor(entities, { process, value, fields } = {}) {
6
6
  this._entities = entities;
7
7
  if (process) {
8
8
  this._process = process;
@@ -10,6 +10,7 @@ export default class EntityIndex {
10
10
  if (value) {
11
11
  this._value = (en => en && value(en)); // null-safe
12
12
  }
13
+ this._fields = fields;
13
14
  this.name = entities.name;
14
15
  this._index = new Map();
15
16
  this._notFound = new Set();
@@ -49,7 +50,7 @@ export default class EntityIndex {
49
50
  if (this.hierarchical) {
50
51
  return; // already all fetched
51
52
  }
52
- ids = asArray(ids);
53
+ ids = asArray(ids).filter(id => id); // discard 0, null, undefined
53
54
 
54
55
  const promises = []
55
56
  const idsToFetch = [];
@@ -66,7 +67,7 @@ export default class EntityIndex {
66
67
  if (idsToFetch.length > 0) {
67
68
  (async () => {
68
69
  const idsFetchSet = new Set(idsToFetch);
69
- const stream = await this._entities.stream({ ids: idsToFetch });
70
+ const stream = await this._entities.stream({ ids: idsToFetch, fields: this._fields });
70
71
  for await (const entity of stream) {
71
72
  const { id } = entity;
72
73
  this._index.set(id, this._process(entity));
@@ -84,8 +85,11 @@ export default class EntityIndex {
84
85
  }
85
86
 
86
87
  _resolveFetch(id) {
87
- this._fetching.get(id).resolve();
88
- this._fetching.delete(id);
88
+ const res = this._fetching.get(id);
89
+ if (res) {
90
+ res.resolve();
91
+ this._fetching.delete(id);
92
+ }
89
93
  }
90
94
 
91
95
  async get(id) {
@@ -95,13 +99,14 @@ export default class EntityIndex {
95
99
  }
96
100
 
97
101
  async getAll(ids) {
102
+ ids = ids.filter(id => id); // discard 0, null, undefined
98
103
  await this._dataReady();
99
104
  await this.fetch(ids);
100
105
  return ids.map(id => this._index.get(id));
101
106
  }
102
107
 
103
108
  async getValue(id) {
104
- if (id === undefined) {
109
+ if (!id) { // 0, null, undefined
105
110
  return undefined;
106
111
  }
107
112
  return this._value(await this.get(id));
@@ -1,7 +1,9 @@
1
+ import { join } from 'path';
1
2
  import { Transform } from 'stream';
2
- import { asArray, stream } from '@miso.ai/server-commons';
3
+ import { asArray, stream, getYear } from '@miso.ai/server-commons';
3
4
  import EntityIndex from './entity-index.js';
4
5
  import EntityTransformStream from './transform.js';
6
+ import EntityPresenceStream from './presence.js';
5
7
  import defaultTransform from './transform-default.js';
6
8
  import legacyTransform from './transform-legacy.js';
7
9
 
@@ -19,19 +21,23 @@ export default class Entities {
19
21
  if (!resolve && !transform) {
20
22
  return this._client._helpers.stream(this.name, options);
21
23
  }
22
- transform = getTransformFn(transform);
23
-
24
24
  const client = this._client;
25
+ transform = await getTransformFn(client, this.name, transform);
25
26
 
26
27
  // we need taxonomy fetched so we know whether it's hierarchical
27
28
  const taxonomies = await client._helpers.findAssociatedTaxonomies(this.name);
28
29
 
30
+ // TODO: omit specific indicies by config
29
31
  // prepare entity indicies
32
+ const { resources = {} } = client._profile || {};
33
+ const ignored = new Set(resources.ignore || []);
34
+
30
35
  const indicies = [
31
36
  client.users.index,
32
37
  client.media.index,
33
38
  ...taxonomies.map(({ rest_base }) => client.entities(rest_base).index),
34
- ];
39
+ ].filter(index => !ignored.has(index.name));
40
+
35
41
  await Promise.all(indicies.map(index => index.ready()));
36
42
  for (const index of indicies) {
37
43
  if (index.hierarchical) {
@@ -56,12 +62,7 @@ export default class Entities {
56
62
  }
57
63
 
58
64
  async ids(options = {}) {
59
- const { before, after, u } = options;
60
- const fields = ['id'];
61
- if (before || after) {
62
- fields.push('modified_gmt');
63
- }
64
- return (await this._client._helpers.stream(this.name, { ...options, fields }))
65
+ return (await this._client._helpers.stream(this.name, { ...options, fields: ['id'] }))
65
66
  .pipe(new Transform({
66
67
  objectMode: true,
67
68
  transform({ id }, _, callback) {
@@ -71,6 +72,10 @@ export default class Entities {
71
72
  }
72
73
 
73
74
  async getAll(options) {
75
+ return this.all(options);
76
+ }
77
+
78
+ async all(options) {
74
79
  return stream.collect(await this.stream(options));
75
80
  }
76
81
 
@@ -82,6 +87,22 @@ export default class Entities {
82
87
  return this._client._helpers.terms(this.name, options);
83
88
  }
84
89
 
90
+ presence(options) {
91
+ return new EntityPresenceStream(this._client, this.name, options);
92
+ }
93
+
94
+ async dateRange() {
95
+ // TODO: options?
96
+ return Promise.all([
97
+ getPostDate(this._client, 'asc'),
98
+ getPostDate(this._client, 'desc'),
99
+ ]);
100
+ }
101
+
102
+ async yearRange() {
103
+ return (await this.dateRange()).map(getYear);
104
+ }
105
+
85
106
  get index() {
86
107
  return this._index;
87
108
  }
@@ -96,6 +117,30 @@ export default class Entities {
96
117
 
97
118
  }
98
119
 
120
/**
 * Resolves the `transform` option into a function applied to each entity.
 *
 * - `true` is first replaced by `client._profile.defaults.transform[name]`,
 *   falling back to the default transform when the profile has no such entry.
 * - `'default'` / `'legacy'` select the built-in transforms.
 * - any other string is treated as a module path relative to the working
 *   directory; the module's default export is used.
 * - a function is wrapped so it receives `{ defaultTransform }` as its second
 *   argument.
 * - anything else yields `undefined`.
 *
 * @param {object} client Client whose `_profile` may carry transform defaults.
 * @param {string} name Entity (resource) name used to look up the profile default.
 * @param {boolean|string|Function} [transform]
 * @returns {Promise<Function|undefined>}
 */
async function getTransformFn(client, name, transform) {
  if (transform === true) {
    const { defaults } = client._profile || {};
    if (!defaults || !defaults.transform || !defaults.transform[name]) {
      return defaultTransform;
    }
    transform = defaults.transform[name];
  }
  // Checked after the profile lookup so a profile default of 'default' or
  // 'legacy' selects the built-in instead of being imported as a file.
  switch (transform) {
    case 'default':
      return defaultTransform;
    case 'legacy':
      return legacyTransform;
  }
  if (typeof transform === 'string') {
    // try as file path; PWD is set by the shell and may be missing, in which
    // case the process working directory is used
    const baseDir = process.env.PWD || process.cwd();
    transform = (await import(join(baseDir, transform))).default;
  }
  if (typeof transform === 'function') {
    return post => transform(post, { defaultTransform });
  }
  return undefined;
}
143
+
99
144
  function aggregateIds(records, propName) {
100
145
  return Array.from(records.reduce((idSet, record) => {
101
146
  for (const id of asArray(record[propName])) {
@@ -105,8 +150,6 @@ function aggregateIds(records, propName) {
105
150
  }, new Set()));
106
151
  }
107
152
 
108
- function getTransformFn(transform) {
109
- return typeof transform === 'function' ? post => transform(post, { defaultTransform }) :
110
- (transform === true || transform === 'default') ? defaultTransform :
111
- transform === 'legacy' ? legacyTransform : undefined;
153
/**
 * Fetches the `date_gmt` of the first post in the given sort order, i.e. the
 * oldest post for `'asc'` and the newest one for `'desc'`.
 *
 * @param {object} client Client exposing `posts.getAll()`.
 * @param {string} order Sort order passed to the request.
 * @param {object} [options] Extra request options; `limit`, `order` and `fields` are overridden.
 * @returns {Promise<string>} The post's `date_gmt` value.
 * @throws {Error} When the request yields no post at all.
 */
async function getPostDate(client, order, options = {}) {
  const [post] = await client.posts.getAll({ ...options, limit: 1, order, fields: ['date_gmt'] });
  if (!post) {
    // a site without posts used to surface as an opaque TypeError here
    throw new Error('No posts found: cannot determine the date range of posts.');
  }
  return post.date_gmt;
}
@@ -0,0 +1,105 @@
1
+ import { Transform } from 'stream';
2
+
3
/**
 * Transform stream that takes entity ids (one chunk per id) and emits only the
 * ids that are present on, or absent from, the WordPress site.
 *
 * Ids are looked up in batches of `fetchSize` through the client's helpers.
 * Results are emitted in input order: an id is held back until every id
 * written before it has been resolved.
 */
export default class EntityPresenceStream extends Transform {

  /**
   * @param {object} client Client exposing `_helpers.url.build()` and `_helpers.axios.get()`.
   * @param {string} name Entity (resource) name to query.
   * @param {object} [options]
   * @param {boolean} [options.present=true] Emit ids that exist (`true`) or ids that do not (`false`).
   * @param {number} [options.fetchSize=20] Number of pending ids that triggers a lookup request.
   * @param {boolean} [options.preserveOrder=true] Stored only; unordered output is not implemented.
   */
  constructor(client, name, {
    present = true,
    fetchSize = 20,
    preserveOrder = true,
  } = {}) {
    super();
    this._client = client;
    this._name = name;
    this._options = {
      present,
      fetchSize,
      preserveOrder,
    };
    this._inputs = []; // ids received but not yet emitted or dropped, in input order
    this._pendingSet = new Set(); // ids waiting to be included in a request
    this._requests = [];
    this._map = new Map(); // id -> { status, value }; value is the presence once known
    this._done = false; // flush callback once the writable side has ended
  }

  _transform(id, _, next) {
    id = `${id}`; // buffer -> string
    if (id) {
      this._inputs.push(id);
      this._outputAll();
      this._requestAll();
    }
    next();
  }

  _flush(done) {
    // keep the callback: it is invoked by _outputAll() once all inputs are resolved
    this._done = done;
    this._outputAll();
    if (this._inputs.length > 0) {
      this._requestAll(true);
    }
  }

  /**
   * Emits the leading run of inputs whose presence is already known, and
   * completes the flush once no input is left.
   */
  _outputAll() {
    // TODO: implement when preserveOrder = false
    let i = 0;
    for (const len = this._inputs.length; i < len; i++) {
      const id = this._inputs[i];
      const entry = this._map.get(id);
      if (!entry || entry.value === undefined) {
        break;
      }
      if (this._options.present === entry.value) {
        this.push(id);
      }
    }
    if (i > 0) {
      this._inputs = this._inputs.slice(i);
    }
    if (this._done && this._inputs.length === 0) {
      // cleared first so a later call cannot invoke the flush callback twice
      const done = this._done;
      this._done = false;
      done();
    }
  }

  /**
   * Queues every input without a map entry and triggers lookups; with
   * `flush` the remaining pending ids are requested regardless of `fetchSize`.
   */
  _requestAll(flush = false) {
    for (const id of this._inputs) {
      this._fetchAll();
      if (!this._map.has(id)) {
        this._map.set(id, { status: 'pending' });
        this._pendingSet.add(id);
      }
    }
    this._fetchAll(flush);
  }

  /**
   * Requests the presence of all pending ids once there are at least
   * `fetchSize` of them (or unconditionally when `flush` is set). A failed
   * request destroys the stream with the error; the returned promise itself
   * never rejects, so callers may leave it un-awaited.
   */
  async _fetchAll(flush = false) {
    if (!flush && this._pendingSet.size < this._options.fetchSize) {
      return;
    }
    const ids = Array.from(this._pendingSet);
    if (ids.length === 0) {
      // nothing to look up; an empty `include` list would not filter the request
      return;
    }
    for (const id of ids) {
      this._map.get(id).status = 'fetching';
    }
    this._pendingSet = new Set();

    let presences;
    try {
      presences = await this._fetch(ids);
    } catch (err) {
      this.destroy(err);
      return;
    }
    if (this.destroyed) {
      return;
    }

    for (const id of ids) {
      const entry = this._map.get(id);
      entry.status = 'ready';
      entry.value = presences.has(id);
    }
    this._outputAll();
  }

  /**
   * Looks up the given ids and returns the set of ids (as strings) found in
   * the response.
   */
  async _fetch(ids) {
    // NOTE(review): assumes the response carries every requested id in a single page — confirm page size vs fetchSize
    const url = await this._client._helpers.url.build(this._name, { include: ids, fields: ['id'] });
    const { data } = await this._client._helpers.axios.get(url);
    const presences = new Set();
    for (const { id } of data) {
      presences.add(`${id}`);
    }
    return presences;
  }

}
@@ -14,14 +14,14 @@ export default function transform({
14
14
  modified_gmt,
15
15
  guid: {
16
16
  rendered: guid,
17
- },
17
+ } = {},
18
18
  slug,
19
19
  title: {
20
20
  rendered: title,
21
- },
21
+ } = {},
22
22
  content: {
23
23
  rendered: html,
24
- },
24
+ } = {},
25
25
  link: url,
26
26
  status,
27
27
  sticky,
@@ -42,6 +42,7 @@ export default function transform({
42
42
  product_id,
43
43
  type,
44
44
  created_at,
45
+ published_at: created_at,
45
46
  updated_at,
46
47
  title,
47
48
  cover_image,
package/src/helpers.js CHANGED
@@ -1,24 +1,52 @@
1
- import { asNumber, splitObj, stream } from '@miso.ai/server-commons';
2
- import axios from './axios.js';
1
+ import axios from 'axios';
2
+ import axiosRetry from 'axios-retry';
3
+ import { asNumber, splitObj, stream, startOfDate, endOfDate } from '@miso.ai/server-commons';
3
4
  import DataSource from './source/index.js';
5
+ import version from './version.js';
4
6
 
5
7
  const MS_PER_HOUR = 1000 * 60 * 60;
6
8
 
7
- const STREAM_OPTIONS = ['offset', 'limit', 'strategy', 'filter', 'transform', 'onLoad'];
9
+ const STREAM_OPTIONS = ['offset', 'strategy', 'filter', 'transform', 'onLoad'];
10
+
11
/**
 * Creates the axios instance used for all requests of one client.
 *
 * The instance sends a `MisoBot/<version>` User-Agent header and, when
 * `client._options.auth` is given, a Basic `Authorization` header. Failed
 * requests are retried up to 5 times with a delay growing by 300 ms per
 * attempt.
 *
 * @param {object} client Client whose `_options.auth` is either a
 *   `"username:password"` string or an object with `username` and `password`.
 * @returns {object} The configured axios instance.
 * @throws {TypeError} When `auth` is set but is neither of the accepted forms.
 */
function createAxios(client) {
  const { auth } = client._options || {};
  const headers = {
    'User-Agent': `MisoBot/${version}`,
  };
  if (auth) {
    // `auth` is a const binding, so the normalized form gets its own variable
    const credentials = (typeof auth === 'object' && auth.username && auth.password)
      ? `${auth.username}:${auth.password}`
      : auth;
    if (typeof credentials !== 'string') {
      throw new TypeError(`Invalid auth: must be a string or an object.`);
    }
    headers['Authorization'] = `Basic ${Buffer.from(credentials).toString('base64')}`;
  }
  const instance = axios.create({
    headers,
  });
  axiosRetry(instance, { retries: 5, retryDelay: count => count * 300 });
  return instance;
}
+ }
8
31
 
9
32
  export default class Helpers {
10
33
 
11
34
  constructor(client) {
12
35
  this._start = Date.now();
13
36
  this._client = client;
37
+ this._axios = createAxios(client);
14
38
  this.url = new Url(this);
15
39
  this._samples = {};
16
40
  this.debug = this.debug.bind(this);
17
41
  }
18
42
 
19
- async stream(resource, options) {
43
+ get axios() {
44
+ return this._axios;
45
+ }
46
+
47
+ async stream(resource, options = {}) {
20
48
  const [streamOptions, sourceOptions] = splitObj(options, STREAM_OPTIONS);
21
- const source = new DataSource(this, resource, sourceOptions);
49
+ const source = options.source || new DataSource(this, resource, sourceOptions);
22
50
  return new stream.BufferedReadStream(source, { ...streamOptions, debug: this.debug });
23
51
  }
24
52
 
@@ -32,7 +60,7 @@ export default class Helpers {
32
60
 
33
61
  async _fetchSample(resource) {
34
62
  const url = await this.url.build(resource, { page: 0, pageSize: 1 });
35
- const { data, headers } = await axios.get(url);
63
+ const { data, headers } = await this.axios.get(url);
36
64
  if (!data.length) {
37
65
  throw new Error(`No record of ${resource} avaliable`);
38
66
  }
@@ -71,7 +99,7 @@ export default class Helpers {
71
99
 
72
100
  async _fetchTaxonomies() {
73
101
  const url = await this.url.build('taxonomies');
74
- const { data } = await axios.get(url);
102
+ const { data } = await this.axios.get(url);
75
103
  this.debug(`Fetched taxonomies.`);
76
104
  return Object.values(data);
77
105
  }
@@ -82,7 +110,7 @@ export default class Helpers {
82
110
 
83
111
  async count(resource, { offset: _, ...options } = {}) {
84
112
  const url = await this.url.build(resource, { ...options, page: 0, pageSize: 1 });
85
- const { headers } = await axios.get(url);
113
+ const { headers } = await this.axios.get(url);
86
114
  return asNumber(headers['x-wp-total']);
87
115
  }
88
116
 
@@ -92,7 +120,7 @@ export default class Helpers {
92
120
 
93
121
  async countUrl(url) {
94
122
  url = await this.url.append(url, { page: 0, pageSize: 1 });
95
- const { headers } = await axios.get(url);
123
+ const { headers } = await this.axios.get(url);
96
124
  return asNumber(headers['x-wp-total']);
97
125
  }
98
126
 
@@ -133,18 +161,34 @@ class Url {
133
161
 
134
162
  // modifiedAfter, modifiedBefore is supported since WordPress 5.7
135
163
  // https://make.wordpress.org/core/2021/02/23/rest-api-changes-in-wordpress-5-7/
136
- async append(url, options = {}) {
137
- const { after, before, order, orderBy, page, pageSize, offset, include, exclude } = options;
138
- let { fields } = options;
164
+ async append(url, {
165
+ date,
166
+ after,
167
+ before,
168
+ modifiedAfter,
169
+ modifiedBefore,
170
+ order,
171
+ orderBy,
172
+ page,
173
+ pageSize,
174
+ offset,
175
+ include,
176
+ exclude,
177
+ fields,
178
+ } = {}) {
139
179
  const params = [];
140
180
 
141
181
  // TODO: support single id
142
182
 
183
+ [after, before] = [startOfDate(date || after), endOfDate(date || before)];
184
+
143
185
  // The date is compared against site's local time, not UTC, so we have to work on timezone offset
144
- if (has(after) || has(before)) {
186
+ if (has(after) || has(before) || has(modifiedAfter) || has(modifiedBefore)) {
145
187
  const utcOffset = await this._helpers.utcOffsetInMs();
146
188
  has(after) && params.push(`after=${toISOString(after, utcOffset)}`);
147
189
  has(before) && params.push(`before=${toISOString(before, utcOffset)}`);
190
+ has(modifiedAfter) && params.push(`modified_after=${toISOString(modifiedAfter, utcOffset)}`);
191
+ has(modifiedBefore) && params.push(`modified_before=${toISOString(modifiedBefore, utcOffset)}`);
148
192
  }
149
193
 
150
194
  has(order) && params.push(`order=${order}`);
@@ -155,6 +199,7 @@ class Url {
155
199
  has(include) && include.length && params.push(`include=${joinIds(include)}`);
156
200
  has(exclude) && exclude.length && params.push(`exclude=${joinIds(exclude)}`);
157
201
  if (has(fields) && fields.length) {
202
+ // TODO: is this unused?
158
203
  if (has(before) && !fields.includes('modified_gmt')) {
159
204
  fields = [...fields, 'modified_gmt'];
160
205
  }
@@ -10,8 +10,12 @@ export default class Posts extends Entities {
10
10
  super(client, RESOURCE_NAME);
11
11
  }
12
12
 
13
- async getAll() {
14
- throw new Error(`Getting all posts is not supported.`);
13
+ async getAll(options = {}) {
14
+ if (!options.ids && !options.limit) {
15
+ // TODO: should be more tolerant
16
+ throw new Error(`Getting all posts is not supported.`);
17
+ }
18
+ return super.getAll(options);
15
19
  }
16
20
 
17
21
  async index() {
@@ -1,5 +1,3 @@
1
- import axios from '../axios.js';
2
-
3
1
  export default class WordPressDataSource {
4
2
 
5
3
  constructor(helpers, resource, options = {}) {
@@ -32,14 +30,21 @@ export default class WordPressDataSource {
32
30
  this._debug(`[WordPressDataSource] request ${url}`);
33
31
  const response = await this._axiosGet(url);
34
32
  this._debug(`[WordPressDataSource] response ${response.status} ${url}`);
35
- return this._process(response);
33
+ return this._process(response, { request, url });
36
34
  }
37
35
 
38
- _process({ status, data }) {
36
+ _process({ status, data }, { request, url }) {
39
37
  if (status >= 400 && status < 500 && data.code === 'rest_post_invalid_page_number') {
40
38
  // out of bound, so there is no more data
41
39
  return { data: [], terminate: true };
42
40
  }
41
+ if (!Array.isArray(data)) {
42
+ throw new Error(`Unexpected response from WordPress API for ${url}. Expected an array of objects: ${data}`);
43
+ }
44
+ const { records } = request;
45
+ if (records) {
46
+ data = data.slice(0, records);
47
+ }
43
48
  if (!this._options.preserveLinks) {
44
49
  data = data.map(this._helpers.removeLinks);
45
50
  }
@@ -52,13 +57,13 @@ export default class WordPressDataSource {
52
57
 
53
58
  async _buildBaseUrl() {
54
59
  // exclude parameters meant to be dealt with state
55
- const { page, ...options } = this._options;
60
+ const { page, ids, ...options } = this._options;
56
61
  return this._helpers.url.build(this._resource, options);
57
62
  }
58
63
 
59
64
  async _axiosGet(url) {
60
65
  try {
61
- return await axios.get(url);
66
+ return await this._helpers.axios.get(url);
62
67
  } catch(error) {
63
68
  if (error.response) {
64
69
  return error.response;
@@ -14,7 +14,7 @@ export default class PagedWordPressDataSource extends WordPressDataSource {
14
14
  if (pageSize > MAX_PAGE_SIZE) {
15
15
  throw new Error(`Page size cannot be greater than ${MAX_PAGE_SIZE}: ${pageSize}`);
16
16
  }
17
- // TODO: limit
17
+ this._limit = limit;
18
18
  this._pageSize = options.pageSize = pageSize;
19
19
  this._page = 0;
20
20
  }
@@ -25,10 +25,13 @@ export default class PagedWordPressDataSource extends WordPressDataSource {
25
25
 
26
26
  request() {
27
27
  const page = this._page++;
28
- const records = this._pageSize;
29
- const total = this._totalValue;
28
+ let records = this._pageSize;
29
+ const limit = combineLimit(this._totalValue, this._limit);
30
30
  // if we know total, we know when the data is exhausted
31
- const exhaust = total !== undefined && ((page + 1) * this._pageSize > total + 10); // 10 for a buffer
31
+ const exhaust = limit !== undefined && ((page + 1) * this._pageSize > limit);
32
+ if (exhaust && this._limit !== undefined) {
33
+ records = this._limit - (page * this._pageSize);
34
+ }
32
35
  return exhaust ? { records, page, exhaust } : { records, page };
33
36
  }
34
37
 
@@ -36,9 +39,18 @@ export default class PagedWordPressDataSource extends WordPressDataSource {
36
39
  return this._totalPromise || (this._totalPromise = this._fetchTotal());
37
40
  }
38
41
 
39
- async _url(baseUrl, { page }) {
42
+ async _url(baseUrl, { records, page }) {
40
43
  const head = baseUrl.indexOf('?') < 0 ? '?' : '&';
41
- return `${baseUrl}${head}page=${page + 1}`;
44
+ let url = `${baseUrl}${head}page=${page + 1}`;
45
+ // optimize: if limit < page size we can save much bandwidth
46
+ if (page === 0 && records < this._pageSize) {
47
+ if (url.indexOf('per_page=') > -1) {
48
+ url = url.replace(/per_page=\d+/, `per_page=${records}`);
49
+ } else {
50
+ url += `&per_page=${records}`;
51
+ }
52
+ }
53
+ return url;
42
54
  }
43
55
 
44
56
  async _fetchTotal() {
@@ -50,8 +62,8 @@ export default class PagedWordPressDataSource extends WordPressDataSource {
50
62
  return total;
51
63
  }
52
64
 
53
- _process({ status, data, headers }) {
54
- const result = super._process({ status, data, headers });
65
+ _process({ status, data, headers }, meta) {
66
+ const result = super._process({ status, data, headers }, meta);
55
67
  const total = asNumber(headers['x-wp-total']);
56
68
  if (total !== undefined) {
57
69
  result.total = total;
@@ -63,3 +75,9 @@ export default class PagedWordPressDataSource extends WordPressDataSource {
63
75
  }
64
76
 
65
77
  }
78
+
79
// Slack added on top of the reported total before it is used as a bound.
const TOTAL_BUFFER = 10;

/**
 * Combines the known total (if any) with the user-specified limit (if any)
 * into a single upper bound on the number of records.
 *
 * @param {number} [total] Total record count, when already known.
 * @param {number} [limit] Requested maximum number of records.
 * @returns {number|undefined} `limit` when the total is unknown; otherwise the
 *   padded total, capped by `limit` when one is given.
 */
function combineLimit(total, limit) {
  if (total === undefined) {
    return limit;
  }
  const paddedTotal = total + TOTAL_BUFFER;
  if (limit === undefined) {
    return paddedTotal;
  }
  return Math.min(paddedTotal, limit);
}
package/src/version.js CHANGED
@@ -1 +1 @@
1
- export default '0.6.3-beta.2';
1
+ export default '0.6.3-beta.20';
package/src/axios.js DELETED
@@ -1,8 +0,0 @@
1
- import axios from 'axios';
2
- import axiosRetry from 'axios-retry';
3
-
4
- // TODO: create an instance
5
-
6
- axiosRetry(axios, { retries: 5, retryDelay: count => count * 300 });
7
-
8
- export default axios;