@miso.ai/server-wordpress 0.6.3-beta.9 → 0.6.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/cli/download.js +116 -0
- package/cli/entities.js +16 -65
- package/cli/index.js +7 -0
- package/cli/summarize.js +57 -0
- package/cli/utils.js +58 -14
- package/package.json +3 -3
- package/src/client.js +1 -1
- package/src/entities/entity-index.js +8 -4
- package/src/entities/index.js +46 -13
- package/src/entities/presence.js +2 -3
- package/src/helpers.js +55 -12
- package/src/posts/index.js +6 -2
- package/src/source/base.js +8 -6
- package/src/source/paged.js +24 -6
- package/src/version.js +1 -1
- package/src/axios.js +0 -8
package/cli/download.js
ADDED
|
@@ -0,0 +1,116 @@
|
|
|
1
|
+
import { createWriteStream } from 'fs';
|
|
2
|
+
import { access, mkdir } from 'fs/promises';
|
|
3
|
+
import { createGzip } from 'zlib';
|
|
4
|
+
import { startOfDate, endOfDate, stream } from '@miso.ai/server-commons';
|
|
5
|
+
import { WordPressClient } from '../src/index.js';
|
|
6
|
+
import { buildForEntities } from './utils.js';
|
|
7
|
+
|
|
8
|
+
/**
 * Yargs builder for the `download` command.
 *
 * The command takes the same options as the entity commands, so it simply
 * delegates to the shared `buildForEntities` helper.
 *
 * @param {object} yargs - The yargs instance being configured.
 * @returns {object} Whatever `buildForEntities` returns for that instance.
 */
function build(yargs) {
  const configured = buildForEntities(yargs);
  return configured;
}
|
|
11
|
+
|
|
12
|
+
/**
 * Handler for the `download` command.
 *
 * Splits the site's posts into year-aligned batches of roughly `batchSize`
 * records, then streams each batch into a gzipped JSON-lines file under
 * `destination`.
 *
 * @param {object} [argv] - Parsed command-line arguments.
 * @param {string} [argv.destination='./data'] - Output directory (created if missing).
 * @param {number} [argv.batchSize=30000] - Target number of records per file.
 * @returns {Promise<void>}
 */
async function run({
  destination = './data',
  batchSize = 30000,
  ...options
} = {}) {
  const client = new WordPressClient(options);

  // TODO: respect --after and --before
  const [firstPostYear, lastPostYear] = await client.posts.yearRange();

  // divide into batches
  const countForYear = (year) => {
    const after = startOfDate(year);
    const before = endOfDate(year);
    return client.posts.count({ ...options, after, before });
  };
  const batches = await planBatches(countForYear, firstPostYear, lastPostYear, batchSize);

  // Measured AFTER the trailing merge so the log and progress counters are accurate.
  const batchCount = batches.length;
  if (batchCount === 0) {
    console.log('No posts found.');
    return;
  }
  console.log(`Divide into ${batchCount} batches:`);
  for (const { start, end, records } of batches) {
    console.log(`- ${start} -> ${end} (${records} records)`);
  }

  // mkdir -p
  try {
    await access(destination);
  } catch (err) {
    if (err.code !== 'ENOENT') {
      throw err;
    }
    await mkdir(destination, { recursive: true });
    console.log(`Created directory ${destination}`);
  }

  // download
  for (const [index, { start, end, records }] of batches.entries()) {
    const after = startOfDate(start);
    const before = endOfDate(end);
    const filename = `${options.transform ? 'miso' : 'wp'}-posts.${start}-${end}.jsonl.gz`;
    const progress = `[${index + 1} / ${batchCount}]`;

    console.log(`${progress} Downloading ${filename}`);

    const startTime = Date.now();
    const sourceStream = await client.posts.stream({ ...options, after, before });

    await stream.pipeline(
      sourceStream,
      stream.stringify(),
      createGzip(),
      createWriteStream(`${destination}/${filename}`),
    );

    const elapsed = Date.now() - startTime;
    console.log(`${progress} Downloaded ${filename} (${records} records in ${formatDuration(elapsed)})`);
  }

  console.log('Done.');
}

/**
 * Walks the years from newest to oldest, accumulating per-year counts until a
 * batch reaches `batchSize`, and returns the batches (newest first).
 *
 * @param {(year: number) => Promise<number>|number} countForYear - Record count for one year.
 * @param {number} firstYear - Oldest year to include.
 * @param {number} lastYear - Newest year to include.
 * @param {number} batchSize - Target number of records per batch.
 * @returns {Promise<Array<{start: number, end: number, records: number}>>}
 */
async function planBatches(countForYear, firstYear, lastYear, batchSize) {
  const batches = [];
  let endYear;
  let sum = 0;
  for (let year = lastYear; year >= firstYear; year--) {
    if (endYear === undefined) {
      endYear = year;
    }
    // Counted sequentially, one request at a time, as the original did.
    sum += await countForYear(year);
    if (sum >= batchSize) {
      batches.push({ start: year, end: endYear, records: sum });
      endYear = undefined;
      sum = 0;
    }
  }
  if (sum > 0) {
    batches.push({ start: firstYear, end: endYear, records: sum });
  }
  return mergeTrailingBatch(batches, batchSize);
}

/**
 * Folds the last (oldest) batch into its predecessor when it holds fewer than
 * 20% of `batchSize` records. A lone batch is left alone: there is nothing to
 * merge it into.
 *
 * @param {Array<{start: number, end: number, records: number}>} batches - Newest first; mutated in place.
 * @param {number} batchSize - Target number of records per batch.
 * @returns {Array<{start: number, end: number, records: number}>} The same array.
 */
function mergeTrailingBatch(batches, batchSize) {
  if (batches.length < 2 || batches[batches.length - 1].records >= batchSize * 0.2) {
    return batches;
  }
  const last = batches.pop();
  const previous = batches[batches.length - 1];
  // `last` covers OLDER years, so the merged range extends downwards: move
  // `start`, keep `end`.
  previous.start = last.start;
  previous.records += last.records;
  return batches;
}
|
|
96
|
+
|
|
97
|
+
/**
 * Renders a duration as a compact string: `XhYmZs`, `YmZs` or `Zs`,
 * depending on the largest non-zero unit. Fractions of a second are dropped.
 *
 * @param {number} duration - Elapsed time in milliseconds.
 * @returns {string} The formatted duration.
 */
function formatDuration(duration) {
  const totalSeconds = Math.floor(duration / 1000);
  const totalMinutes = Math.floor(totalSeconds / 60);
  const hours = Math.floor(totalMinutes / 60);
  const secondsPart = `${totalSeconds % 60}s`;

  if (hours > 0) {
    return `${hours}h${totalMinutes % 60}m${secondsPart}`;
  }
  return totalMinutes > 0 ? `${totalMinutes}m${secondsPart}` : `${totalSeconds}s`;
}
|
|
109
|
+
|
|
110
|
+
// Yargs command module: `download` (alias `down`).
const downloadCommand = {
  command: 'download',
  aliases: ['down'],
  desc: 'Download all posts and save as files.',
  builder: build,
  handler: run,
};

export default downloadCommand;
|
package/cli/entities.js
CHANGED
|
@@ -1,63 +1,9 @@
|
|
|
1
1
|
import { Transform } from 'stream';
|
|
2
|
+
import { pipeline } from 'stream/promises';
|
|
2
3
|
import split2 from 'split2';
|
|
3
4
|
import { stream, parseDuration } from '@miso.ai/server-commons';
|
|
4
5
|
import { WordPressClient } from '../src/index.js';
|
|
5
|
-
import { normalizeOptions,
|
|
6
|
-
|
|
7
|
-
export function buildForEntities(yargs) {
|
|
8
|
-
// TODO: make them mutually exclusive
|
|
9
|
-
return yargs
|
|
10
|
-
.option('terms', {
|
|
11
|
-
describe: 'Display terms associated with this type of resource',
|
|
12
|
-
type: 'boolean',
|
|
13
|
-
})
|
|
14
|
-
.option('count', {
|
|
15
|
-
alias: 'c',
|
|
16
|
-
describe: 'Return the total number of records',
|
|
17
|
-
type: 'boolean',
|
|
18
|
-
})
|
|
19
|
-
.option('date', {
|
|
20
|
-
alias: 'd',
|
|
21
|
-
describe: 'Only include records in this year/month/day',
|
|
22
|
-
})
|
|
23
|
-
.option('after', {
|
|
24
|
-
alias: 'a',
|
|
25
|
-
describe: 'Only include records after this time',
|
|
26
|
-
})
|
|
27
|
-
.option('before', {
|
|
28
|
-
alias: 'b',
|
|
29
|
-
describe: 'Only include records before this time',
|
|
30
|
-
})
|
|
31
|
-
.option('update', {
|
|
32
|
-
alias: 'u',
|
|
33
|
-
describe: 'Only include records modified in given duration (3h, 2d, etc.)',
|
|
34
|
-
})
|
|
35
|
-
.option('ids', {
|
|
36
|
-
alias: 'include',
|
|
37
|
-
describe: 'Specify post ids'
|
|
38
|
-
})
|
|
39
|
-
.option('fields', {
|
|
40
|
-
describe: 'Specify which record fields are retrieved',
|
|
41
|
-
type: 'array',
|
|
42
|
-
coerce: yargs.coerceToArray,
|
|
43
|
-
})
|
|
44
|
-
.option('resolve', {
|
|
45
|
-
alias: 'r',
|
|
46
|
-
describe: 'Attach resolved entities (author, catagories) linked with the subjects',
|
|
47
|
-
type: 'boolean',
|
|
48
|
-
})
|
|
49
|
-
.option('transform', {
|
|
50
|
-
alias: 't',
|
|
51
|
-
describe: 'Apply transform function to the entities',
|
|
52
|
-
});
|
|
53
|
-
/*
|
|
54
|
-
.option('limit', {
|
|
55
|
-
alias: 'n',
|
|
56
|
-
describe: 'Limit the amount of records',
|
|
57
|
-
type: 'number',
|
|
58
|
-
})
|
|
59
|
-
*/
|
|
60
|
-
}
|
|
6
|
+
import { normalizeOptions, buildForEntities } from './utils.js';
|
|
61
7
|
|
|
62
8
|
function build(yargs) {
|
|
63
9
|
return buildForEntities(yargs)
|
|
@@ -106,20 +52,17 @@ export async function runTerms(client, name, options) {
|
|
|
106
52
|
}
|
|
107
53
|
}
|
|
108
54
|
|
|
109
|
-
export async function runGet(client, name,
|
|
110
|
-
await
|
|
111
|
-
await client.entities(name).stream(
|
|
112
|
-
|
|
113
|
-
transform: await normalizeTransform(transform),
|
|
114
|
-
}),
|
|
115
|
-
stream.stringify(),
|
|
55
|
+
/**
 * Streams the entities of the given resource into an output stream.
 *
 * @param {object} client - WordPress client exposing `entities(name)`.
 * @param {string} name - Resource name passed to `client.entities`.
 * @param {object} options - Options forwarded unchanged to `stream()`.
 * @returns {Promise<void>} Settles when the pipeline finishes.
 */
export async function runGet(client, name, options) {
  const source = await client.entities(name).stream(options);
  const sink = new stream.OutputStream();
  await pipeline(source, sink);
}
|
|
118
61
|
|
|
119
62
|
export async function runIds(client, name, { update, transform, resolve, fields, ...options }) {
|
|
120
63
|
if (update) {
|
|
121
64
|
await stream.pipeline(
|
|
122
|
-
await buildUpdateStream(client, name, update, { ...options, fields: ['id'
|
|
65
|
+
await buildUpdateStream(client, name, update, { ...options, fields: ['id'] }),
|
|
123
66
|
new Transform({
|
|
124
67
|
objectMode: true,
|
|
125
68
|
transform({ id }, _, callback) {
|
|
@@ -160,7 +103,6 @@ async function buildUpdateStream(client, name, update, {
|
|
|
160
103
|
...options
|
|
161
104
|
}) {
|
|
162
105
|
// TODO: move the logic into client itself
|
|
163
|
-
transform = await normalizeTransform(transform);
|
|
164
106
|
const now = Date.now();
|
|
165
107
|
update = parseDuration(update);
|
|
166
108
|
const threshold = now - update;
|
|
@@ -174,6 +116,14 @@ async function buildUpdateStream(client, name, update, {
|
|
|
174
116
|
after: threshold,
|
|
175
117
|
}),
|
|
176
118
|
// get recent modified, excluding ones already fetched
|
|
119
|
+
entities.stream({
|
|
120
|
+
...options,
|
|
121
|
+
transform,
|
|
122
|
+
orderBy: 'modified',
|
|
123
|
+
modifiedAfter: threshold,
|
|
124
|
+
before: threshold,
|
|
125
|
+
}),
|
|
126
|
+
/*
|
|
177
127
|
entities.stream({
|
|
178
128
|
...options,
|
|
179
129
|
transform,
|
|
@@ -186,6 +136,7 @@ async function buildUpdateStream(client, name, update, {
|
|
|
186
136
|
terminate: entity => parseDate(entity.modified_gmt) < threshold,
|
|
187
137
|
},
|
|
188
138
|
})
|
|
139
|
+
*/
|
|
189
140
|
])
|
|
190
141
|
);
|
|
191
142
|
}
|
package/cli/index.js
CHANGED
|
@@ -4,6 +4,8 @@ import version from '../src/version.js';
|
|
|
4
4
|
import { profile, init } from './profile.js';
|
|
5
5
|
import taxonomies from './taxonomies.js';
|
|
6
6
|
import entities from './entities.js';
|
|
7
|
+
import summarize from './summarize.js';
|
|
8
|
+
import download from './download.js';
|
|
7
9
|
|
|
8
10
|
yargs.build(yargs => {
|
|
9
11
|
yargs
|
|
@@ -16,6 +18,9 @@ yargs.build(yargs => {
|
|
|
16
18
|
alias: 'p',
|
|
17
19
|
describe: 'Site profile file location',
|
|
18
20
|
})
|
|
21
|
+
.option('auth', {
|
|
22
|
+
describe: 'Authentication string',
|
|
23
|
+
})
|
|
19
24
|
.option('debug', {
|
|
20
25
|
type: 'boolean',
|
|
21
26
|
default: false,
|
|
@@ -23,6 +28,8 @@ yargs.build(yargs => {
|
|
|
23
28
|
.hide('debug')
|
|
24
29
|
.command(init)
|
|
25
30
|
.command(profile)
|
|
31
|
+
.command(summarize)
|
|
32
|
+
.command(download)
|
|
26
33
|
.command(taxonomies)
|
|
27
34
|
.command(entities)
|
|
28
35
|
.version(version);
|
package/cli/summarize.js
ADDED
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
import { startOfDate, endOfDate, getYear } from '@miso.ai/server-commons';
|
|
2
|
+
import { WordPressClient } from '../src/index.js';
|
|
3
|
+
|
|
4
|
+
/**
 * Yargs builder for the `summarize` command.
 *
 * The command defines no options of its own, so the instance is handed back
 * untouched.
 *
 * @param {object} yargs - The yargs instance being configured.
 * @returns {object} The same instance.
 */
function build(yargs) {
  const unchanged = yargs;
  return unchanged;
}
|
|
7
|
+
|
|
8
|
+
/**
 * Handler for the `summarize` command.
 *
 * Prints the total number of posts, the dates of the first and last post,
 * and a per-year table of post counts.
 *
 * @param {object} [argv] - Parsed command-line arguments; copied and passed
 *   to the client and to every count request.
 * @returns {Promise<void>}
 */
async function run({ ...options } = {}) {
  const client = new WordPressClient(options);
  const [total, [firstPostDate, lastPostDate]] = await Promise.all([
    client.posts.count(options),
    client.posts.dateRange(),
  ]);

  // The count column is as wide as the grand total, the largest number shown.
  const width = `${total}`.length;
  const row = (label, value) => `| ${label} | ${`${value}`.padStart(width)} |`;
  const bar = row('----', '-'.repeat(width));

  console.log();
  console.log(`Total posts: ${total}`);
  console.log(`First post at: ${firstPostDate}`);
  console.log(`Last post at: ${lastPostDate}`);

  // drill down by year
  console.log();
  console.log(bar);
  console.log(row('Year', 'Posts'));
  console.log(bar);

  let year = getYear(firstPostDate);
  const lastYear = getYear(lastPostDate);
  while (year <= lastYear) {
    const after = startOfDate(year);
    const before = endOfDate(year);
    const count = await client.posts.count({ ...options, after, before });
    console.log(row(year, count));
    year++;
  }
  console.log(bar);
}
|
|
34
|
+
|
|
35
|
+
/**
 * Prints a two-dimensional array as space-separated, left-aligned columns.
 *
 * Column widths are taken from the columns of the first row; each cell is
 * stringified with `str` first. An empty (or missing) table prints nothing.
 *
 * @param {Array<Array<*>>} arr - Rows of cells.
 * @returns {void}
 */
function printTable(arr) {
  // Guard: `arr[0]` below would throw on an empty table.
  if (!arr || arr.length === 0) {
    return;
  }
  const rows = arr.map((row) => row.map(str));
  const colWidths = rows[0].map((_, i) => Math.max(...rows.map((row) => (row[i] || '').length)));
  for (const row of rows) {
    console.log(row.map((v, i) => rightPad(v, colWidths[i])).join(' '));
  }
}

/**
 * Stringifies a cell value, showing `--` for `undefined`.
 *
 * @param {*} value - Cell value.
 * @returns {string}
 */
function str(value) {
  return value === undefined ? '--' : `${value}`;
}

/**
 * Pads a string with trailing spaces up to `length` characters.
 *
 * @param {string} [str=''] - Text to pad.
 * @param {number} length - Target width.
 * @returns {string}
 */
function rightPad(str = '', length) {
  return str.padEnd(length);
}
|
|
50
|
+
|
|
51
|
+
// Yargs command module: `summarize` (alias `sum`).
const summarizeCommand = {
  command: 'summarize',
  aliases: ['sum'],
  desc: 'Print out a summary of the WordPress site',
  builder: build,
  handler: run,
};

export default summarizeCommand;
|
package/cli/utils.js
CHANGED
|
@@ -1,24 +1,68 @@
|
|
|
1
|
-
import { join } from 'path';
|
|
2
1
|
import { startOfDate, endOfDate } from '@miso.ai/server-commons';
|
|
3
2
|
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
export function normalizeOptions({ date, after, before, ids, ...options }) {
|
|
3
|
+
/**
 * Normalizes command-line options before they are handed to the client.
 *
 * - `date`, when given, takes precedence over both `after` and `before`.
 * - `ids` is split on commas into a list of trimmed strings.
 * - `include` is dropped from the result (it is declared as the alias of
 *   `ids` in `buildForEntities`).
 *
 * @param {object} argv - Parsed command-line arguments.
 * @returns {object} The remaining options plus normalized `after`, `before` and `ids`.
 */
export function normalizeOptions({ date, after, before, ids, include, ...options }) {
  // TODO: should be able to turn this off, as it's covered by helper
  const lowerBound = startOfDate(date || after);
  const upperBound = endOfDate(date || before);

  // TODO: rely on yargs to coerce to array
  let idList = ids;
  if (ids) {
    idList = `${ids}`.split(',').map((part) => part.trim());
  }

  return { ...options, after: lowerBound, before: upperBound, ids: idList };
}
|
|
11
10
|
|
|
12
|
-
|
|
13
|
-
if (typeof transform === 'string') {
|
|
14
|
-
if (transform === 'default' || transform === 'legacy') {
|
|
15
|
-
return transform;
|
|
16
|
-
}
|
|
17
|
-
return (await import(join(PWD, transform))).default;
|
|
18
|
-
}
|
|
19
|
-
return !!transform;
|
|
20
|
-
}
|
|
21
|
-
|
|
11
|
+
/*
|
|
22
12
|
export function parseDate(value) {
|
|
23
13
|
return Date.parse(`${value}Z`);
|
|
24
14
|
}
|
|
15
|
+
*/
|
|
16
|
+
|
|
17
|
+
/**
 * Registers the options shared by the entity-oriented commands on a yargs
 * instance.
 *
 * @param {object} yargs - The yargs instance; must provide `option()` and
 *   `coerceToArray` (used to coerce `--fields`).
 * @returns {object} The result of the chained `option()` calls.
 */
export function buildForEntities(yargs) {
  // TODO: make them mutually exclusive
  const options = [
    ['terms', {
      describe: 'Display terms associated with this type of resource',
      type: 'boolean',
    }],
    ['count', {
      alias: 'c',
      describe: 'Return the total number of records',
      type: 'boolean',
    }],
    ['date', {
      alias: 'd',
      describe: 'Only include records in this year/month/day',
    }],
    ['after', {
      alias: 'a',
      describe: 'Only include records after this time',
    }],
    ['before', {
      alias: 'b',
      describe: 'Only include records before this time',
    }],
    ['update', {
      alias: 'u',
      describe: 'Only include records modified in given duration (3h, 2d, etc.)',
    }],
    ['ids', {
      alias: 'include',
      describe: 'Specify post ids',
    }],
    ['fields', {
      describe: 'Specify which record fields are retrieved',
      type: 'array',
      coerce: yargs.coerceToArray,
    }],
    ['resolve', {
      alias: 'r',
      // Help text previously read "catagories".
      describe: 'Attach resolved entities (author, categories) linked with the subjects',
      type: 'boolean',
    }],
    ['transform', {
      alias: 't',
      describe: 'Apply transform function to the entities',
    }],
    ['limit', {
      alias: 'n',
      describe: 'Limit the amount of records',
      type: 'number',
    }],
  ];
  // Each option() call is chained on the value returned by the previous one.
  return options.reduce((builder, [name, spec]) => builder.option(name, spec), yargs);
}
|
package/package.json
CHANGED
|
@@ -17,9 +17,9 @@
|
|
|
17
17
|
"simonpai <simon.pai@askmiso.com>"
|
|
18
18
|
],
|
|
19
19
|
"dependencies": {
|
|
20
|
-
"@miso.ai/server-commons": "0.6.3
|
|
21
|
-
"axios": "^
|
|
20
|
+
"@miso.ai/server-commons": "0.6.3",
|
|
21
|
+
"axios": "^1.6.2",
|
|
22
22
|
"axios-retry": "^3.3.1"
|
|
23
23
|
},
|
|
24
|
-
"version": "0.6.3
|
|
24
|
+
"version": "0.6.3"
|
|
25
25
|
}
|
package/src/client.js
CHANGED
|
@@ -50,7 +50,7 @@ export default class EntityIndex {
|
|
|
50
50
|
if (this.hierarchical) {
|
|
51
51
|
return; // already all fetched
|
|
52
52
|
}
|
|
53
|
-
ids = asArray(ids);
|
|
53
|
+
ids = asArray(ids).filter(id => id); // discard 0, null, undefined
|
|
54
54
|
|
|
55
55
|
const promises = []
|
|
56
56
|
const idsToFetch = [];
|
|
@@ -85,8 +85,11 @@ export default class EntityIndex {
|
|
|
85
85
|
}
|
|
86
86
|
|
|
87
87
|
_resolveFetch(id) {
|
|
88
|
-
this._fetching.get(id)
|
|
89
|
-
|
|
88
|
+
const res = this._fetching.get(id);
|
|
89
|
+
if (res) {
|
|
90
|
+
res.resolve();
|
|
91
|
+
this._fetching.delete(id);
|
|
92
|
+
}
|
|
90
93
|
}
|
|
91
94
|
|
|
92
95
|
async get(id) {
|
|
@@ -96,13 +99,14 @@ export default class EntityIndex {
|
|
|
96
99
|
}
|
|
97
100
|
|
|
98
101
|
async getAll(ids) {
|
|
102
|
+
ids = ids.filter(id => id); // discard 0, null, undefined
|
|
99
103
|
await this._dataReady();
|
|
100
104
|
await this.fetch(ids);
|
|
101
105
|
return ids.map(id => this._index.get(id));
|
|
102
106
|
}
|
|
103
107
|
|
|
104
108
|
async getValue(id) {
|
|
105
|
-
if (id
|
|
109
|
+
if (!id) { // 0, null, undefined
|
|
106
110
|
return undefined;
|
|
107
111
|
}
|
|
108
112
|
return this._value(await this.get(id));
|
package/src/entities/index.js
CHANGED
|
@@ -1,5 +1,6 @@
|
|
|
1
|
+
import { join } from 'path';
|
|
1
2
|
import { Transform } from 'stream';
|
|
2
|
-
import { asArray, stream } from '@miso.ai/server-commons';
|
|
3
|
+
import { asArray, stream, getYear } from '@miso.ai/server-commons';
|
|
3
4
|
import EntityIndex from './entity-index.js';
|
|
4
5
|
import EntityTransformStream from './transform.js';
|
|
5
6
|
import EntityPresenceStream from './presence.js';
|
|
@@ -20,9 +21,8 @@ export default class Entities {
|
|
|
20
21
|
if (!resolve && !transform) {
|
|
21
22
|
return this._client._helpers.stream(this.name, options);
|
|
22
23
|
}
|
|
23
|
-
transform = getTransformFn(transform);
|
|
24
|
-
|
|
25
24
|
const client = this._client;
|
|
25
|
+
transform = await getTransformFn(client, this.name, transform);
|
|
26
26
|
|
|
27
27
|
// we need taxonomy fetched so we know whether it's hierarchical
|
|
28
28
|
const taxonomies = await client._helpers.findAssociatedTaxonomies(this.name);
|
|
@@ -62,12 +62,7 @@ export default class Entities {
|
|
|
62
62
|
}
|
|
63
63
|
|
|
64
64
|
async ids(options = {}) {
|
|
65
|
-
|
|
66
|
-
const fields = ['id'];
|
|
67
|
-
if (before || after) {
|
|
68
|
-
fields.push('modified_gmt');
|
|
69
|
-
}
|
|
70
|
-
return (await this._client._helpers.stream(this.name, { ...options, fields }))
|
|
65
|
+
return (await this._client._helpers.stream(this.name, { ...options, fields: ['id'] }))
|
|
71
66
|
.pipe(new Transform({
|
|
72
67
|
objectMode: true,
|
|
73
68
|
transform({ id }, _, callback) {
|
|
@@ -77,6 +72,10 @@ export default class Entities {
|
|
|
77
72
|
}
|
|
78
73
|
|
|
79
74
|
async getAll(options) {
|
|
75
|
+
return this.all(options);
|
|
76
|
+
}
|
|
77
|
+
|
|
78
|
+
async all(options) {
|
|
80
79
|
return stream.collect(await this.stream(options));
|
|
81
80
|
}
|
|
82
81
|
|
|
@@ -92,6 +91,18 @@ export default class Entities {
|
|
|
92
91
|
return new EntityPresenceStream(this._client, this.name, options);
|
|
93
92
|
}
|
|
94
93
|
|
|
94
|
+
async dateRange() {
|
|
95
|
+
// TODO: options?
|
|
96
|
+
return Promise.all([
|
|
97
|
+
getPostDate(this._client, 'asc'),
|
|
98
|
+
getPostDate(this._client, 'desc'),
|
|
99
|
+
]);
|
|
100
|
+
}
|
|
101
|
+
|
|
102
|
+
async yearRange() {
|
|
103
|
+
return (await this.dateRange()).map(getYear);
|
|
104
|
+
}
|
|
105
|
+
|
|
95
106
|
get index() {
|
|
96
107
|
return this._index;
|
|
97
108
|
}
|
|
@@ -106,6 +117,30 @@ export default class Entities {
|
|
|
106
117
|
|
|
107
118
|
}
|
|
108
119
|
|
|
120
|
+
/**
 * Resolves the `transform` option into a transform function.
 *
 * - `'default'` / `'legacy'` select the built-in transforms.
 * - `true` selects the profile's default for this resource
 *   (`client._profile.defaults.transform[name]`), or the built-in default
 *   when the profile has none.
 * - Any other string is treated as a module path relative to the working
 *   directory; the module's default export is used.
 * - A function is wrapped so it receives `{ defaultTransform }` as its
 *   second argument.
 *
 * @param {object} client - Client whose `_profile` may carry defaults.
 * @param {string} name - Resource name used to look up the profile default.
 * @param {boolean|string|Function} [transform] - The requested transform.
 * @returns {Promise<Function|undefined>} The transform, or `undefined` when
 *   the option resolves to nothing usable.
 */
async function getTransformFn(client, name, transform) {
  switch (transform) {
    case 'default':
      return defaultTransform;
    case 'legacy':
      return legacyTransform;
  }
  if (transform === true) {
    const { defaults } = client._profile || {};
    if (!defaults || !defaults.transform || !defaults.transform[name]) {
      return defaultTransform;
    }
    transform = defaults.transform[name];
  }
  if (typeof transform === 'string') {
    // try as file path
    // PWD is set by shells but is not guaranteed to exist in the environment;
    // fall back to the process working directory instead of crashing in join().
    const baseDir = process.env.PWD || process.cwd();
    transform = (await import(join(baseDir, transform))).default;
  }
  if (typeof transform === 'function') {
    return post => transform(post, { defaultTransform });
  }
  return undefined;
}
|
|
143
|
+
|
|
109
144
|
function aggregateIds(records, propName) {
|
|
110
145
|
return Array.from(records.reduce((idSet, record) => {
|
|
111
146
|
for (const id of asArray(record[propName])) {
|
|
@@ -115,8 +150,6 @@ function aggregateIds(records, propName) {
|
|
|
115
150
|
}, new Set()));
|
|
116
151
|
}
|
|
117
152
|
|
|
118
|
-
function
|
|
119
|
-
return
|
|
120
|
-
(transform === true || transform === 'default') ? defaultTransform :
|
|
121
|
-
transform === 'legacy' ? legacyTransform : undefined;
|
|
153
|
+
/**
 * Fetches the `date_gmt` of the first post in the given sort order.
 *
 * @param {object} client - Client exposing `posts.getAll(options)`.
 * @param {string} order - Sort order forwarded to the request (`'asc'` or `'desc'`).
 * @param {object} [options={}] - Extra request options; `limit`, `order` and
 *   `fields` are always overridden.
 * @returns {Promise<*>} The `date_gmt` field of the single post requested.
 * @throws {Error} When the request returns no post at all.
 */
async function getPostDate(client, order, options = {}) {
  const [post] = await client.posts.getAll({ ...options, limit: 1, order, fields: ['date_gmt'] });
  if (!post) {
    // Previously surfaced as "Cannot read properties of undefined (reading 'date_gmt')".
    throw new Error(`No posts found: cannot determine the ${order === 'asc' ? 'first' : 'last'} post date.`);
  }
  return post.date_gmt;
}
|
package/src/entities/presence.js
CHANGED
|
@@ -1,5 +1,4 @@
|
|
|
1
1
|
import { Transform } from 'stream';
|
|
2
|
-
import axios from '../axios.js';
|
|
3
2
|
|
|
4
3
|
export default class EntityPresenceStream extends Transform {
|
|
5
4
|
|
|
@@ -23,7 +22,7 @@ export default class EntityPresenceStream extends Transform {
|
|
|
23
22
|
this._done = false;
|
|
24
23
|
}
|
|
25
24
|
|
|
26
|
-
|
|
25
|
+
_transform(id, _, next) {
|
|
27
26
|
id = `${id}`; // buffer -> string
|
|
28
27
|
if (id) {
|
|
29
28
|
this._inputs.push(id);
|
|
@@ -95,7 +94,7 @@ export default class EntityPresenceStream extends Transform {
|
|
|
95
94
|
|
|
96
95
|
async _fetch(ids) {
|
|
97
96
|
const url = await this._client._helpers.url.build(this._name, { include: ids, fields: ['id'] });
|
|
98
|
-
const { data } = await axios.get(url);
|
|
97
|
+
const { data } = await this._client._helpers.axios.get(url);
|
|
99
98
|
const presences = new Set();
|
|
100
99
|
for (const { id } of data) {
|
|
101
100
|
presences.add(`${id}`);
|
package/src/helpers.js
CHANGED
|
@@ -1,24 +1,52 @@
|
|
|
1
|
-
import
|
|
2
|
-
import
|
|
1
|
+
import axios from 'axios';
|
|
2
|
+
import axiosRetry from 'axios-retry';
|
|
3
|
+
import { asNumber, splitObj, stream, startOfDate, endOfDate } from '@miso.ai/server-commons';
|
|
3
4
|
import DataSource from './source/index.js';
|
|
5
|
+
import version from './version.js';
|
|
4
6
|
|
|
5
7
|
const MS_PER_HOUR = 1000 * 60 * 60;
|
|
6
8
|
|
|
7
|
-
const STREAM_OPTIONS = ['offset', '
|
|
9
|
+
const STREAM_OPTIONS = ['offset', 'strategy', 'filter', 'transform', 'onLoad'];
|
|
10
|
+
|
|
11
|
+
/**
 * Creates the axios instance used for all requests of one client.
 *
 * Every request carries a `MisoBot/<version>` user agent and, when
 * `client._options.auth` is set, a Basic `Authorization` header. Failed
 * requests are retried up to 5 times through axios-retry, with a delay that
 * grows linearly with the attempt count.
 *
 * @param {object} client - Client whose `_options.auth` may hold credentials.
 * @returns {object} The configured axios instance.
 * @throws {TypeError} When `auth` is neither a string nor a
 *   `{ username, password }` object.
 */
function createAxios(client) {
  const { auth } = client._options || {};
  const headers = {
    'User-Agent': `MisoBot/${version}`,
  };
  if (auth) {
    headers['Authorization'] = encodeBasicAuth(auth);
  }
  const instance = axios.create({
    headers,
  });
  axiosRetry(instance, { retries: 5, retryDelay: count => count * 300 });
  return instance;
}

/**
 * Builds the value of a Basic `Authorization` header.
 *
 * @param {string|{username: string, password: string}} auth - Either a ready
 *   `user:password` string or an object carrying both parts.
 * @returns {string} `Basic <base64 credentials>`.
 * @throws {TypeError} When `auth` has neither shape.
 */
function encodeBasicAuth(auth) {
  // Kept in a separate binding: the original reassigned the `const auth`,
  // which threw for every object-form credential.
  const credentials = (typeof auth === 'object' && auth.username && auth.password)
    ? `${auth.username}:${auth.password}`
    : auth;
  if (typeof credentials !== 'string') {
    throw new TypeError(`Invalid auth: must be a string or an object.`);
  }
  return `Basic ${Buffer.from(credentials).toString('base64')}`;
}
|
|
8
31
|
|
|
9
32
|
export default class Helpers {
|
|
10
33
|
|
|
11
34
|
constructor(client) {
|
|
12
35
|
this._start = Date.now();
|
|
13
36
|
this._client = client;
|
|
37
|
+
this._axios = createAxios(client);
|
|
14
38
|
this.url = new Url(this);
|
|
15
39
|
this._samples = {};
|
|
16
40
|
this.debug = this.debug.bind(this);
|
|
17
41
|
}
|
|
18
42
|
|
|
19
|
-
|
|
43
|
+
get axios() {
|
|
44
|
+
return this._axios;
|
|
45
|
+
}
|
|
46
|
+
|
|
47
|
+
async stream(resource, options = {}) {
|
|
20
48
|
const [streamOptions, sourceOptions] = splitObj(options, STREAM_OPTIONS);
|
|
21
|
-
const source = new DataSource(this, resource, sourceOptions);
|
|
49
|
+
const source = options.source || new DataSource(this, resource, sourceOptions);
|
|
22
50
|
return new stream.BufferedReadStream(source, { ...streamOptions, debug: this.debug });
|
|
23
51
|
}
|
|
24
52
|
|
|
@@ -32,7 +60,7 @@ export default class Helpers {
|
|
|
32
60
|
|
|
33
61
|
async _fetchSample(resource) {
|
|
34
62
|
const url = await this.url.build(resource, { page: 0, pageSize: 1 });
|
|
35
|
-
const { data, headers } = await axios.get(url);
|
|
63
|
+
const { data, headers } = await this.axios.get(url);
|
|
36
64
|
if (!data.length) {
|
|
37
65
|
throw new Error(`No record of ${resource} avaliable`);
|
|
38
66
|
}
|
|
@@ -71,7 +99,7 @@ export default class Helpers {
|
|
|
71
99
|
|
|
72
100
|
async _fetchTaxonomies() {
|
|
73
101
|
const url = await this.url.build('taxonomies');
|
|
74
|
-
const { data } = await axios.get(url);
|
|
102
|
+
const { data } = await this.axios.get(url);
|
|
75
103
|
this.debug(`Fetched taxonomies.`);
|
|
76
104
|
return Object.values(data);
|
|
77
105
|
}
|
|
@@ -82,7 +110,7 @@ export default class Helpers {
|
|
|
82
110
|
|
|
83
111
|
async count(resource, { offset: _, ...options } = {}) {
|
|
84
112
|
const url = await this.url.build(resource, { ...options, page: 0, pageSize: 1 });
|
|
85
|
-
const { headers } = await axios.get(url);
|
|
113
|
+
const { headers } = await this.axios.get(url);
|
|
86
114
|
return asNumber(headers['x-wp-total']);
|
|
87
115
|
}
|
|
88
116
|
|
|
@@ -92,7 +120,7 @@ export default class Helpers {
|
|
|
92
120
|
|
|
93
121
|
async countUrl(url) {
|
|
94
122
|
url = await this.url.append(url, { page: 0, pageSize: 1 });
|
|
95
|
-
const { headers } = await axios.get(url);
|
|
123
|
+
const { headers } = await this.axios.get(url);
|
|
96
124
|
return asNumber(headers['x-wp-total']);
|
|
97
125
|
}
|
|
98
126
|
|
|
@@ -133,13 +161,27 @@ class Url {
|
|
|
133
161
|
|
|
134
162
|
// modifiedAfter, modifiedBefore is supported since WordPress 5.7
|
|
135
163
|
// https://make.wordpress.org/core/2021/02/23/rest-api-changes-in-wordpress-5-7/
|
|
136
|
-
async append(url,
|
|
137
|
-
|
|
138
|
-
|
|
164
|
+
async append(url, {
|
|
165
|
+
date,
|
|
166
|
+
after,
|
|
167
|
+
before,
|
|
168
|
+
modifiedAfter,
|
|
169
|
+
modifiedBefore,
|
|
170
|
+
order,
|
|
171
|
+
orderBy,
|
|
172
|
+
page,
|
|
173
|
+
pageSize,
|
|
174
|
+
offset,
|
|
175
|
+
include,
|
|
176
|
+
exclude,
|
|
177
|
+
fields,
|
|
178
|
+
} = {}) {
|
|
139
179
|
const params = [];
|
|
140
180
|
|
|
141
181
|
// TODO: support single id
|
|
142
182
|
|
|
183
|
+
[after, before] = [startOfDate(date || after), endOfDate(date || before)];
|
|
184
|
+
|
|
143
185
|
// The date is compared against site's local time, not UTC, so we have to work on timezone offset
|
|
144
186
|
if (has(after) || has(before) || has(modifiedAfter) || has(modifiedBefore)) {
|
|
145
187
|
const utcOffset = await this._helpers.utcOffsetInMs();
|
|
@@ -157,6 +199,7 @@ class Url {
|
|
|
157
199
|
has(include) && include.length && params.push(`include=${joinIds(include)}`);
|
|
158
200
|
has(exclude) && exclude.length && params.push(`exclude=${joinIds(exclude)}`);
|
|
159
201
|
if (has(fields) && fields.length) {
|
|
202
|
+
// TODO: is this unused?
|
|
160
203
|
if (has(before) && !fields.includes('modified_gmt')) {
|
|
161
204
|
fields = [...fields, 'modified_gmt'];
|
|
162
205
|
}
|
package/src/posts/index.js
CHANGED
|
@@ -10,8 +10,12 @@ export default class Posts extends Entities {
|
|
|
10
10
|
super(client, RESOURCE_NAME);
|
|
11
11
|
}
|
|
12
12
|
|
|
13
|
-
async getAll() {
|
|
14
|
-
|
|
13
|
+
async getAll(options = {}) {
|
|
14
|
+
if (!options.ids && !options.limit) {
|
|
15
|
+
// TODO: should be more tolerant
|
|
16
|
+
throw new Error(`Getting all posts is not supported.`);
|
|
17
|
+
}
|
|
18
|
+
return super.getAll(options);
|
|
15
19
|
}
|
|
16
20
|
|
|
17
21
|
async index() {
|
package/src/source/base.js
CHANGED
|
@@ -1,5 +1,3 @@
|
|
|
1
|
-
import axios from '../axios.js';
|
|
2
|
-
|
|
3
1
|
export default class WordPressDataSource {
|
|
4
2
|
|
|
5
3
|
constructor(helpers, resource, options = {}) {
|
|
@@ -32,10 +30,10 @@ export default class WordPressDataSource {
|
|
|
32
30
|
this._debug(`[WordPressDataSource] request ${url}`);
|
|
33
31
|
const response = await this._axiosGet(url);
|
|
34
32
|
this._debug(`[WordPressDataSource] response ${response.status} ${url}`);
|
|
35
|
-
return this._process(response, { url });
|
|
33
|
+
return this._process(response, { request, url });
|
|
36
34
|
}
|
|
37
35
|
|
|
38
|
-
_process({ status, data }, { url }) {
|
|
36
|
+
_process({ status, data }, { request, url }) {
|
|
39
37
|
if (status >= 400 && status < 500 && data.code === 'rest_post_invalid_page_number') {
|
|
40
38
|
// out of bound, so there is no more data
|
|
41
39
|
return { data: [], terminate: true };
|
|
@@ -43,6 +41,10 @@ export default class WordPressDataSource {
|
|
|
43
41
|
if (!Array.isArray(data)) {
|
|
44
42
|
throw new Error(`Unexpected response from WordPress API for ${url}. Expected an array of objects: ${data}`);
|
|
45
43
|
}
|
|
44
|
+
const { records } = request;
|
|
45
|
+
if (records) {
|
|
46
|
+
data = data.slice(0, records);
|
|
47
|
+
}
|
|
46
48
|
if (!this._options.preserveLinks) {
|
|
47
49
|
data = data.map(this._helpers.removeLinks);
|
|
48
50
|
}
|
|
@@ -55,13 +57,13 @@ export default class WordPressDataSource {
|
|
|
55
57
|
|
|
56
58
|
async _buildBaseUrl() {
|
|
57
59
|
// exclude parameters meant to be dealt with state
|
|
58
|
-
const { page, ...options } = this._options;
|
|
60
|
+
const { page, ids, ...options } = this._options;
|
|
59
61
|
return this._helpers.url.build(this._resource, options);
|
|
60
62
|
}
|
|
61
63
|
|
|
62
64
|
async _axiosGet(url) {
|
|
63
65
|
try {
|
|
64
|
-
return await axios.get(url);
|
|
66
|
+
return await this._helpers.axios.get(url);
|
|
65
67
|
} catch(error) {
|
|
66
68
|
if (error.response) {
|
|
67
69
|
return error.response;
|
package/src/source/paged.js
CHANGED
|
@@ -14,7 +14,7 @@ export default class PagedWordPressDataSource extends WordPressDataSource {
|
|
|
14
14
|
if (pageSize > MAX_PAGE_SIZE) {
|
|
15
15
|
throw new Error(`Page size cannot be greater than ${MAX_PAGE_SIZE}: ${pageSize}`);
|
|
16
16
|
}
|
|
17
|
-
|
|
17
|
+
this._limit = limit;
|
|
18
18
|
this._pageSize = options.pageSize = pageSize;
|
|
19
19
|
this._page = 0;
|
|
20
20
|
}
|
|
@@ -25,10 +25,13 @@ export default class PagedWordPressDataSource extends WordPressDataSource {
|
|
|
25
25
|
|
|
26
26
|
request() {
|
|
27
27
|
const page = this._page++;
|
|
28
|
-
|
|
29
|
-
const
|
|
28
|
+
let records = this._pageSize;
|
|
29
|
+
const limit = combineLimit(this._totalValue, this._limit);
|
|
30
30
|
// if we know total, we know when the data is exhausted
|
|
31
|
-
const exhaust =
|
|
31
|
+
const exhaust = limit !== undefined && ((page + 1) * this._pageSize > limit);
|
|
32
|
+
if (exhaust && this._limit !== undefined) {
|
|
33
|
+
records = this._limit - (page * this._pageSize);
|
|
34
|
+
}
|
|
32
35
|
return exhaust ? { records, page, exhaust } : { records, page };
|
|
33
36
|
}
|
|
34
37
|
|
|
@@ -36,9 +39,18 @@ export default class PagedWordPressDataSource extends WordPressDataSource {
|
|
|
36
39
|
return this._totalPromise || (this._totalPromise = this._fetchTotal());
|
|
37
40
|
}
|
|
38
41
|
|
|
39
|
-
async _url(baseUrl, { page }) {
|
|
42
|
+
async _url(baseUrl, { records, page }) {
|
|
40
43
|
const head = baseUrl.indexOf('?') < 0 ? '?' : '&';
|
|
41
|
-
|
|
44
|
+
let url = `${baseUrl}${head}page=${page + 1}`;
|
|
45
|
+
// optimize: if limit < page size we can save much bandwidth
|
|
46
|
+
if (page === 0 && records < this._pageSize) {
|
|
47
|
+
if (url.indexOf('per_page=') > -1) {
|
|
48
|
+
url = url.replace(/per_page=\d+/, `per_page=${records}`);
|
|
49
|
+
} else {
|
|
50
|
+
url += `&per_page=${records}`;
|
|
51
|
+
}
|
|
52
|
+
}
|
|
53
|
+
return url;
|
|
42
54
|
}
|
|
43
55
|
|
|
44
56
|
async _fetchTotal() {
|
|
@@ -63,3 +75,9 @@ export default class PagedWordPressDataSource extends WordPressDataSource {
|
|
|
63
75
|
}
|
|
64
76
|
|
|
65
77
|
}
|
|
78
|
+
|
|
79
|
+
// Slack added on top of a known total before it is used as a limit.
const TOTAL_BUFFER = 10;

/**
 * Merges the known total and the requested limit into one effective limit.
 *
 * @param {number} [total] - Known total number of records, if any.
 * @param {number} [limit] - Requested limit, if any.
 * @returns {number|undefined} `limit` when the total is unknown; otherwise the
 *   padded total, capped by `limit` when one is given.
 */
function combineLimit(total, limit) {
  if (total === undefined) {
    return limit;
  }
  const paddedTotal = total + TOTAL_BUFFER;
  if (limit === undefined) {
    return paddedTotal;
  }
  return Math.min(paddedTotal, limit);
}
|
package/src/version.js
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
export default '0.6.3
|
|
1
|
+
export default '0.6.3';
|