@miso.ai/server-wordpress 0.6.4 → 0.6.5-beta.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md ADDED
@@ -0,0 +1,163 @@
1
+ # Miso Node.js SDK (WordPress)
2
+
3
+ ## Setup
4
+
5
+ 1. Install Node.js.
6
+
7
+ 2. Install the package locally:
8
+
9
+ ```bash
10
+ npm i @miso.ai/server-wordpress
11
+ ```
12
+
13
+ Or, install the package globally:
14
+
15
+ ```bash
16
+ npm i -g @miso.ai/server-wordpress
17
+ ```
18
+
19
+ Or, use `npx` to run the commands; it will prompt you to install the package on first use.
20
+
21
+ 3. Initialize a project profile
22
+
23
+ ```bash
24
+ miso-wp init "b2c-contenthub.com"
25
+ ```
26
+
27
+ which will generate a file `wordpress.json` in the current directory, with content like this:
28
+
29
+ ```json
30
+ {
31
+ "site": "b2c-contenthub.com",
32
+ "utcOffset": 0
33
+ }
34
+ ```
35
+
36
+ ## Usage
37
+
38
+ ### Help message
39
+
40
+ ```bash
41
+ miso-wp --help
42
+ ```
43
+
44
+ ### Get
45
+
46
+ This command emits an infinite stream of posts from the WordPress site.
47
+
48
+ ```bash
49
+ miso-wp posts
50
+ ```
51
+
52
+ You can limit the number of posts to emit, or pipe the output to `head`:
53
+
54
+ ```bash
55
+ miso-wp posts -n 10 # this will be more efficient
56
+ miso-wp posts | head -10
57
+ ```
58
+
59
+ ### Get, by ids
60
+
61
+ Get posts by ids:
62
+
63
+ ```bash
64
+ miso-wp posts --ids 10001
65
+ miso-wp posts --ids 10001,10002
66
+ ```
67
+
68
+ ### Get, with a time range
69
+
70
+ You can specify a time range:
71
+
72
+ ```bash
73
+ # The range is always inclusive
74
+ miso-wp posts --after 2025-01-01 --before 2025-01-31
75
+ miso-wp posts -a 2025-01-01 -b 2025-01-31
76
+
77
+ miso-wp posts -a 2025-01 -b 2025-02
78
+ miso-wp posts -a 2024 -b 2025
79
+
80
+ miso-wp posts --date 2025-01-15
81
+ miso-wp posts -d 2025-01-15
82
+
83
+ miso-wp posts -d 2025-01
84
+ miso-wp posts -d 2025
85
+ ```
86
+
87
+ Get posts that were either created or updated within a recent duration:
88
+
89
+ ```bash
90
+ miso-wp posts -u 3h # in last 3 hours
91
+ miso-wp posts -u 4d # in last 4 days
92
+ miso-wp posts -u 5w # in last 5 weeks
93
+ ```
94
+
95
+ ### Count the items
96
+
97
+ All filters above can be applied:
98
+
99
+ ```bash
100
+ miso-wp posts -c
101
+ miso-wp posts -d 2025 -c
102
+ miso-wp posts -d 2025-01 -c
103
+ ```
104
+
105
+ ### Get, with linked resources resolved
106
+
107
+ There are many linked resources in a post, such as author, categories, tags, etc. In the default mode, those resources are represented by their numeric ids. You can resolve them by:
108
+
109
+ ```bash
110
+ miso-wp posts --resolve
111
+ ```
112
+
113
+ The resolved content will be put in a `_linked` field under the post object.
114
+
115
+ ### Get, restricting fields retrieved
116
+
117
+ ```bash
118
+ miso-wp posts --fields id,title
119
+ ```
120
+
121
+ ### Get, with extra parameters
122
+
123
+ ```bash
124
+ miso-wp posts --params "order=asc"
125
+ ```
126
+
127
+ See [WordPress JSON API reference](https://developer.wordpress.org/rest-api/reference/posts/#definition).
128
+
129
+ ### Get, with records transformed
130
+
131
+ Transform the posts to Miso product records:
132
+
133
+ ```bash
134
+ miso-wp posts --transform
135
+ ```
136
+
137
+ You can customize the transformation:
138
+
139
+ ```bash
140
+ miso-wp posts --transform 'path-to/your-transform.js'
141
+ ```
142
+
143
+ ```js
144
+ export default function(post, { defaultTransform }) {
145
+ const record = defaultTransform(post);
146
+ // do something with the record
147
+ return record;
148
+ }
149
+ ```
150
+
151
+ ### List all types of entities
152
+
153
+ ```bash
154
+ miso-wp taxonomies
155
+ ```
156
+
157
+ ### Getting other types of items
158
+
159
+ Simply replace `posts` with other types of items. The term should match the `rest_base` property in the response of the `taxonomies` command.
160
+
161
+ ```bash
162
+ miso-wp categories
163
+ ```
package/cli/index.js CHANGED
@@ -6,6 +6,7 @@ import taxonomies from './taxonomies.js';
6
6
  import entities from './entities.js';
7
7
  import summarize from './summarize.js';
8
8
  import download from './download.js';
9
+ import xml from './xml.js';
9
10
 
10
11
  yargs.build(yargs => {
11
12
  yargs
@@ -28,6 +29,7 @@ yargs.build(yargs => {
28
29
  .hide('debug')
29
30
  .command(init)
30
31
  .command(profile)
32
+ .command(xml)
31
33
  .command(summarize)
32
34
  .command(download)
33
35
  .command(taxonomies)
package/cli/utils.js CHANGED
@@ -51,6 +51,11 @@ export function buildForEntities(yargs) {
51
51
  type: 'array',
52
52
  coerce: yargs.coerceToArray,
53
53
  })
54
+ .option('params', {
55
+ describe: 'Specify additional query parameters in the form of key=value',
56
+ type: 'array',
57
+ coerce: yargs.coerceToArray,
58
+ })
54
59
  .option('resolve', {
55
60
  alias: 'r',
56
61
  describe: 'Attach resolved entities (author, catagories) linked with the subjects',
package/cli/xml.js ADDED
@@ -0,0 +1,61 @@
1
+ import { join } from 'path';
2
+ import { pipeline } from 'stream/promises';
3
+ import { stream } from '@miso.ai/server-commons';
4
+ import { getEntityTransformFunction, EntityTransformStream } from '../src/entities/index.js';
5
+ import { XmlParser } from '../src/index.js';
6
+
7
/**
 * Registers the options of the `xml` command on the given yargs instance.
 *
 * @param {object} yargs - The yargs instance handed to the command builder.
 * @returns {object} Whatever the last chained `.option()` call returns.
 */
function build(yargs) {
  return yargs
    .option('transform', {
      alias: 't',
      describe: 'Transform function',
    })
    .option('parser', {
      alias: 'p',
      describe: 'Parser function',
    })
    // The option keeps its original (misspelled) name so existing invocations
    // and the parsed `surpressErrors` key stay intact; the correctly spelled
    // form is accepted as an additional alias.
    .option('surpress-errors', {
      alias: ['s', 'suppress-errors'],
      type: 'boolean',
      default: false,
      describe: 'Suppress errors',
    });
}
24
+
25
/**
 * Handler of the `xml` command.
 *
 * Reads XML from `file` (or from stdin when no file is given), feeds it
 * through an `XmlParseStream` driven by the selected parser, optionally
 * through the entity transform stream, and finally into an `OutputStream`.
 *
 * @param {object} [argv] - Parsed command-line arguments.
 * @param {string} [argv.file] - Path of the XML file to read.
 * @param {string} [argv.parser] - Parser selector, forwarded to `getParser`.
 * @param {string|boolean} [argv.transform] - Transform selector, forwarded to `getTransformStreams`.
 * @returns {Promise<void>} Settles when the pipeline finishes.
 */
async function run({ file, parser, transform, ...options } = {}) {
  const xmlParser = await getParser(parser);

  // Stages are created in the same order as they appear in the pipeline.
  const source = file ? stream.fileReadStream(file) : process.stdin;
  const parseStream = new stream.XmlParseStream(xmlParser, options);
  const transformStreams = await getTransformStreams(transform);
  const sink = new stream.OutputStream();

  await pipeline(source, parseStream, ...transformStreams, sink);
}
34
+
35
// yargs command module describing `xml [file]`.
const xmlCommand = {
  command: 'xml [file]',
  desc: 'Read from XML file',
  builder: build,
  handler: run,
};

export default xmlCommand;
41
+
42
/**
 * Creates the parser instance used by the `xml` command.
 *
 * @param {string} [file] - Path to a module whose default export is a parser
 *   class. Relative paths are resolved against the current working directory.
 *   When omitted, or equal to `'default'`, the built-in `XmlParser` is used.
 * @returns {Promise<object>} A new parser instance.
 * @throws {Error} When the module cannot be loaded or instantiated; the
 *   underlying error is available as `cause`.
 */
async function getParser(file) {
  if (!file || file === 'default') {
    return new XmlParser();
  }
  try {
    const { resolve } = await import('node:path');
    const { pathToFileURL } = await import('node:url');
    // process.cwd() is always available, whereas the PWD environment variable
    // may be unset; resolve() also keeps absolute paths intact.
    const modulePath = resolve(process.cwd(), file);
    // import() expects a URL-like specifier; converting to a file URL handles
    // Windows drive letters and characters such as `#` or `?` in the path.
    const { default: ParserClass } = await import(pathToFileURL(modulePath).href);
    return new ParserClass();
  } catch (e) {
    throw new Error(`Failed to load parser from ${file}: ${e.message}`, { cause: e });
  }
}
53
+
54
/**
 * Builds the optional transform stage(s) of the `xml` pipeline.
 *
 * @param {string|boolean} [transform] - Transform selector; a falsy value
 *   means no transformation.
 * @returns {Promise<Array>} An empty array when no transform is requested,
 *   otherwise a single-element array holding an `EntityTransformStream`.
 */
async function getTransformStreams(transform) {
  if (transform) {
    const transformFn = await getEntityTransformFunction(transform);
    // No entity indices are passed here: the stream gets an empty list.
    return [new EntityTransformStream([], { transform: transformFn })];
  }
  return [];
}
package/package.json CHANGED
@@ -17,11 +17,12 @@
17
17
  "simonpai <simon.pai@askmiso.com>"
18
18
  ],
19
19
  "dependencies": {
20
- "@miso.ai/server-commons": "0.6.4",
20
+ "@miso.ai/server-commons": "0.6.5-beta.0",
21
21
  "axios": "^1.6.2",
22
22
  "axios-retry": "^3.3.1",
23
23
  "dotenv": "^16.4.5",
24
+ "saxes": "^6.0.0",
24
25
  "split2": "^4.2.0"
25
26
  },
26
- "version": "0.6.4"
27
+ "version": "0.6.5-beta.0"
27
28
  }
package/src/client.js CHANGED
@@ -3,7 +3,7 @@ import { constants } from 'fs';
3
3
  import Helpers from './helpers.js';
4
4
  import Media from './media.js';
5
5
  import Posts from './posts/index.js';
6
- import Entities from './entities/index.js';
6
+ import { Entities } from './entities/index.js';
7
7
 
8
8
  const DEFAULT_PROFILE = './wordpress.json';
9
9
 
@@ -7,7 +7,9 @@ import EntityPresenceStream from './presence.js';
7
7
  import defaultTransform from './transform-default.js';
8
8
  import legacyTransform from './transform-legacy.js';
9
9
 
10
- export default class Entities {
10
+ export { EntityTransformStream };
11
+
12
+ export class Entities {
11
13
 
12
14
  constructor(client, name) {
13
15
  this._client = client;
@@ -22,7 +24,7 @@ export default class Entities {
22
24
  return this._client._helpers.stream(this.name, options);
23
25
  }
24
26
  const client = this._client;
25
- transform = await getTransformFn(client, this.name, transform);
27
+ transform = await _getTransformFn(client, this.name, transform);
26
28
 
27
29
  // we need taxonomy fetched so we know whether it's hierarchical
28
30
  const taxonomies = await client._helpers.findAssociatedTaxonomies(this.name);
@@ -55,7 +57,7 @@ export default class Entities {
55
57
  };
56
58
 
57
59
  // transform stream
58
- const transformStream = new EntityTransformStream(this._client, indicies, { transform });
60
+ const transformStream = new EntityTransformStream(indicies, { transform });
59
61
 
60
62
  return (await this._client._helpers.stream(this.name, { ...options, onLoad }))
61
63
  .pipe(transformStream);
@@ -117,7 +119,11 @@ export default class Entities {
117
119
 
118
120
  }
119
121
 
120
- async function getTransformFn(client, name, transform) {
122
/**
 * Resolves a transform selector into a transform function without needing a
 * client or an entity name.
 *
 * @param {string|boolean} transform - Transform selector; `true` is treated
 *   as `'default'`, any other value is forwarded unchanged.
 * @returns {Promise<*>} Whatever `_getTransformFn` resolves to for the selector.
 */
export async function getEntityTransformFunction(transform) {
  const selector = transform === true ? 'default' : transform;
  return _getTransformFn(undefined, undefined, selector);
}
125
+
126
+ async function _getTransformFn(client, name, transform) {
121
127
  switch (transform) {
122
128
  case 'default':
123
129
  return defaultTransform;
@@ -151,5 +157,5 @@ function aggregateIds(records, propName) {
151
157
  }
152
158
 
153
159
  async function getPostDate(client, order, options = {}) {
154
- return (await client.posts.getAll({ ...options, limit: 1, order, fields: ['date_gmt'] }))[0].date_gmt;
160
+ return (await client.posts.getAll({ ...options, after: 1, limit: 1, order, fields: ['date_gmt'] }))[0].date_gmt;
155
161
  }
@@ -24,9 +24,8 @@ export default class EntityTransformStream extends Transform {
24
24
  return PROP_NAME_OVERRIDES[resource] || resource;
25
25
  }
26
26
 
27
- constructor(client, indicies, { transform } = {}) {
27
+ constructor(indicies, { transform } = {}) {
28
28
  super({ objectMode: true });
29
- this._client = client;
30
29
  this._indicies = indicies;
31
30
  this._transformFn = transform;
32
31
  }
package/src/helpers.js CHANGED
@@ -175,6 +175,7 @@ class Url {
175
175
  include,
176
176
  exclude,
177
177
  fields,
178
+ params: extraParams,
178
179
  } = {}) {
179
180
  const params = [];
180
181
 
@@ -198,6 +199,12 @@ class Url {
198
199
  has(offset) && params.push(`offset=${offset}`);
199
200
  has(include) && include.length && params.push(`include=${joinIds(include)}`);
200
201
  has(exclude) && exclude.length && params.push(`exclude=${joinIds(exclude)}`);
202
+ if (has(extraParams)) {
203
+ for (const str of extraParams) {
204
+ const [key, value] = str.split('=', 2);
205
+ params.push(`${encodeURIComponent(key)}=${encodeURIComponent(value)}`);
206
+ }
207
+ }
201
208
  if (has(fields) && fields.length) {
202
209
  // TODO: is this unused?
203
210
  if (has(before) && !fields.includes('modified_gmt')) {
package/src/index.js CHANGED
@@ -1 +1,2 @@
1
1
  export { default as WordPressClient } from './client.js';
2
+ export * from './xml.js';
package/src/media.js CHANGED
@@ -1,4 +1,4 @@
1
- import Entities from './entities/index.js';
1
+ import { Entities } from './entities/index.js';
2
2
  import EntityIndex from './entities/entity-index.js';
3
3
 
4
4
  const RESOURCE_NAME = 'media';
@@ -1,4 +1,4 @@
1
- import Entities from '../entities/index.js';
1
+ import { Entities } from '../entities/index.js';
2
2
 
3
3
  const RESOURCE_NAME = 'posts';
4
4
 
@@ -28,7 +28,7 @@ export default class WordPressDataSource {
28
28
  this._debug(`[WordPressDataSource] get ${JSON.stringify(request)}`);
29
29
  const url = await this.url(request);
30
30
  this._debug(`[WordPressDataSource] request ${url}`);
31
- const response = await this._axiosGet(url);
31
+ const response = await this._axiosGet(url);
32
32
  this._debug(`[WordPressDataSource] response ${response.status} ${url}`);
33
33
  return this._process(response, { request, url });
34
34
  }
package/src/version.js CHANGED
@@ -1 +1 @@
1
- export default '0.6.4';
1
+ export default '0.6.5-beta.0';
package/src/xml.js ADDED
@@ -0,0 +1,81 @@
1
/**
 * Event-driven parser that collects the child elements of each `item`
 * element and pushes one post-shaped record per item (see `nodeToPost`).
 *
 * State kept between events:
 * - `_currentNode`: the item being assembled, or `undefined` outside an item.
 * - `_currentText`: raw text accumulated since the last closing tag.
 * - `_currentPostMetaKey`: the last `wp:meta_key` seen, waiting for its value.
 */
export class XmlParser {

  /**
   * @param {object} [options] - Arbitrary options, kept on `_options`.
   */
  constructor({ ...options } = {}) {
    this._options = options;
    this._currentNode = undefined;
    this._currentText = '';
    this._currentPostMetaKey = undefined;
  }

  /**
   * Starts a new item on `item`; otherwise registers the tag as an empty
   * field on the item being assembled.
   *
   * NOTE(review): unlike `onCloseTag`/`onText` this method is
   * underscore-prefixed — confirm which name the consuming stream invokes.
   *
   * @param {object} tag
   * @param {string} tag.name - Qualified tag name.
   */
  _onOpenTag({ name }) {
    if (name === 'item') {
      this._currentNode = { categories: [], tags: [], postmeta: {} };
      // A meta key left pending by a previous item must not leak into this one.
      this._currentPostMetaKey = undefined;
    } else if (this._currentNode) {
      this._currentNode[name] = '';
    }
  }

  /**
   * Stores the text accumulated for the closing tag on the current item, and
   * emits the item through `push` when the `item` element itself closes.
   *
   * @param {object} tag
   * @param {string} tag.name - Qualified tag name.
   * @param {object} context
   * @param {Function} context.push - Receives the finished record.
   */
  onCloseTag({ name }, { push }) {
    // Trim once per element rather than per text chunk, so whitespace inside
    // a value survives when the text arrives in several pieces.
    const text = this._currentText.trim();
    if (this._currentNode) {
      switch (name) {
        case 'item':
          push(nodeToPost(this._currentNode));
          this._currentNode = undefined;
          break;
        case 'category':
          this._currentNode.categories.push(text);
          break;
        case 'wp:meta_key':
          this._currentPostMetaKey = text;
          break;
        case 'wp:meta_value':
          // Ignore a value with no preceding key instead of storing it under
          // the literal key "undefined".
          if (this._currentPostMetaKey !== undefined) {
            this._currentNode.postmeta[this._currentPostMetaKey] = text;
          }
          this._currentPostMetaKey = undefined;
          break;
        default:
          if (name.startsWith('wp:tag')) {
            this._currentNode.tags.push(text);
          } else {
            this._currentNode[name] = text;
          }
      }
    }
    this._currentText = '';
  }

  /**
   * Accumulates character data for the element currently being read.
   *
   * @param {string} text - A chunk of character data.
   */
  onText(text) {
    this._currentText += text;
  }

}
51
+
52
/**
 * Maps a collected `item` node (tag name → text, plus the `categories`,
 * `tags` and `postmeta` collections) onto a post-shaped record.
 *
 * @param {object} node - The node assembled for one `item` element.
 * @returns {object} The post record; `featured_media` is `null` when the
 *   node carries no (non-empty) `wp:attachment_url`.
 */
function nodeToPost(node) {
  const field = (key) => node[key];
  const rendered = (key) => ({ rendered: node[key] });
  const { title, link, categories, tags, postmeta: meta } = node;

  return {
    id: field('wp:post_id'),
    date: field('wp:post_date'),
    date_gmt: field('wp:post_date_gmt'),
    modified: field('wp:post_modified'),
    modified_gmt: field('wp:post_modified_gmt'),
    slug: field('wp:post_name'),
    status: field('wp:status'),
    title,
    link,
    guid: rendered('guid'),
    content: rendered('content:encoded'),
    excerpt: rendered('excerpt:encoded'),
    author: field('dc:creator'),
    featured_media: field('wp:attachment_url') || null,
    comment_status: field('wp:comment_status'),
    ping_status: field('wp:ping_status'),
    categories,
    tags,
    type: field('wp:post_type'),
    meta,
  };
}