@financial-times/cp-content-pipeline-schema 2.2.2 → 2.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43)
  1. package/CHANGELOG.md +8 -0
  2. package/__mocks__/worker_threads.ts +3 -0
  3. package/lib/fixtures/dummyContext.js +4 -0
  4. package/lib/fixtures/dummyContext.js.map +1 -1
  5. package/lib/index.d.ts +1 -0
  6. package/lib/index.js.map +1 -1
  7. package/lib/model/CapiResponse.js +14 -6
  8. package/lib/model/CapiResponse.js.map +1 -1
  9. package/lib/model/Concept.test.js +9 -0
  10. package/lib/model/Concept.test.js.map +1 -1
  11. package/lib/model/RichText.d.ts +1 -1
  12. package/lib/model/RichText.js +34 -20
  13. package/lib/model/RichText.js.map +1 -1
  14. package/lib/model/RichText.test.js +9 -0
  15. package/lib/model/RichText.test.js.map +1 -1
  16. package/lib/resolvers/content-tree/bodyXMLToTree.d.ts +3 -4
  17. package/lib/resolvers/content-tree/bodyXMLToTree.js +2 -2
  18. package/lib/resolvers/content-tree/bodyXMLToTree.js.map +1 -1
  19. package/lib/resolvers/content-tree/bodyXMLToTree.test.js +32 -20
  20. package/lib/resolvers/content-tree/bodyXMLToTree.test.js.map +1 -1
  21. package/lib/resolvers/content-tree/bodyXMLToTreeWorker.d.ts +9 -0
  22. package/lib/resolvers/content-tree/bodyXMLToTreeWorker.js +18 -0
  23. package/lib/resolvers/content-tree/bodyXMLToTreeWorker.js.map +1 -0
  24. package/lib/resolvers/content-tree/nodePredicates.d.ts +13 -6
  25. package/lib/resolvers/content-tree/nodePredicates.js +33 -33
  26. package/lib/resolvers/content-tree/nodePredicates.js.map +1 -1
  27. package/lib/resolvers/content-tree/references/Reference.d.ts +1 -1
  28. package/lib/resolvers/content-tree/tagMappings.d.ts +1 -2
  29. package/lib/resolvers/content-tree/tagMappings.js +60 -60
  30. package/lib/resolvers/content-tree/tagMappings.js.map +1 -1
  31. package/package.json +2 -1
  32. package/src/fixtures/dummyContext.ts +4 -0
  33. package/src/index.ts +1 -0
  34. package/src/model/CapiResponse.ts +20 -10
  35. package/src/model/Concept.test.ts +9 -0
  36. package/src/model/RichText.test.ts +10 -0
  37. package/src/model/RichText.ts +39 -27
  38. package/src/resolvers/content-tree/bodyXMLToTree.test.ts +32 -22
  39. package/src/resolvers/content-tree/bodyXMLToTree.ts +6 -11
  40. package/src/resolvers/content-tree/bodyXMLToTreeWorker.ts +31 -0
  41. package/src/resolvers/content-tree/nodePredicates.ts +45 -39
  42. package/src/resolvers/content-tree/tagMappings.ts +60 -102
  43. package/tsconfig.tsbuildinfo +1 -1
@@ -1,6 +1,16 @@
1
1
  import { baseCapiObject } from '../fixtures/capiObject'
2
2
  import { RichText } from '../model/RichText'
3
3
 
4
+ jest.mock('@dotcom-reliability-kit/logger', () => ({
5
+ Logger: jest.fn(() => ({
6
+ debug: jest.fn(),
7
+ error: jest.fn(),
8
+ fatal: jest.fn(),
9
+ info: jest.fn(),
10
+ warn: jest.fn(),
11
+ })),
12
+ }))
13
+
4
14
  describe('RichText resolver', () => {
5
15
  it('should transform bodyXML to an AST', async () => {
6
16
  const model = new RichText('bodyXML', baseCapiObject.bodyXML)
@@ -1,16 +1,26 @@
1
- import bodyXMLToTree from '../resolvers/content-tree/bodyXMLToTree'
1
+ import Piscina from 'piscina'
2
+
3
+ import type { QueryContext } from '..'
4
+ import bodyXMLToTreeWorker from '../resolvers/content-tree/bodyXMLToTreeWorker'
2
5
  import extractText from '../resolvers/content-tree/extractText'
6
+ import type { PredicateError } from '../resolvers/content-tree/nodePredicates'
3
7
  import updateTreeWithReferenceIds from '../resolvers/content-tree/updateTreeWithReferenceIds'
4
8
  import { LiteralUnionScalarValues } from '../resolvers/literal-union'
5
9
  import { RichTextSource } from '../resolvers/scalars'
6
10
  import { CapiResponse } from './CapiResponse'
7
- import {
8
- commonTagMappings,
9
- articleTagMappings,
10
- liveBlogPostTagMappings,
11
- } from '../resolvers/content-tree/tagMappings'
11
+ import { OperationalError } from '@dotcom-reliability-kit/errors'
12
12
  import { ContentTree } from '@financial-times/content-tree'
13
- import { QueryContext } from '..'
13
+
14
+ let piscina: Piscina | undefined
15
+ // Don't run the thread pool during tests as it creates loads of threads that
16
+ // aren't properly closed and leaves the process hanging. We do our best to
17
+ // keep the same behaviour when running on the main thread vs a worker thread,
18
+ // with the main difference being the lack of logs on the main thread.
19
+ if (process.env.NODE_ENV !== 'test') {
20
+ piscina = new Piscina({
21
+ filename: require.resolve('../resolvers/content-tree/bodyXMLToTreeWorker'),
22
+ })
23
+ }
14
24
 
15
25
  export class RichText {
16
26
  constructor(
@@ -24,27 +34,29 @@ export class RichText {
24
34
  }
25
35
 
26
36
  async structured(context?: QueryContext) {
27
- const tree = await new Promise<ContentTree.Body>((resolve, reject) => {
28
- // bodyXMLToTree is synchronous and slow. scheduling it in a setImmediate
29
- // prevents it from blocking the event loop, so the app can still handle
30
- // requests, and it won't skew resolver timing metrics by blocking the
31
- // timers for quicker, asynchronous resolvers from finishing.
32
-
33
- const tagMappings =
34
- this.contentApiData?.type() === 'LiveBlogPost'
35
- ? { ...commonTagMappings, ...liveBlogPostTagMappings }
36
- : {
37
- ...commonTagMappings,
38
- ...articleTagMappings(this.contentApiData),
39
- }
40
- setImmediate(() => {
41
- try {
42
- resolve(bodyXMLToTree(this.value ?? '', tagMappings, context))
43
- } catch (error) {
44
- reject(error)
45
- }
46
- })
37
+ const args = {
38
+ xml: this.value ?? '',
39
+ responseMetadata: this.contentApiData
40
+ ? {
41
+ isLiveBlogPost: this.contentApiData.type() === 'LiveBlogPost',
42
+ topperHasImage: this.contentApiData.topperHasImage(),
43
+ }
44
+ : undefined,
45
+ }
46
+ // forward errors from the worker threads to the logger
47
+ piscina?.on('message', ({ event, error, ...predError }: PredicateError) => {
48
+ const { stack } = error
49
+ const opError = new OperationalError(predError)
50
+ opError.stack = stack
51
+ context?.logger.error({ event, error: opError })
47
52
  })
53
+ // bodyXMLToTree is synchronous and slow. Offload its processing to a
54
+ // worker thread so that it does not block the event loop. This allows the
55
+ // app to continue to handle other requests whilst processing an expensive
56
+ // one, as well as not skewing timing metrics.
57
+ const tree: ContentTree.Body = piscina
58
+ ? await piscina.run(args)
59
+ : bodyXMLToTreeWorker(args)
48
60
 
49
61
  const { tree: treeWithReferences, references } = updateTreeWithReferenceIds(
50
62
  tree,
@@ -1,20 +1,16 @@
1
+ import { parentPort } from 'worker_threads'
1
2
  import { ContentTree } from '@financial-times/content-tree'
2
3
  import bodyXMLToTree, { TagMappings } from './bodyXMLToTree'
3
4
  import tags from './tagMappings'
4
- import { Logger } from '@dotcom-reliability-kit/logger'
5
- import { QueryContext } from '../..'
6
5
 
7
- const mockLogger = new Logger()
8
- const mockLogError = jest.spyOn(mockLogger, 'error')
9
-
10
- const mockContext = {
11
- logger: mockLogger,
12
- } as QueryContext
6
+ jest.mock('worker_threads')
7
+ // eslint-disable-next-line @typescript-eslint/no-non-null-assertion
8
+ const mockPostMessage = jest.mocked(parentPort)!.postMessage
13
9
 
14
10
  describe('bodyXMLToTree', () => {
15
11
  it('converts XML to tree', () => {
16
12
  const xml = `<body><p>Hello world</p></body>`
17
- expect(bodyXMLToTree(xml, tags, mockContext)).toMatchInlineSnapshot(`
13
+ expect(bodyXMLToTree(xml, tags)).toMatchInlineSnapshot(`
18
14
  Object {
19
15
  "children": Array [
20
16
  Object {
@@ -220,14 +216,14 @@ describe('bodyXMLToTree', () => {
220
216
  "version": 1,
221
217
  }
222
218
  `)
223
- expect(mockLogError).not.toBeCalled()
219
+ expect(mockPostMessage).not.toBeCalled()
224
220
  })
225
221
 
226
222
  it('should handle heading and slots', () => {
227
223
  const xml =
228
224
  '<body><div class="n-content-layout"><h3></h3><div class="n-content-layout__slot"></div><div class="n-content-layout__slot"></div></body>'
229
225
 
230
- expect(bodyXMLToTree(xml, tags, mockContext)).toMatchInlineSnapshot(`
226
+ expect(bodyXMLToTree(xml, tags)).toMatchInlineSnapshot(`
231
227
  Object {
232
228
  "children": Array [
233
229
  Object {
@@ -255,14 +251,14 @@ describe('bodyXMLToTree', () => {
255
251
  "version": 1,
256
252
  }
257
253
  `)
258
- expect(mockLogError).not.toBeCalled()
254
+ expect(mockPostMessage).not.toBeCalled()
259
255
  })
260
256
 
261
257
  it('should log an error on unexpected child after heading', () => {
262
258
  const xml =
263
259
  '<body><div class="n-content-layout"><h3></h3><div class="n-content-layout__slot"></div><p></p></body>'
264
260
 
265
- expect(bodyXMLToTree(xml, tags, mockContext)).toMatchInlineSnapshot(`
261
+ expect(bodyXMLToTree(xml, tags)).toMatchInlineSnapshot(`
266
262
  Object {
267
263
  "children": Array [
268
264
  Object {
@@ -290,12 +286,19 @@ describe('bodyXMLToTree', () => {
290
286
  "version": 1,
291
287
  }
292
288
  `)
293
- expect(mockLogError).toBeCalled()
294
- expect(mockLogError.mock.lastCall).toMatchInlineSnapshot(`
289
+ expect(mockPostMessage).toBeCalled()
290
+ expect(mockPostMessage.mock.lastCall).toMatchInlineSnapshot(`
295
291
  Array [
296
292
  Object {
297
- "error": [OperationalError: Unexpected children types for layout],
293
+ "actual": Array [
294
+ "layout-slot",
295
+ "paragraph",
296
+ ],
297
+ "code": "BODY_XML_UNEXPECTED_STRUCTURE",
298
+ "error": [Error],
298
299
  "event": "RECOVERABLE_ERROR",
300
+ "expected": "layout-slot",
301
+ "message": "Unexpected children types for layout",
299
302
  },
300
303
  ]
301
304
  `)
@@ -305,7 +308,7 @@ describe('bodyXMLToTree', () => {
305
308
  const xml =
306
309
  '<body><div class="n-content-layout"><div class="n-content-layout__slot"></div><p></p></body>'
307
310
 
308
- expect(bodyXMLToTree(xml, tags, mockContext)).toMatchInlineSnapshot(`
311
+ expect(bodyXMLToTree(xml, tags)).toMatchInlineSnapshot(`
309
312
  Object {
310
313
  "children": Array [
311
314
  Object {
@@ -328,12 +331,19 @@ describe('bodyXMLToTree', () => {
328
331
  "version": 1,
329
332
  }
330
333
  `)
331
- expect(mockLogError).toBeCalled()
332
- expect(mockLogError.mock.lastCall).toMatchInlineSnapshot(`
334
+ expect(mockPostMessage).toBeCalled()
335
+ expect(mockPostMessage.mock.lastCall).toMatchInlineSnapshot(`
333
336
  Array [
334
337
  Object {
335
- "error": [OperationalError: Unexpected children types for layout],
338
+ "actual": Array [
339
+ "layout-slot",
340
+ "paragraph",
341
+ ],
342
+ "code": "BODY_XML_UNEXPECTED_STRUCTURE",
343
+ "error": [Error],
336
344
  "event": "RECOVERABLE_ERROR",
345
+ "expected": "layout-slot",
346
+ "message": "Unexpected children types for layout",
337
347
  },
338
348
  ]
339
349
  `)
@@ -345,7 +355,7 @@ describe('bodyXMLToTree', () => {
345
355
  const xml =
346
356
  '<table class="data-table" data-table-collapse-rownum="" data-table-layout-largescreen="auto" data-table-layout-smallscreen="auto" data-table-theme="auto"><caption>Nulla iaculis tempus augue</caption><thead><tr><th data-column-hidden="none" data-column-sortable="false" data-column-type="string">libero mollis</th><th data-column-hidden="none" data-column-sortable="false" data-column-type="string">pretium nunc</th><th data-column-hidden="none" data-column-sortable="false" data-column-type="string">euismod nunc</th></tr></thead><tbody><tr><td>Aenean </td><td>14134</td><td>dfdsfd</td></tr><tr><td>lobortis </td><td>3434</td><td>fdsf dsf </td></tr><tr><td>volutpat </td><td>234234</td><td>sd fsd</td></tr><tr><td>vitae </td><td>2423</td><td>s fsdf</td></tr><tr><td>elementumus</td><td>23423</td><td>f sdf</td></tr></tbody><tfoot><tr><td colspan="1000">Aenean sodales sapien</td></tr></tfoot></table>'
347
357
 
348
- expect(bodyXMLToTree(xml, tags, mockContext)).toMatchInlineSnapshot(`
358
+ expect(bodyXMLToTree(xml, tags)).toMatchInlineSnapshot(`
349
359
  Object {
350
360
  "children": Array [
351
361
  Object {
@@ -604,7 +614,7 @@ describe('bodyXMLToTree', () => {
604
614
  const xml =
605
615
  '<table class="data-table" id="U1140244733565W0C"><caption>Emerging markets outlook for 2017</caption><tbody><tr><td colspan="2"><p>Brazil</p><p>Brazilian shares were the best-performing asset globally over the 12 months to the end of January, returning 121 per cent, according to data from BofA Merrill Lynch. Brazilian stocks did very well through January, but investors including Lazard and Eastspring have scaled back their exposure after strong growth last year.</p></td></tr></tbody></table>'
606
616
 
607
- expect(bodyXMLToTree(xml, tags, mockContext)).toMatchInlineSnapshot(`
617
+ expect(bodyXMLToTree(xml, tags)).toMatchInlineSnapshot(`
608
618
  Object {
609
619
  "children": Array [
610
620
  Object {
@@ -1,9 +1,8 @@
1
+ import type { ContentTree } from '@financial-times/content-tree'
1
2
  import * as cheerio from 'cheerio'
3
+ import { isTag, isText } from 'domhandler'
2
4
 
3
- import type { ContentTree } from '@financial-times/content-tree'
4
5
  import { AnyNode } from './Workarounds'
5
- import { QueryContext } from '../..'
6
- import { isTag, isText } from 'domhandler'
7
6
 
8
7
  function isNode(node: ContentTree.Node | undefined): node is ContentTree.Node {
9
8
  return Boolean(node && 'type' in node)
@@ -11,8 +10,7 @@ function isNode(node: ContentTree.Node | undefined): node is ContentTree.Node {
11
10
 
12
11
  type ContentTreeTransform = (
13
12
  $el: cheerio.Cheerio<any>,
14
- traverse: () => AnyNode[],
15
- context?: QueryContext
13
+ traverse: () => AnyNode[]
16
14
  ) => AnyNode | AnyNode[]
17
15
 
18
16
  export type TagMappings = Record<string, ContentTreeTransform>
@@ -23,8 +21,7 @@ export type TagMappings = Record<string, ContentTreeTransform>
23
21
  */
24
22
  export default function bodyXMLToTree(
25
23
  xml: string,
26
- tagMappings: TagMappings,
27
- context?: QueryContext
24
+ tagMappings: TagMappings
28
25
  ): ContentTree.Body {
29
26
  const $ = cheerio.load(xml)
30
27
 
@@ -44,10 +41,8 @@ export default function bodyXMLToTree(
44
41
 
45
42
  if (matchedSelector) {
46
43
  const contentTreeTransform = tagMappings[matchedSelector]
47
- return contentTreeTransform(
48
- $(node),
49
- () => flattenAndTraverseChildren(node.children),
50
- context
44
+ return contentTreeTransform($(node), () =>
45
+ flattenAndTraverseChildren(node.children)
51
46
  )
52
47
  }
53
48
 
@@ -0,0 +1,31 @@
1
+ import type { ContentTree } from '@financial-times/content-tree'
2
+
3
+ import bodyXMLToTree from './bodyXMLToTree'
4
+
5
+ import {
6
+ commonTagMappings,
7
+ articleTagMappings,
8
+ liveBlogPostTagMappings,
9
+ } from './tagMappings'
10
+
11
+ export interface ResponseMetadata {
12
+ isLiveBlogPost: boolean
13
+ topperHasImage: boolean
14
+ }
15
+
16
+ export default function bodyXMLToTreeWorker({
17
+ xml,
18
+ responseMetadata,
19
+ }: {
20
+ xml: string
21
+ responseMetadata?: ResponseMetadata
22
+ }): ContentTree.Body {
23
+ const tagMappings = responseMetadata?.isLiveBlogPost
24
+ ? { ...commonTagMappings, ...liveBlogPostTagMappings }
25
+ : {
26
+ ...commonTagMappings,
27
+ ...articleTagMappings(responseMetadata?.topperHasImage ?? false),
28
+ }
29
+
30
+ return bodyXMLToTree(xml, tagMappings)
31
+ }
@@ -1,6 +1,6 @@
1
- import { OperationalError } from '@dotcom-reliability-kit/errors'
2
- import { AnyNode } from './Workarounds'
3
- import { QueryContext } from '../..'
1
+ import { parentPort } from 'worker_threads'
2
+
3
+ import type { AnyNode } from './Workarounds'
4
4
 
5
5
  type ValuesOfTuple<Tuple extends readonly string[]> = Tuple[number]
6
6
 
@@ -19,24 +19,39 @@ export const phrasingTypes = [
19
19
  'link',
20
20
  ] as const
21
21
 
22
+ export interface PredicateError {
23
+ event: string
24
+ code: string
25
+ message: string
26
+ expected: AnyNode['type'] | readonly AnyNode['type'][]
27
+ actual: AnyNode['type'][]
28
+ error: Error
29
+ }
30
+
31
+ // reliability-kit's logger creates a worker thread under the hood but we can't
32
+ // access this thread directly from these worker threads. Instead let's send
33
+ // all our recoverable errors to the piscina pool managing us.
34
+ const postLoggerError = (error: Omit<PredicateError, 'error'>) => {
35
+ // stacks will only copy over if they're in an Error object
36
+ const errorWithStack = { ...error, error: new Error() }
37
+ parentPort?.postMessage(errorWithStack)
38
+ }
39
+
22
40
  export const findChildOftype = <NodeType extends AnyNode>(
23
41
  type: NodeType['type'],
24
42
  nodes: AnyNode[],
25
- parentType: AnyNode['type'],
26
- context?: QueryContext
43
+ parentType: AnyNode['type']
27
44
  ): NodeType | undefined => {
28
45
  const predicate = (node: AnyNode): node is NodeType => node.type === type
29
46
  const child = nodes.find(predicate)
30
47
 
31
48
  if (!child) {
32
- context?.logger.error({
49
+ postLoggerError({
33
50
  event: 'RECOVERABLE_ERROR',
34
- error: new OperationalError({
35
- code: 'BODY_XML_UNEXPECTED_STRUCTURE',
36
- message: `Didn't find expected child type in ${parentType}`,
37
- expected: type,
38
- actual: nodes.map((node) => node.type),
39
- }),
51
+ code: 'BODY_XML_UNEXPECTED_STRUCTURE',
52
+ message: `Didn't find expected child type in ${parentType}`,
53
+ expected: type,
54
+ actual: nodes.map((node) => node.type),
40
55
  })
41
56
  }
42
57
 
@@ -46,21 +61,18 @@ export const findChildOftype = <NodeType extends AnyNode>(
46
61
  export const everyChildIsType = <NodeType extends AnyNode>(
47
62
  type: NodeType['type'],
48
63
  nodes: AnyNode[],
49
- parentType: AnyNode['type'],
50
- context?: QueryContext
64
+ parentType: AnyNode['type']
51
65
  ): NodeType[] => {
52
66
  const predicate = (node: AnyNode): node is NodeType => node.type === type
53
67
  const allChildrenAreType = nodes.every(predicate)
54
68
 
55
69
  if (!allChildrenAreType) {
56
- context?.logger.error({
70
+ postLoggerError({
57
71
  event: 'RECOVERABLE_ERROR',
58
- error: new OperationalError({
59
- code: 'BODY_XML_UNEXPECTED_STRUCTURE',
60
- message: `Unexpected children types for ${parentType}`,
61
- expected: type,
62
- actual: nodes.map((node) => node.type),
63
- }),
72
+ code: 'BODY_XML_UNEXPECTED_STRUCTURE',
73
+ message: `Unexpected children types for ${parentType}`,
74
+ expected: type,
75
+ actual: nodes.map((node) => node.type),
64
76
  })
65
77
  }
66
78
 
@@ -70,22 +82,19 @@ export const everyChildIsType = <NodeType extends AnyNode>(
70
82
  export const childrenOfTypes = <Types extends readonly AnyNode['type'][]>(
71
83
  types: Types,
72
84
  nodes: AnyNode[],
73
- parentType: AnyNode['type'],
74
- context?: QueryContext
85
+ parentType: AnyNode['type']
75
86
  ): NodeOfType<ValuesOfTuple<Types>>[] => {
76
87
  const predicate = (node: AnyNode): node is NodeOfType<ValuesOfTuple<Types>> =>
77
88
  types.includes(node.type)
78
89
  const allChildrenAreType = nodes.every(predicate)
79
90
 
80
91
  if (!allChildrenAreType) {
81
- context?.logger.error({
92
+ postLoggerError({
82
93
  event: 'RECOVERABLE_ERROR',
83
- error: new OperationalError({
84
- code: 'BODY_XML_UNEXPECTED_STRUCTURE',
85
- message: `Unexpected ordered children types for ${parentType}`,
86
- expected: types,
87
- actual: nodes.map((node) => node.type),
88
- }),
94
+ code: 'BODY_XML_UNEXPECTED_STRUCTURE',
95
+ message: `Unexpected ordered children types for ${parentType}`,
96
+ expected: types,
97
+ actual: nodes.map((node) => node.type),
89
98
  })
90
99
  }
91
100
 
@@ -110,21 +119,18 @@ export const childrenOfOrderedTypes = <
110
119
  >(
111
120
  types: Types,
112
121
  nodes: AnyNode[],
113
- parentType: AnyNode['type'],
114
- context?: QueryContext
122
+ parentType: AnyNode['type']
115
123
  ): NodesOfTypes<Types> => {
116
124
  if (nodesAreOrderedTypes(types, nodes)) {
117
125
  return nodes
118
126
  }
119
127
 
120
- context?.logger.error({
128
+ postLoggerError({
121
129
  event: 'RECOVERABLE_ERROR',
122
- error: new OperationalError({
123
- code: 'BODY_XML_UNEXPECTED_STRUCTURE',
124
- message: `Unexpected children types for ${parentType}`,
125
- expected: types,
126
- actual: nodes.map((node) => node.type),
127
- }),
130
+ code: 'BODY_XML_UNEXPECTED_STRUCTURE',
131
+ message: `Unexpected children types for ${parentType}`,
132
+ expected: types,
133
+ actual: nodes.map((node) => node.type),
128
134
  })
129
135
 
130
136
  return nodes as unknown as NodesOfTypes<Types>