pptxtojson 1.0.3 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/index.html CHANGED
@@ -4,8 +4,8 @@
4
4
  <meta charset="utf-8">
5
5
  <meta http-equiv="X-UA-Compatible" content="IE=edge">
6
6
  <meta name="viewport" content="width=device-width, initial-scale=1">
7
- <meta name="description" content="基于JavaScript的 .pptx 文件解析工具丨A javascript tool for parsing .pptx file" />
8
- <meta name="keywords" content="ppt,powerpoint,office powerpoint,json,javascript,PPT解析,PPT转JSON" />
7
+ <meta name="description" content="Office PowerPoint(.pptx) file to JSON | PPTX 文件转为可读的 JSON 数据" />
8
+ <meta name="keywords" content="pptx2json,pptxtojson,ppt,powerpoint,json,javascript,PPT解析,PPT转JSON" />
9
9
  <link rel="icon" href="favicon.ico">
10
10
  <title>pptxtojson - PPTX转JSON</title>
11
11
 
@@ -80,6 +80,13 @@
80
80
  .upload-input {
81
81
  display: none;
82
82
  }
83
+ .link {
84
+ display: flex;
85
+ }
86
+ .link a {
87
+ padding: 5px 10px;
88
+ color: #d14424;
89
+ }
83
90
  </style>
84
91
  </head>
85
92
 
@@ -90,7 +97,10 @@
90
97
  <input class="upload-input" type="file" accept="application/vnd.openxmlformats-officedocument.presentationml.presentation"/>
91
98
  </div>
92
99
 
93
- <a href="https://github.com/pipipi-pikachu/pptx2json">Github</a>
100
+ <div class="link">
101
+ <a target="_blank" href="https://github.com/pipipi-pikachu/pptx2json">Github仓库</a>
102
+ <a target="_blank" href="https://pipipi-pikachu.github.io/PPTist/">可视化测试</a>
103
+ </div>
94
104
  </div>
95
105
  <div id="jsoneditor"></div>
96
106
 
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "pptxtojson",
3
- "version": "1.0.3",
3
+ "version": "1.1.0",
4
4
  "description": "A javascript tool for parsing .pptx file",
5
5
  "type": "module",
6
6
  "main": "./dist/index.umd.js",
@@ -24,7 +24,7 @@
24
24
  "homepage": "https://github.com/pipipi-pikachu/pptxtojson",
25
25
  "license": "MIT",
26
26
  "publishConfig": {
27
- "registry": "https://registry.npmjs.org"
27
+ "registry": "https://registry.npmjs.com"
28
28
  },
29
29
  "dependencies": {
30
30
  "jszip": "^3.10.1",
package/src/math.js ADDED
@@ -0,0 +1,184 @@
1
+ import { getTextByPathList } from './utils'
2
+
3
/**
 * Recursively collects every value stored under an `m:oMath` key anywhere
 * inside `obj`.
 *
 * Traversal is pre-order: a node's own `m:oMath` entries are emitted before
 * those found in its children. The contents of a found `m:oMath` value are
 * themselves traversed, so nested `m:oMath` entries are reported as well.
 *
 * @param {*} obj - Parsed XML node (object or array). Any other value,
 *   including `null`, yields an empty result.
 * @returns {Array} All `m:oMath` values found; an array-valued `m:oMath`
 *   contributes each of its items.
 */
export function findOMath(obj) {
  const results = []

  const visit = (node) => {
    // `typeof null` is 'object', so null has to be excluded explicitly
    // before any property access.
    if (node === null || typeof node !== 'object') return

    const found = node['m:oMath']
    if (found) {
      if (Array.isArray(found)) results.push(...found)
      else results.push(found)
    }

    // Wrap the call so forEach's index/array arguments are not forwarded.
    Object.values(node).forEach(child => visit(child))
  }

  visit(obj)
  return results
}
15
+
16
/**
 * Renders a fraction node as a LaTeX `\frac{...}{...}` expression.
 *
 * @param {Object} fraction - Node whose `m:num` and `m:den` children hold
 *   the numerator and denominator.
 * @returns {string} `\frac{numerator}{denominator}`, each part converted
 *   with parseOMath.
 */
export function parseFraction(fraction) {
  const { 'm:num': numeratorNode, 'm:den': denominatorNode } = fraction
  const top = parseOMath(numeratorNode)
  const bottom = parseOMath(denominatorNode)
  return `\\frac{${top}}{${bottom}}`
}
21
/**
 * Renders a superscript node as LaTeX `base^{sup}`.
 *
 * @param {Object} superscript - Node whose `m:e` child is the base and
 *   whose `m:sup` child is the raised part.
 * @returns {string} The base followed by `^{...}`.
 */
export function parseSuperscript(superscript) {
  const { 'm:e': baseNode, 'm:sup': raisedNode } = superscript
  const baseLatex = parseOMath(baseNode)
  const raisedLatex = parseOMath(raisedNode)
  return `${baseLatex}^{${raisedLatex}}`
}
26
/**
 * Renders a subscript node as LaTeX `base_{sub}`.
 *
 * @param {Object} subscript - Node whose `m:e` child is the base and whose
 *   `m:sub` child is the lowered part.
 * @returns {string} The base followed by `_{...}`.
 */
export function parseSubscript(subscript) {
  const { 'm:e': baseNode, 'm:sub': loweredNode } = subscript
  const baseLatex = parseOMath(baseNode)
  const loweredLatex = parseOMath(loweredNode)
  return `${baseLatex}_{${loweredLatex}}`
}
31
/**
 * Renders a radical node as LaTeX `\sqrt`.
 *
 * @param {Object} radical - Node whose `m:deg` child is the (optional)
 *   degree and whose `m:e` child is the expression under the root.
 * @returns {string} `\sqrt[degree]{expr}` when the degree converts to a
 *   non-empty string, otherwise `\sqrt{expr}`.
 */
export function parseRadical(radical) {
  const index = parseOMath(radical['m:deg'])
  const radicand = parseOMath(radical['m:e'])

  if (index) return `\\sqrt[${index}]{${radicand}}`
  return `\\sqrt{${radicand}}`
}
36
/**
 * Renders a matrix node as a LaTeX `matrix` environment.
 *
 * Rows come from `m:mr`, cells from each row's `m:e`. Either may be a
 * single object instead of an array when there is only one row or cell
 * (the same single-child shape other code in this package normalises), so
 * both are normalised to arrays before mapping.
 *
 * @param {Object} matrix - Node holding the `m:mr` rows.
 * @returns {string} `\begin{matrix} ... \end{matrix}` with cells joined by
 *   ` & ` and rows joined by ` \\ `.
 */
export function parseMatrix(matrix) {
  // Missing -> [], single object -> [object], array -> unchanged.
  const toList = (value) => {
    if (value === undefined || value === null) return []
    return Array.isArray(value) ? value : [value]
  }

  const matrixRows = toList(matrix['m:mr']).map((row) => {
    return toList(row['m:e']).map((element) => parseOMath(element)).join(' & ')
  })
  return `\\begin{matrix} ${matrixRows.join(' \\\\ ')} \\end{matrix}`
}
43
/**
 * Renders an n-ary operator node (operator with lower/upper limits and a
 * body) as `op_{sub}^{sup}{body}`.
 *
 * @param {Object} nary - Node whose operator character is read from
 *   `m:naryPr > m:chr > attrs > m:val`, with limits in `m:sub` / `m:sup`
 *   and the body in `m:e`.
 * @returns {string} The operator (falling back to `∫` when none is read)
 *   followed by both limit groups and the body group. The limit groups are
 *   always emitted, even when empty.
 */
export function parseNary(nary) {
  const operatorPath = ['m:naryPr', 'm:chr', 'attrs', 'm:val']
  const operator = getTextByPathList(nary, operatorPath) || '∫'

  const lowerLimit = parseOMath(nary['m:sub'])
  const upperLimit = parseOMath(nary['m:sup'])
  const body = parseOMath(nary['m:e'])

  return `${operator}_{${lowerLimit}}^{${upperLimit}}{${body}}`
}
50
/**
 * Renders a limit node by attaching the limit expression below or above
 * the base.
 *
 * @param {Object} limit - Node whose `m:e` child is the base and whose
 *   `m:lim` child is the limit expression.
 * @param {string} type - `'low'` produces `base_{lim}`; any other value
 *   produces `base^{lim}`.
 * @returns {string} The LaTeX for the base with its limit attached.
 */
export function parseLimit(limit, type) {
  const operand = parseOMath(limit['m:e'])
  const bound = parseOMath(limit['m:lim'])
  const attach = type === 'low' ? '_' : '^'
  return `${operand}${attach}{${bound}}`
}
55
/**
 * Renders a delimiter node (content wrapped in opening/closing characters).
 *
 * The characters are read from `m:dPr > m:begChr|m:endChr > attrs > m:val`:
 * - neither present: defaults to `(` and `)`;
 * - both present: emitted as a `\left` / `\right` pair;
 * - only one present: that character is emitted as-is and the missing side
 *   contributes nothing. Previously the missing side was interpolated as
 *   the literal text "undefined".
 *
 * @param {Object} delimiter - Node holding `m:dPr` and the content `m:e`.
 * @returns {string} The content wrapped in the resolved delimiters.
 */
export function parseDelimiter(delimiter) {
  let left = getTextByPathList(delimiter, ['m:dPr', 'm:begChr', 'attrs', 'm:val'])
  let right = getTextByPathList(delimiter, ['m:dPr', 'm:endChr', 'attrs', 'm:val'])

  if (!left && !right) {
    left = '('
    right = ')'
  }

  if (left && right) {
    left = `\\left${left}`
    right = `\\right${right}`
  }
  else {
    // Exactly one side is missing: keep it out of the output instead of
    // letting the template literal stringify null/undefined.
    left = left ?? ''
    right = right ?? ''
  }

  const e = parseOMath(delimiter['m:e'])
  return `${left}${e}${right}`
}
69
/**
 * Renders a function-application node as `\name{argument}`.
 *
 * @param {Object} func - Node whose `m:fName` child is the function name
 *   and whose `m:e` child is the argument.
 * @returns {string} A backslash, the converted name, then the converted
 *   argument in braces.
 */
export function parseFunction(func) {
  const { 'm:fName': nameNode, 'm:e': argumentNode } = func
  const command = parseOMath(nameNode)
  const operand = parseOMath(argumentNode)
  return `\\${command}{${operand}}`
}
74
/**
 * Renders a group-character node by placing the group character on both
 * sides of the converted content.
 *
 * The character is read from `m:groupChrPr > m:chr > attrs > m:val`. When
 * it is absent the content is returned on its own; previously the missing
 * character was interpolated as the literal text "undefined".
 *
 * @param {Object} groupChr - Node holding `m:groupChrPr` and content `m:e`.
 * @returns {string} `chr + content + chr`, or just the content when no
 *   character is defined.
 */
export function parseGroupChr(groupChr) {
  // `??` only replaces null/undefined, so any value that was read is kept.
  const chr = getTextByPathList(groupChr, ['m:groupChrPr', 'm:chr', 'attrs', 'm:val']) ?? ''
  const e = parseOMath(groupChr['m:e'])
  return `${chr}${e}${chr}`
}
79
/**
 * Renders an equation-array node as a LaTeX `cases` environment.
 *
 * `m:e` may be a single object rather than an array when the array holds
 * only one equation (the same single-child shape other code in this
 * package normalises), so it is normalised before mapping.
 *
 * @param {Object} eqArr - Node whose `m:e` child(ren) are the equations.
 * @returns {string} `\begin{cases} ... \end{cases}` with equations joined
 *   by ` \\ `.
 */
export function parseEqArr(eqArr) {
  const rawEquations = eqArr['m:e']

  // Missing -> [], single object -> [object], array -> unchanged.
  let equationNodes = []
  if (Array.isArray(rawEquations)) equationNodes = rawEquations
  else if (rawEquations !== undefined && rawEquations !== null) equationNodes = [rawEquations]

  const equations = equationNodes.map((eq) => parseOMath(eq)).join(' \\\\ ')
  return `\\begin{cases} ${equations} \\end{cases}`
}
83
/**
 * Renders a bar node as an over- or underline.
 *
 * @param {Object} bar - Node whose `m:e` child is the content and whose
 *   position is read from `m:barPr > m:pos > attrs > m:val`.
 * @returns {string} `\overline{...}` when the position is exactly `'top'`,
 *   otherwise `\underline{...}`.
 */
export function parseBar(bar) {
  const content = parseOMath(bar['m:e'])
  const position = getTextByPathList(bar, ['m:barPr', 'm:pos', 'attrs', 'm:val'])

  if (position === 'top') return `\\overline{${content}}`
  return `\\underline{${content}}`
}
88
// Combining accent character -> LaTeX command name (without the backslash).
const ACCENT_COMMANDS = new Map([
  ['\u0301', 'acute'],
  ['\u0300', 'grave'],
  ['\u0302', 'hat'],
  ['\u0303', 'tilde'],
  ['\u0304', 'bar'],
  ['\u0306', 'breve'],
  ['\u0307', 'dot'],
  ['\u0308', 'ddot'],
  ['\u030A', 'mathring'],
  ['\u030B', 'H'],
  ['\u030C', 'check'],
  ['\u0327', 'c'],
])

/**
 * Renders an accent node as `\command{content}`.
 *
 * The accent character is read from `m:accPr > m:chr > attrs > m:val` and
 * falls back to `^` when nothing is read. Known combining characters are
 * mapped to their LaTeX command; any other character is used verbatim as
 * the command name.
 *
 * @param {Object} accent - Node holding `m:accPr` and the content `m:e`.
 * @returns {string} The accented content as LaTeX.
 */
export function parseAccent(accent) {
  const accentChar = getTextByPathList(accent, ['m:accPr', 'm:chr', 'attrs', 'm:val']) || '^'
  const content = parseOMath(accent['m:e'])

  const command = ACCENT_COMMANDS.has(accentChar) ? ACCENT_COMMANDS.get(accentChar) : accentChar
  return `\\${command}{${content}}`
}
120
/**
 * Renders a bordered-box node as LaTeX `\boxed{...}`.
 *
 * @param {Object} box - Node whose `m:e` child is the boxed content.
 * @returns {string} `\boxed{content}`.
 */
export function parseBox(box) {
  const content = parseOMath(box['m:e'])
  return `\\boxed{${content}}`
}
124
+
125
+
126
/**
 * Reads the sort key parseOMath uses to order sibling entries.
 *
 * For an `m:r` entry with an `a:rPr` child the key is `a:rPr.attrs.order`;
 * otherwise it is `<key>Pr > m:ctrlPr > a:rPr > attrs > order`. Every step
 * is guarded, so a missing level (or a null / primitive value) yields 0
 * instead of throwing.
 * NOTE(review): `order` is presumably the document position recorded by the
 * XML reader — confirm against readXmlFile.
 *
 * @param {{key: string, value: *}} entry - One flattened child entry.
 * @returns {*} The stored order value, or 0 when none is found.
 */
function getOMathOrder({ key, value }) {
  if (key === 'm:r' && value && value['a:rPr']) {
    return value['a:rPr'].attrs?.order ?? 0
  }
  return value?.[`${key}Pr`]?.['m:ctrlPr']?.['a:rPr']?.attrs?.order ?? 0
}

/**
 * Converts a parsed math node (or an array of them) into a LaTeX string.
 *
 * The node's children are flattened into `{ key, value }` entries (array
 * children contribute one entry per item), sorted by their order key, and
 * each entry is converted by the parser matching its tag. `m:r` values are
 * converted recursively, `m:t` values are returned as-is, and unrecognised
 * keys contribute an empty string.
 *
 * @param {*} oMath - Node, array of nodes, or a falsy value.
 * @returns {string} The concatenated LaTeX; `''` for a falsy input.
 */
export function parseOMath(oMath) {
  if (!oMath) return ''

  if (Array.isArray(oMath)) {
    return oMath.map(item => parseOMath(item)).join('')
  }

  const entries = Object.keys(oMath).flatMap((key) => {
    const value = oMath[key]
    return Array.isArray(value)
      ? value.map(item => ({ key, value: item }))
      : [{ key, value }]
  })

  // Children are grouped by tag name, so sort them by their order key;
  // entries without one compare as 0.
  entries.sort((a, b) => getOMathOrder(a) - getOMathOrder(b))

  return entries.map(({ key, value }) => {
    switch (key) {
      case 'm:f': return parseFraction(value)
      case 'm:sSup': return parseSuperscript(value)
      case 'm:sSub': return parseSubscript(value)
      case 'm:rad': return parseRadical(value)
      case 'm:nary': return parseNary(value)
      case 'm:limLow': return parseLimit(value, 'low')
      case 'm:limUpp': return parseLimit(value, 'upp')
      case 'm:d': return parseDelimiter(value)
      case 'm:func': return parseFunction(value)
      case 'm:groupChr': return parseGroupChr(value)
      case 'm:eqArr': return parseEqArr(value)
      case 'm:bar': return parseBar(value)
      case 'm:acc': return parseAccent(value)
      case 'm:borderBox': return parseBox(value)
      case 'm:m': return parseMatrix(value)
      case 'm:r': return parseOMath(value)
      case 'm:t': return value
      default: return ''
    }
  }).join('')
}
177
+
178
/**
 * Decodes the five predefined XML entities (`&lt;`, `&gt;`, `&amp;`,
 * `&apos;`, `&quot;`) in a LaTeX string.
 *
 * Decoding happens in a single pass so each entity is decoded exactly once.
 * Decoding `&amp;` ahead of the others in separate passes would turn
 * `&amp;quot;` into `"` instead of the literal text `&quot;`.
 *
 * @param {string} latex - LaTeX text that may contain XML entities.
 * @returns {string} The text with those entities replaced by their characters.
 */
export function latexFormart(latex) {
  const entityChars = {
    lt: '<',
    gt: '>',
    amp: '&',
    apos: "'",
    quot: '"',
  }
  return latex.replace(/&(lt|gt|amp|apos|quot);/g, (match, name) => entityChars[name])
}
package/src/pptxtojson.js CHANGED
@@ -11,6 +11,7 @@ import { extractFileExtension, base64ArrayBuffer, getTextByPathList, angleToDegr
11
11
  import { getShadow } from './shadow'
12
12
  import { getTableBorders, getTableCellParams, getTableRowParams } from './table'
13
13
  import { RATIO_EMUs_Points } from './constants'
14
+ import { findOMath, latexFormart, parseOMath } from './math'
14
15
 
15
16
  export async function parse(file) {
16
17
  const slides = []
@@ -105,6 +106,7 @@ async function processSingleSlide(zip, sldFileName, themeContent, defaultTextSty
105
106
  let relationshipArray = resContent['Relationships']['Relationship']
106
107
  let layoutFilename = ''
107
108
  let diagramFilename = ''
109
+ let notesFilename = ''
108
110
  const slideResObj = {}
109
111
 
110
112
  if (relationshipArray.constructor === Array) {
@@ -121,6 +123,8 @@ async function processSingleSlide(zip, sldFileName, themeContent, defaultTextSty
121
123
  }
122
124
  break
123
125
  case 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/notesSlide':
126
+ notesFilename = relationshipArrayItem['attrs']['Target'].replace('../', 'ppt/')
127
+ break
124
128
  case 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/image':
125
129
  case 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/chart':
126
130
  case 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/hyperlink':
@@ -133,6 +137,9 @@ async function processSingleSlide(zip, sldFileName, themeContent, defaultTextSty
133
137
  }
134
138
  }
135
139
  else layoutFilename = relationshipArray['attrs']['Target'].replace('../', 'ppt/')
140
+
141
+ const slideNotesContent = await readXmlFile(zip, notesFilename)
142
+ const note = getNote(slideNotesContent)
136
143
 
137
144
  const slideLayoutContent = await readXmlFile(zip, layoutFilename)
138
145
  const slideLayoutTables = await indexNodes(slideLayoutContent)
@@ -283,9 +290,29 @@ async function processSingleSlide(zip, sldFileName, themeContent, defaultTextSty
283
290
  return {
284
291
  fill: bgColor,
285
292
  elements,
293
+ note,
286
294
  }
287
295
  }
288
296
 
297
// Extracts the speaker-note text of a slide from its parsed notes XML.
// Walks every `p:sp` under `p:notes > p:cSld > p:spTree`, reads the runs at
// `p:txBody > a:p > a:r`, and concatenates each run's `a:t` value with no
// separator. Returns '' when no `p:sp` node is found.
// NOTE(review): if `a:p` resolves to an array (several paragraphs) and
// getTextByPathList does not descend into arrays, those runs are skipped —
// confirm against getTextByPathList.
function getNote(noteContent) {
  const shapes = getTextByPathList(noteContent, ['p:notes', 'p:cSld', 'p:spTree', 'p:sp'])
  if (!shapes) return ''

  // A lone child is not wrapped in an array, so normalise before iterating.
  const shapeList = shapes.constructor !== Array ? [shapes] : shapes

  let text = ''
  shapeList.forEach((shape) => {
    const runs = getTextByPathList(shape, ['p:txBody', 'a:p', 'a:r'])
    if (!runs) return

    const runList = runs.constructor !== Array ? [runs] : runs
    runList.forEach((run) => {
      const runText = getTextByPathList(run, ['a:t'])
      if (runText) text += runText
    })
  })
  return text
}
315
+
289
316
  // async function getBackground(warpObj) {
290
317
  // const elements = []
291
318
  // const slideLayoutContent = warpObj['slideLayoutContent']
@@ -390,7 +417,12 @@ async function processNodesInSlide(nodeKey, nodeValue, warpObj, source) {
390
417
  json = await processGroupSpNode(nodeValue, warpObj, source)
391
418
  break
392
419
  case 'mc:AlternateContent':
393
- json = await processGroupSpNode(getTextByPathList(nodeValue, ['mc:Fallback']), warpObj, source)
420
+ if (getTextByPathList(nodeValue, ['mc:Fallback', 'p:grpSpPr', 'a:xfrm'])) {
421
+ json = await processGroupSpNode(getTextByPathList(nodeValue, ['mc:Fallback']), warpObj, source)
422
+ }
423
+ else if (getTextByPathList(nodeValue, ['mc:Choice'])) {
424
+ json = await processMathNode(getTextByPathList(nodeValue, ['mc:Choice']))
425
+ }
394
426
  break
395
427
  default:
396
428
  }
@@ -398,6 +430,24 @@ async function processNodesInSlide(nodeKey, nodeValue, warpObj, source) {
398
430
  return json
399
431
  }
400
432
 
433
// Builds the JSON element for a math shape: position and size are taken
// from the transform at `p:sp > p:spPr > a:xfrm`, and `latex` is the
// entity-decoded LaTeX of the first `m:oMath` found anywhere in the node
// (an empty string when none is found).
function processMathNode(node) {
  const transform = getTextByPathList(node, ['p:sp', 'p:spPr', 'a:xfrm'])
  const position = getPosition(transform, undefined, undefined)
  const size = getSize(transform, undefined, undefined)

  const [firstOMath] = findOMath(node)
  const rawLatex = parseOMath(firstOMath)

  return {
    type: 'math',
    top: position.top,
    left: position.left,
    width: size.width,
    height: size.height,
    latex: latexFormart(rawLatex),
  }
}
450
+
401
451
  async function processGroupSpNode(node, warpObj, source) {
402
452
  const xfrmNode = getTextByPathList(node, ['p:grpSpPr', 'a:xfrm'])
403
453
  if (!xfrmNode) return null