bbcode-compiler 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (83) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +77 -0
  3. package/dist/generateHtml.d.ts +2 -0
  4. package/dist/generateHtml.d.ts.map +1 -0
  5. package/dist/generateHtml.js +13 -0
  6. package/dist/generateHtml.js.map +1 -0
  7. package/dist/generator/Generator.d.ts +8 -0
  8. package/dist/generator/Generator.d.ts.map +1 -0
  9. package/dist/generator/Generator.js +54 -0
  10. package/dist/generator/Generator.js.map +1 -0
  11. package/dist/generator/transforms/Transform.d.ts +10 -0
  12. package/dist/generator/transforms/Transform.d.ts.map +1 -0
  13. package/dist/generator/transforms/Transform.js +2 -0
  14. package/dist/generator/transforms/Transform.js.map +1 -0
  15. package/dist/generator/transforms/htmlTransforms.d.ts +3 -0
  16. package/dist/generator/transforms/htmlTransforms.d.ts.map +1 -0
  17. package/dist/generator/transforms/htmlTransforms.js +198 -0
  18. package/dist/generator/transforms/htmlTransforms.js.map +1 -0
  19. package/dist/generator/utils/getTagImmediateAttrVal.d.ts +14 -0
  20. package/dist/generator/utils/getTagImmediateAttrVal.d.ts.map +1 -0
  21. package/dist/generator/utils/getTagImmediateAttrVal.js +19 -0
  22. package/dist/generator/utils/getTagImmediateAttrVal.js.map +1 -0
  23. package/dist/generator/utils/getTagImmediateText.d.ts +12 -0
  24. package/dist/generator/utils/getTagImmediateText.d.ts.map +1 -0
  25. package/dist/generator/utils/getTagImmediateText.js +29 -0
  26. package/dist/generator/utils/getTagImmediateText.js.map +1 -0
  27. package/dist/generator/utils/getWidthHeightAttr.d.ts +31 -0
  28. package/dist/generator/utils/getWidthHeightAttr.d.ts.map +1 -0
  29. package/dist/generator/utils/getWidthHeightAttr.js +47 -0
  30. package/dist/generator/utils/getWidthHeightAttr.js.map +1 -0
  31. package/dist/generator/utils/isDangerousUrl.d.ts +2 -0
  32. package/dist/generator/utils/isDangerousUrl.d.ts.map +1 -0
  33. package/dist/generator/utils/isDangerousUrl.js +14 -0
  34. package/dist/generator/utils/isDangerousUrl.js.map +1 -0
  35. package/dist/generator/utils/isOrderedList.d.ts +19 -0
  36. package/dist/generator/utils/isOrderedList.d.ts.map +1 -0
  37. package/dist/generator/utils/isOrderedList.js +26 -0
  38. package/dist/generator/utils/isOrderedList.js.map +1 -0
  39. package/dist/index.d.ts +16 -0
  40. package/dist/index.d.ts.map +1 -0
  41. package/dist/index.js +16 -0
  42. package/dist/index.js.map +1 -0
  43. package/dist/lexer/Lexer.d.ts +5 -0
  44. package/dist/lexer/Lexer.d.ts.map +1 -0
  45. package/dist/lexer/Lexer.js +81 -0
  46. package/dist/lexer/Lexer.js.map +1 -0
  47. package/dist/lexer/Token.d.ts +8 -0
  48. package/dist/lexer/Token.d.ts.map +1 -0
  49. package/dist/lexer/Token.js +54 -0
  50. package/dist/lexer/Token.js.map +1 -0
  51. package/dist/lexer/TokenType.d.ts +17 -0
  52. package/dist/lexer/TokenType.d.ts.map +1 -0
  53. package/dist/lexer/TokenType.js +41 -0
  54. package/dist/lexer/TokenType.js.map +1 -0
  55. package/dist/parser/AstNode.d.ts +105 -0
  56. package/dist/parser/AstNode.d.ts.map +1 -0
  57. package/dist/parser/AstNode.js +263 -0
  58. package/dist/parser/AstNode.js.map +1 -0
  59. package/dist/parser/Parser.d.ts +11 -0
  60. package/dist/parser/Parser.d.ts.map +1 -0
  61. package/dist/parser/Parser.js +265 -0
  62. package/dist/parser/Parser.js.map +1 -0
  63. package/dist/parser/nodeIsType.d.ts +13 -0
  64. package/dist/parser/nodeIsType.d.ts.map +1 -0
  65. package/dist/parser/nodeIsType.js +5 -0
  66. package/dist/parser/nodeIsType.js.map +1 -0
  67. package/package.json +68 -0
  68. package/src/generateHtml.ts +15 -0
  69. package/src/generator/Generator.ts +60 -0
  70. package/src/generator/transforms/Transform.ts +15 -0
  71. package/src/generator/transforms/htmlTransforms.ts +205 -0
  72. package/src/generator/utils/getTagImmediateAttrVal.ts +21 -0
  73. package/src/generator/utils/getTagImmediateText.ts +33 -0
  74. package/src/generator/utils/getWidthHeightAttr.ts +51 -0
  75. package/src/generator/utils/isDangerousUrl.ts +17 -0
  76. package/src/generator/utils/isOrderedList.ts +28 -0
  77. package/src/index.ts +18 -0
  78. package/src/lexer/Lexer.ts +89 -0
  79. package/src/lexer/Token.ts +64 -0
  80. package/src/lexer/TokenType.ts +65 -0
  81. package/src/parser/AstNode.ts +338 -0
  82. package/src/parser/Parser.ts +316 -0
  83. package/src/parser/nodeIsType.ts +15 -0
@@ -0,0 +1,316 @@
1
+ import { htmlTransforms } from '../generator/transforms/htmlTransforms'
2
+ import { stringifyTokens, Token } from '../lexer/Token'
3
+ import { isStringToken, TokenType } from '../lexer/TokenType'
4
+ import { RootNode, AttrNode, TextNode, LinebreakNode, StartTagNode, EndTagNode, AstNodeType, TagNode, AstNode } from './AstNode'
5
+ import { nodeIsType } from './nodeIsType'
6
+
7
/**
 * Builds an AST from a lexer token stream.
 *
 * Parsing is done in two passes:
 *  1. `parse` walks the tokens and produces a flat RootNode whose children are
 *     StartTagNode / EndTagNode / TextNode / LinebreakNode.
 *  2. `#matchTagNodes` pairs every start tag with its terminator and nests the
 *     nodes in between under a TagNode.
 */
export class Parser {
    /** Lowercased names of every tag known to the parser. */
    readonly tags: Set<string>
    /** Lowercased names of tags whose transform sets `isLinebreakTerminated`. */
    readonly linebreakTerminatedTags: Set<string>
    /** Lowercased names of tags whose transform sets `isStandalone`. */
    readonly standaloneTags: Set<string>

    /**
     * @param transforms Transforms describing the supported tags (defaults to `htmlTransforms`)
     */
    constructor(transforms = htmlTransforms) {
        // parseLabel() lowercases every label before lookup, so all three sets must hold lowercased names
        this.tags = new Set(transforms.map((transform) => transform.name.toLowerCase()))
        this.linebreakTerminatedTags = new Set(transforms.filter((transform) => transform.isLinebreakTerminated).map((transform) => transform.name.toLowerCase()))
        this.standaloneTags = new Set(transforms.filter((transform) => transform.isStandalone).map((transform) => transform.name.toLowerCase()))
    }

    /**
     * Parses `tokens` into an AST.
     *
     * NOTE: `tokens` is modified in place when an unquoted attribute value has to be
     * split on a space (see `parseText`).
     *
     * @param ogText Original text that the token offsets refer to
     * @param tokens Tokens describing `ogText`
     * @returns Root of the AST after start/end tags have been matched
     */
    parse(ogText: string, tokens: Array<Token>): RootNode {
        let idx = 0

        // Bounds-safe token checks: input may end in the middle of a tag
        const isTokenAt = (i: number, type: Token['type']): boolean => {
            return i < tokens.length && tokens[i].type === type
        }

        const isQuoteAt = (i: number): boolean => {
            return isTokenAt(i, TokenType.XSS_S_QUOTE) || isTokenAt(i, TokenType.XSS_D_QUOTE)
        }

        const parseRoot = (): RootNode => {
            const root = new RootNode()

            while (idx < tokens.length) {
                const startIdx = idx

                if (tokens[idx].type === TokenType.L_BRACKET) {
                    const tagNode = parseTag()
                    if (tagNode !== null) {
                        root.addChild(tagNode)
                        continue
                    }

                    // parseTag() may fail before consuming anything (e.g. "[" at the end of input or "[]").
                    // Consume the L_BRACKET ourselves so that we always make progress instead of looping forever.
                    if (idx === startIdx) {
                        idx += 1
                    }

                    // Everything consumed by the failed attempt becomes literal text
                    const invalidTokens = tokens.slice(startIdx, idx)
                    const str = stringifyTokens(ogText, invalidTokens)
                    root.addChild(new TextNode(str))
                } else if (tokens[idx].type === TokenType.LINEBREAK) {
                    idx += 1 // Consume LINEBREAK
                    root.addChild(new LinebreakNode())
                } else {
                    // Advance until we see the start of another RootNode's child (TagNode or LinebreakNode)
                    while (idx < tokens.length && tokens[idx].type !== TokenType.L_BRACKET && tokens[idx].type !== TokenType.LINEBREAK) {
                        idx += 1
                    }

                    const slice = tokens.slice(startIdx, idx)
                    const str = stringifyTokens(ogText, slice)
                    root.addChild(new TextNode(str))
                }
            }

            return root
        }

        // Returns null when the tokens at idx do not form a known start/end tag.
        // idx is left wherever parsing stopped so that the caller can emit the consumed tokens as text.
        const parseTag = (): StartTagNode | EndTagNode | null => {
            if (idx + 1 >= tokens.length) {
                return null
            }

            if (tokens[idx].type !== TokenType.L_BRACKET) {
                return null
            }

            const startIdx = idx

            // If L_BRACKET is followed by text, then it must be StartTag or is invalid
            if (isStringToken(tokens[idx + 1].type)) {
                idx += 1 // Consume L_BRACKET

                const labelText = parseLabel()
                if (!this.tags.has(labelText)) {
                    return null
                }

                const attrNodes = new Array<AttrNode>()
                while (true) {
                    const attrNode = parseAttr()
                    if (attrNode === null) {
                        break
                    }

                    attrNodes.push(attrNode)
                }

                // Input may have ended before the closing bracket
                if (!isTokenAt(idx, TokenType.R_BRACKET)) {
                    return null
                }

                idx += 1 // Consume R_BRACKET

                const slice = tokens.slice(startIdx, idx)
                const ogTag = stringifyTokens(ogText, slice)
                return new StartTagNode(labelText, ogTag, attrNodes)
            }

            // If L_BRACKET is followed by BACKSLASH, then it must be EndTag or is invalid
            if (tokens[idx + 1].type === TokenType.BACKSLASH) {
                idx += 1 // Consume L_BRACKET
                idx += 1 // Consume BACKSLASH

                const labelText = parseLabel()
                if (!this.tags.has(labelText)) {
                    return null
                }

                // Input may have ended before the closing bracket
                if (!isTokenAt(idx, TokenType.R_BRACKET)) {
                    return null
                }

                idx += 1 // Consume R_BRACKET

                const slice = tokens.slice(startIdx, idx)
                const ogTag = stringifyTokens(ogText, slice)
                return new EndTagNode(labelText, ogTag)
            }

            return null
        }

        // Consumes exactly one token and returns its text in lowercase
        const parseLabel = (): string => {
            const slice = tokens.slice(idx, idx + 1)
            const label = stringifyTokens(ogText, slice)
            idx += 1 // Consume LABEL
            return label.toLowerCase()
        }

        // Consumes consecutive string tokens starting at idx and returns them as one TextNode
        const parseText = (endOnQuotes = false, endOnSpace = false): TextNode => {
            const startIdx = idx

            while (idx < tokens.length) {
                if (!isStringToken(tokens[idx].type)) {
                    break
                }

                if (endOnQuotes && isQuoteAt(idx)) {
                    break
                }

                /**
                 * If this text must end on space, then it must not endOnQuotes (a space inside quotes is part of the text)
                 * When we encounter a space, we split the current token into 2 tokens and only consume the first half
                 *
                 *      <a b>   ->   <a>< b>
                 *
                 * The first half ("a") is consumed and idx is left pointing at the second half (" b")
                 */
                if (endOnSpace && !endOnQuotes) {
                    const origStr = stringifyTokens(ogText, [tokens[idx]])
                    const spaceIdx = origStr.indexOf(' ')

                    if (spaceIdx >= 0) {
                        const oldToken: Token = {
                            type: TokenType.STR,
                            offset: tokens[idx].offset,
                            length: spaceIdx,
                        }

                        const newToken: Token = {
                            type: TokenType.STR,
                            offset: tokens[idx].offset + spaceIdx,
                            length: tokens[idx].length - spaceIdx,
                        }

                        tokens.splice(idx, 1, oldToken, newToken)
                        idx += 1
                        break
                    }
                }

                idx += 1
            }

            const slice = tokens.slice(startIdx, idx)
            const str = stringifyTokens(ogText, slice)

            return new TextNode(str)
        }

        // Parses an attribute value (VAL or "VAL") starting at idx.
        // Returns null when a value was opened with a quote but is not closed by one.
        const parseAttrVal = (): TextNode | null => {
            const openedWithQuotes = isQuoteAt(idx)
            if (openedWithQuotes) {
                idx += 1 // Consume opening quote
            }

            const valNode = parseText(openedWithQuotes, true)

            if (openedWithQuotes) {
                // Also covers input that ends before the closing quote
                if (!isQuoteAt(idx)) {
                    return null
                }

                idx += 1 // Consume closing quote
            }

            return valNode
        }

        const parseAttr = (): AttrNode | null => {
            if (idx + 1 >= tokens.length) {
                return null
            }

            const attrNode = new AttrNode()

            if (tokens[idx].type === TokenType.EQUALS && isStringToken(tokens[idx + 1].type)) { // [Tag = VAL ...] or [Tag = "VAL"]
                idx += 1 // Consume EQUALS

                const valNode = parseAttrVal()
                if (valNode === null) {
                    return null
                }

                attrNode.addChild(valNode)
            } else if (isStringToken(tokens[idx].type) && tokens[idx + 1].type === TokenType.EQUALS && (idx + 2 < tokens.length && isStringToken(tokens[idx + 2].type))) { // [Tag KEY = VAL ...] or [Tag KEY = "VAL" ...]
                const keyNode = parseText()
                attrNode.addChild(keyNode)

                idx += 1 // Consume EQUALS

                const valNode = parseAttrVal()
                if (valNode === null) {
                    return null
                }

                attrNode.addChild(valNode)
            } else if (isStringToken(tokens[idx].type) && tokens[idx + 1].type !== TokenType.EQUALS) { // [Tag VAL ...]
                const valNode = parseText()
                attrNode.addChild(valNode)
            } else {
                return null
            }

            return attrNode
        }

        const root = parseRoot()
        return this.#matchTagNodes(root)
    }

    // ------------------------------------------------------------------------
    // Post Parsing Transforms
    // ------------------------------------------------------------------------

    /**
     * Rebuilds `rootNode` so that each start tag with a terminator (or each standalone tag)
     * becomes a TagNode that owns the nodes between the start tag and its terminator.
     * Unmatched start/end tags are turned back into text using their original source.
     */
    #matchTagNodes(rootNode: RootNode): RootNode {
        const transformedRoot = new RootNode()

        for (let i = 0; i < rootNode.children.length; i++) {
            const child = rootNode.children[i]

            if (nodeIsType(child, AstNodeType.StartTagNode)) {
                const endTag = this.#findMatchingEndTag(rootNode.children, i, child.tagName)
                const isStandalone = this.standaloneTags.has(child.tagName)

                if (endTag || isStandalone) {
                    const tagNode = new TagNode(child, endTag?.node)
                    transformedRoot.addChild(tagNode)

                    // If matching end tag exists, consume all nodes between start/end (exclusive) as a subtree
                    if (endTag) {
                        const subRoot = new RootNode(rootNode.children.slice(i + 1, endTag.idx))
                        i = endTag.idx // Skip past the terminator (the loop increment moves to the node after it)

                        const transformedSubRoot = this.#matchTagNodes(subRoot)
                        tagNode.addChild(transformedSubRoot)
                    }
                } else {
                    // If no end tag exists, then treat tag as string literal
                    transformedRoot.addChild(new TextNode(child.ogTag))
                }
            } else if (nodeIsType(child, AstNodeType.EndTagNode)) {
                // Encountered end tag when we're not expecting an end tag so we treat it as a string literal
                transformedRoot.addChild(new TextNode(child.ogTag))
            } else if (nodeIsType(child, AstNodeType.TextNode)) {
                // Normal text nodes get copied
                transformedRoot.addChild(child)
            } else if (nodeIsType(child, AstNodeType.LinebreakNode)) {
                // Linebreak nodes get copied
                transformedRoot.addChild(child)
            } else {
                throw new Error('Unexpected child of RootNode')
            }
        }

        return transformedRoot
    }

    /**
     * Finds the first sibling at or after `startIdx` that terminates `tagName`:
     * an EndTagNode with the same name or, for linebreak-terminated tags, a LinebreakNode.
     *
     * NOTE: the first candidate wins; nested tags with the same name are not counted.
     *
     * @returns The terminator and its index in `siblings`, or null if there is none
     *          (always null for standalone tags)
     */
    #findMatchingEndTag(siblings: Array<AstNode>, startIdx: number, tagName: string): { idx: number; node: EndTagNode | LinebreakNode } | null {
        if (this.standaloneTags.has(tagName)) {
            return null
        }

        for (let i = startIdx; i < siblings.length; i++) {
            const sibling = siblings[i]
            const isEndTag =
                (nodeIsType(sibling, AstNodeType.LinebreakNode) && this.linebreakTerminatedTags.has(tagName)) ||
                (nodeIsType(sibling, AstNodeType.EndTagNode) && sibling.tagName === tagName)

            if (isEndTag) {
                return {
                    idx: i,
                    node: sibling,
                }
            }
        }

        return null
    }
}
@@ -0,0 +1,15 @@
1
+ import { AstNodeType, AstNode, AttrNode, RootNode, TagNode, TextNode, LinebreakNode, EndTagNode, StartTagNode } from './AstNode'
2
+
3
/**
 * Lookup table from each AstNodeType member to the node class it identifies.
 * Used by `nodeIsType` to derive the narrowed type from the requested node type.
 */
interface AstMap {
    // Container node
    [AstNodeType.RootNode]: RootNode

    // Tag related nodes
    [AstNodeType.TagNode]: TagNode
    [AstNodeType.StartTagNode]: StartTagNode
    [AstNodeType.EndTagNode]: EndTagNode
    [AstNodeType.AttrNode]: AttrNode

    // Leaf content nodes
    [AstNodeType.TextNode]: TextNode
    [AstNodeType.LinebreakNode]: LinebreakNode
}
12
+
13
/**
 * Type guard that narrows `node` to the node class registered in AstMap for `nodeType`.
 *
 * @param node Node to inspect
 * @param nodeType Node type to compare against `node.nodeType`
 * @returns true when `node.nodeType` is strictly equal to `nodeType`
 */
export function nodeIsType<T extends keyof AstMap>(node: AstNode, nodeType: T): node is AstMap[T] {
    const actualType = node.nodeType
    return actualType === nodeType
}