cui-llama.rn 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +20 -0
- package/README.md +330 -0
- package/android/build.gradle +107 -0
- package/android/gradle.properties +5 -0
- package/android/src/main/AndroidManifest.xml +4 -0
- package/android/src/main/CMakeLists.txt +69 -0
- package/android/src/main/java/com/rnllama/LlamaContext.java +353 -0
- package/android/src/main/java/com/rnllama/RNLlama.java +446 -0
- package/android/src/main/java/com/rnllama/RNLlamaPackage.java +48 -0
- package/android/src/main/jni.cpp +635 -0
- package/android/src/newarch/java/com/rnllama/RNLlamaModule.java +94 -0
- package/android/src/oldarch/java/com/rnllama/RNLlamaModule.java +95 -0
- package/cpp/README.md +4 -0
- package/cpp/common.cpp +3237 -0
- package/cpp/common.h +467 -0
- package/cpp/ggml-aarch64.c +2193 -0
- package/cpp/ggml-aarch64.h +39 -0
- package/cpp/ggml-alloc.c +1041 -0
- package/cpp/ggml-alloc.h +76 -0
- package/cpp/ggml-backend-impl.h +153 -0
- package/cpp/ggml-backend.c +2225 -0
- package/cpp/ggml-backend.h +236 -0
- package/cpp/ggml-common.h +1829 -0
- package/cpp/ggml-impl.h +655 -0
- package/cpp/ggml-metal.h +65 -0
- package/cpp/ggml-metal.m +3273 -0
- package/cpp/ggml-quants.c +15022 -0
- package/cpp/ggml-quants.h +132 -0
- package/cpp/ggml.c +22034 -0
- package/cpp/ggml.h +2444 -0
- package/cpp/grammar-parser.cpp +536 -0
- package/cpp/grammar-parser.h +29 -0
- package/cpp/json-schema-to-grammar.cpp +1045 -0
- package/cpp/json-schema-to-grammar.h +8 -0
- package/cpp/json.hpp +24766 -0
- package/cpp/llama.cpp +21789 -0
- package/cpp/llama.h +1201 -0
- package/cpp/log.h +737 -0
- package/cpp/rn-llama.hpp +630 -0
- package/cpp/sampling.cpp +460 -0
- package/cpp/sampling.h +160 -0
- package/cpp/sgemm.cpp +1027 -0
- package/cpp/sgemm.h +14 -0
- package/cpp/unicode-data.cpp +7032 -0
- package/cpp/unicode-data.h +20 -0
- package/cpp/unicode.cpp +812 -0
- package/cpp/unicode.h +64 -0
- package/ios/RNLlama.h +11 -0
- package/ios/RNLlama.mm +302 -0
- package/ios/RNLlama.xcodeproj/project.pbxproj +278 -0
- package/ios/RNLlamaContext.h +39 -0
- package/ios/RNLlamaContext.mm +426 -0
- package/jest/mock.js +169 -0
- package/lib/commonjs/NativeRNLlama.js +10 -0
- package/lib/commonjs/NativeRNLlama.js.map +1 -0
- package/lib/commonjs/grammar.js +574 -0
- package/lib/commonjs/grammar.js.map +1 -0
- package/lib/commonjs/index.js +151 -0
- package/lib/commonjs/index.js.map +1 -0
- package/lib/module/NativeRNLlama.js +3 -0
- package/lib/module/NativeRNLlama.js.map +1 -0
- package/lib/module/grammar.js +566 -0
- package/lib/module/grammar.js.map +1 -0
- package/lib/module/index.js +129 -0
- package/lib/module/index.js.map +1 -0
- package/lib/typescript/NativeRNLlama.d.ts +107 -0
- package/lib/typescript/NativeRNLlama.d.ts.map +1 -0
- package/lib/typescript/grammar.d.ts +38 -0
- package/lib/typescript/grammar.d.ts.map +1 -0
- package/lib/typescript/index.d.ts +46 -0
- package/lib/typescript/index.d.ts.map +1 -0
- package/llama-rn.podspec +56 -0
- package/package.json +230 -0
- package/src/NativeRNLlama.ts +132 -0
- package/src/grammar.ts +849 -0
- package/src/index.ts +182 -0
package/src/grammar.ts
ADDED
@@ -0,0 +1,849 @@
|
|
1
|
+
/* eslint-disable no-restricted-syntax */
|
2
|
+
/* eslint-disable no-underscore-dangle */
|
3
|
+
// GBNF body registered as the `space` rule of every converter: one optional space character.
const SPACE_RULE = '" "?'
|
4
|
+
|
5
|
+
/**
 * Builds a GBNF expression matching between `minItems` and `maxItems`
 * occurrences of `itemRule`.
 *
 * @param itemRule GBNF expression (rule name, character class or quoted literal) to repeat.
 * @param minItems Number of mandatory occurrences.
 * @param maxItems Maximum number of occurrences; `undefined` or `Infinity` means unbounded.
 * @param opts.separatorRule GBNF expression placed between consecutive items ('' for none).
 * @param opts.itemRuleIsLiteral Set when `itemRule` is a double-quoted literal, so the
 *   mandatory repetitions can be merged into a single literal.
 * @returns The GBNF expression as a string.
 */
function buildRepetition(
  itemRule: string,
  minItems: number,
  maxItems: number | undefined,
  opts: {
    separatorRule?: string
    itemRuleIsLiteral?: boolean
  } = {},
): string {
  const separatorRule = opts.separatorRule ?? ''
  const itemRuleIsLiteral = opts.itemRuleIsLiteral ?? false
  // An infinite bound cannot be expanded into nested optional groups (it would
  // request an array of infinite length), so handle it as "no upper bound".
  const upperBound = maxItems === Infinity ? undefined : maxItems

  // Shortcuts that map directly onto GBNF postfix operators.
  if (separatorRule === '') {
    if (minItems === 0 && upperBound === 1) {
      return `${itemRule}?`
    } else if (minItems === 1 && upperBound === undefined) {
      return `${itemRule}+`
    }
  }

  // Mandatory part: exactly `minItems` occurrences.
  let result = ''
  if (minItems > 0) {
    if (itemRuleIsLiteral && separatorRule === '') {
      // Strip the surrounding quotes, repeat the raw text, and quote it again.
      result = `"${itemRule.slice(1, -1).repeat(minItems)}"`
    } else {
      result = Array.from({ length: minItems }, () => itemRule).join(
        separatorRule !== '' ? ` ${separatorRule} ` : ' ',
      )
    }
  }

  // Optional part: up to `upToN` further occurrences, as nested `( ... )?` groups.
  const optRepetitions = (upToN: number, prefixWithSep = false): string => {
    const content =
      separatorRule !== '' && prefixWithSep
        ? `${separatorRule} ${itemRule}`
        : itemRule
    if (upToN === 0) {
      return ''
    } else if (upToN === 1) {
      return `(${content})?`
    } else if (separatorRule !== '' && !prefixWithSep) {
      // The very first item carries no separator; every following one does.
      return `(${content} ${optRepetitions(upToN - 1, true)})?`
    } else {
      return (
        Array.from({ length: upToN }, () => `(${content}`)
          .join(' ')
          .trim() + Array.from({ length: upToN }, () => ')?').join('')
      )
    }
  }

  if (minItems > 0 && upperBound !== minItems) {
    result += ' '
  }

  if (upperBound !== undefined) {
    result += optRepetitions(upperBound - minItems, minItems > 0)
  } else {
    const itemOperator = `(${
      separatorRule !== '' ? `${separatorRule} ` : ''
    }${itemRule})`

    if (minItems === 0 && separatorRule !== '') {
      result = `(${itemRule} ${itemOperator}*)?`
    } else {
      result += `${itemOperator}*`
    }
  }

  return result
}
|
76
|
+
|
77
|
+
/**
 * A predefined grammar rule: its GBNF body (`content`) together with the
 * names of the other built-in rules that body refers to (`deps`).
 */
class BuiltinRule {
  deps: string[]

  constructor(public content: string, deps: string[]) {
    // Fall back to "no dependencies" when a falsy value is passed.
    this.deps = deps ? deps : []
  }
}
|
87
|
+
|
88
|
+
// Zero to fifteen digits, expressed as nested optional groups.
const UP_TO_15_DIGITS = buildRepetition('[0-9]', 0, 15)

// Built-in rules for the JSON primitive and container types, keyed by rule name.
const PRIMITIVE_RULES: Record<string, BuiltinRule> = {
  boolean: new BuiltinRule('("true" | "false") space', []),
  'decimal-part': new BuiltinRule('[0-9] ' + UP_TO_15_DIGITS, []),
  'integral-part': new BuiltinRule('[0-9] | [1-9] ' + UP_TO_15_DIGITS, []),
  number: new BuiltinRule(
    '("-"? integral-part) ("." decimal-part)? ([eE] [-+]? integral-part)? space',
    ['integral-part', 'decimal-part'],
  ),
  integer: new BuiltinRule('("-"? integral-part) space', ['integral-part']),
  // Any JSON value.
  value: new BuiltinRule(
    'object | array | string | number | boolean | null',
    ['object', 'array', 'string', 'number', 'boolean', 'null'],
  ),
  object: new BuiltinRule(
    '"{" space ( string ":" space value ("," space string ":" space value)* )? "}" space',
    ['string', 'value'],
  ),
  array: new BuiltinRule(
    '"[" space ( value ("," space value)* )? "]" space',
    ['value'],
  ),
  // Five dash-separated groups of 8, 4, 4, 4 and 12 hex digits, wrapped in double quotes.
  uuid: new BuiltinRule(
    '"\\"" ' +
      [8, 4, 4, 4, 12].map((n) => '[0-9a-fA-F]'.repeat(n)).join(' "-" ') +
      ' "\\"" space',
    [],
  ),
  // One string character: anything but quote/backslash, or a backslash escape sequence.
  char: new BuiltinRule(
    '[^"\\\\] | "\\\\" (["\\\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F])',
    [],
  ),
  string: new BuiltinRule('"\\"" char* "\\"" space', ['char']),
  null: new BuiltinRule('"null" space', []),
}
|
127
|
+
|
128
|
+
// TODO: support "uri", "email" string formats
// Built-in rules for the supported string `format` values. The bare rules
// (`date`, `time`, `date-time`) match the unquoted text; the `*-string`
// variants wrap them in double quotes followed by `space`.
const STRING_FORMAT_RULES: Record<string, BuiltinRule> = (() => {
  // Wraps the rule called `inner` in double quotes and declares it as the only dependency.
  const quoted = (inner: string): BuiltinRule =>
    new BuiltinRule(`"\\"" ${inner} "\\"" space`, [inner])

  return {
    date: new BuiltinRule(
      '[0-9] [0-9] [0-9] [0-9] "-" ( "0" [1-9] | "1" [0-2] ) "-" ( "0" [1-9] | [1-2] [0-9] | "3" [0-1] )',
      [],
    ),
    time: new BuiltinRule(
      '([01] [0-9] | "2" [0-3]) ":" [0-5] [0-9] ":" [0-5] [0-9] ( "." [0-9] [0-9] [0-9] )? ( "Z" | ( "+" | "-" ) ( [01] [0-9] | "2" [0-3] ) ":" [0-5] [0-9] )',
      [],
    ),
    'date-time': new BuiltinRule('date "T" time', ['date', 'time']),
    'date-string': quoted('date'),
    'time-string': quoted('time'),
    'date-time-string': quoted('date-time'),
  }
})()
|
145
|
+
|
146
|
+
// Rule names that schema-derived rules must not take over: `root` plus the
// name of every built-in primitive and string-format rule.
const RESERVED_NAMES = Object.assign(
  { root: true },
  PRIMITIVE_RULES,
  STRING_FORMAT_RULES,
)
|
151
|
+
|
152
|
+
// Runs of characters that are not allowed in a rule name (anything but digits, ASCII letters and '-').
const INVALID_RULE_CHARS_RE = /[^\dA-Za-z-]+/g
// Characters that must be escaped inside a double-quoted GBNF literal.
// The backslash is included: without it, a backslash already present in the
// text (e.g. the `\"` or `\n` produced by JSON.stringify) would be read by the
// grammar parser as the start of an escape sequence of its own.
const GRAMMAR_LITERAL_ESCAPE_RE = /[\n\r"\\]/g
const GRAMMAR_LITERAL_ESCAPES: Record<string, string> = {
  '\r': '\\r',
  '\n': '\\n',
  '"': '\\"',
  '-': '\\-',
  ']': '\\]',
  '\\': '\\\\',
}

// Regex metacharacters that end a literal run while a pattern is being scanned.
const NON_LITERAL_SET = new Set('|.()[]{}*+?')
// Characters that are backslash-escaped in a regex but written as-is in a GBNF literal.
const ESCAPED_IN_REGEXPS_BUT_NOT_IN_LITERALS = new Set('[]()|{}*+?')

/**
 * Wraps `literal` in double quotes as a GBNF string literal, escaping line
 * breaks, double quotes and backslashes so the grammar matches the text exactly.
 */
const formatLiteral = (literal: string): string => {
  const escaped = literal.replace(
    GRAMMAR_LITERAL_ESCAPE_RE,
    (m) => GRAMMAR_LITERAL_ESCAPES[m] ?? '',
  )
  return `"${escaped}"`
}
|
172
|
+
|
173
|
+
/** Serializes `value` to JSON text and turns that text into a GBNF literal. */
const generateConstantRule = (value: any): string => {
  const jsonText = JSON.stringify(value)
  return formatLiteral(jsonText)
}
|
175
|
+
|
176
|
+
/** Maps a property name to its numeric rank when object properties are ordered. */
type PropOrder = Record<string, number>
|
179
|
+
|
180
|
+
/**
 * Groups consecutive elements of `iterable` that share the same key.
 *
 * Yields one `[key, elements]` pair per run, in input order. Keys are compared
 * with `!==`. An explicit flag records whether a run has started, so `null`
 * (or any other value) is a valid key.
 */
function* groupBy(iterable: Iterable<any>, keyFn: (x: any) => any) {
  let started = false
  let lastKey: any = null
  let group: any[] = []
  for (const element of iterable) {
    const key = keyFn(element)
    if (started && key !== lastKey) {
      // The key changed: emit the finished run and start a new one.
      yield [lastKey, group]
      group = []
    }
    group.push(element)
    lastKey = key
    started = true
  }
  if (started) {
    yield [lastKey, group]
  }
}
|
197
|
+
|
198
|
+
/**
 * Converts a JSON schema into GBNF grammar rules.
 *
 * Typical use: optionally `await resolveRefs(schema, '')`, then call
 * `visit(schema, '')` to register the rules and `formatGrammar()` to render them.
 */
export class SchemaGrammarConverter {
  // Property name -> rank used to order object properties.
  private _propOrder: PropOrder

  // Whether `resolveRefs` may download `https://` references.
  private _allowFetch: boolean

  // Whether `.` in a pattern also matches line breaks.
  private _dotall: boolean

  // Rule name -> GBNF body of every rule registered so far.
  private _rules: { [key: string]: string }

  // Reference string -> resolved schema node, filled in by `resolveRefs`.
  private _refs: { [key: string]: any }

  // References currently being visited; guards against endless recursion on cyclic refs.
  private _refsBeingResolved: Set<string>

  /**
   * @param options.prop_order Optional rank per property name.
   * @param options.allow_fetch Allow fetching remote schemas (default false).
   * @param options.dotall Let `.` in patterns match any character (default false).
   */
  constructor(options: {
    prop_order?: PropOrder
    allow_fetch?: boolean
    dotall?: boolean
  }) {
    this._propOrder = options.prop_order || {}
    this._allowFetch = options.allow_fetch || false
    this._dotall = options.dotall || false
    this._rules = { space: SPACE_RULE }
    this._refs = {}
    this._refsBeingResolved = new Set()
  }

  /**
   * Registers `rule` under a sanitized version of `name` and returns the key used.
   *
   * If the sanitized name is already taken by a different body, a numeric
   * suffix (0, 1, ...) is appended until a free or identical entry is found.
   */
  _addRule(name: string, rule: string): string {
    const escName = name.replace(INVALID_RULE_CHARS_RE, '-')
    let key = escName

    if (escName in this._rules) {
      if (this._rules[escName] === rule) {
        return key
      }

      let i = 0
      while (
        `${escName}${i}` in this._rules &&
        this._rules[`${escName}${i}`] !== rule
      ) {
        i += 1
      }
      key = `${escName}${i}`
    }

    this._rules[key] = rule
    return key
  }

  /**
   * Walks `schema` and records the target of every `$ref` in `this._refs`.
   *
   * Local refs (`#/...`) are rewritten in place to `${url}${ref}` and resolved
   * against `schema`. `https://` refs are fetched (only when fetching is
   * allowed) and resolved recursively.
   *
   * @param schema Schema (or sub-schema) to walk; `$ref` values may be mutated.
   * @param url Base URL of `schema`, used as prefix for local refs.
   * @returns The visited schema.
   * @throws Error for remote refs when fetching is disabled, for unsupported
   *   ref forms, and when a ref path does not exist in its target.
   */
  async resolveRefs(schema: any, url: string): Promise<any> {
    const visit: any = async (n: any) => {
      if (Array.isArray(n)) {
        return Promise.all(n.map(visit))
      } else if (typeof n === 'object' && n !== null) {
        let ref = n.$ref
        let target
        if (ref !== undefined && !this._refs[ref]) {
          if (ref.startsWith('https://')) {
            if (!this._allowFetch) {
              throw new Error(
                'Fetching remote schemas is not allowed (use --allow-fetch for force)',
              )
            }

            const fragSplit = ref.split('#')
            const baseUrl = fragSplit[0]

            // Reuse the document if it was fetched before.
            target = this._refs[baseUrl]
            if (!target) {
              target = await this.resolveRefs(
                await fetch(ref).then((res) => res.json()),
                baseUrl,
              )
              this._refs[baseUrl] = target
            }

            // No fragment: the ref designates the whole remote document.
            if (
              fragSplit.length === 1 ||
              fragSplit[fragSplit.length - 1] === ''
            ) {
              return target
            }
          } else if (ref.startsWith('#/')) {
            target = schema
            ref = `${url}${ref}`
            n.$ref = ref
          } else {
            throw new Error(`Unsupported ref ${ref}`)
          }

          // Follow the fragment path segment by segment.
          const selectors = ref.split('#')[1].split('/').slice(1)
          for (const sel of selectors) {
            if (!target || !(sel in target)) {
              throw new Error(
                `Error resolving ref ${ref}: ${sel} not in ${JSON.stringify(
                  target,
                )}`,
              )
            }
            target = target[sel]
          }

          this._refs[ref] = target
        } else {
          await Promise.all(Object.values(n).map(visit))
        }
      }

      return n
    }

    return visit(schema)
  }

  /**
   * Visits every alternative schema and joins the resulting rule names with ` | `.
   * Alternatives are named `${name}-${i}`, or `alternative-${i}` when `name` is empty.
   */
  _generateUnionRule(name: string, altSchemas: any[]): string {
    return altSchemas
      .map((altSchema, i) =>
        this.visit(
          altSchema,
          `${name ?? ''}${name ? '-' : 'alternative-'}${i}`,
        ),
      )
      .join(' | ')
  }

  /**
   * Translates a regular expression into a rule matching a double-quoted string.
   *
   * Handles `.`, groups, character classes, alternation, the `* + ?`
   * operators and `{m}`, `{m,}`, `{,n}`, `{m,n}` quantifiers.
   *
   * @param pattern Regular expression; must start with `^` and end with `$`.
   * @param name Name of the rule to register.
   * @returns The name under which the rule was registered.
   * @throws Error on missing anchors, `(?...)` groups, unbalanced brackets or
   *   malformed quantifiers.
   */
  _visitPattern(pattern: string, name: string): string {
    if (!pattern.startsWith('^') || !pattern.endsWith('$')) {
      throw new Error('Pattern must start with "^" and end with "$"')
    }
    pattern = pattern.slice(1, -1)
    // Quantified sub-expression -> name of the helper rule created for it.
    const subRuleIds: { [key: string]: string } = {}

    let i = 0
    const { length } = pattern

    const getDot = () => {
      let rule
      if (this._dotall) {
        rule = '[\\U00000000-\\U0010FFFF]'
      } else {
        // Accept any character... except \n and \r line break chars (\x0A and \x0D)
        rule = '[^\\x0A\\x0D]'
      }
      return this._addRule('dot', rule)
    }

    const toRule = ([s, isLiteral]: [string, boolean]) =>
      isLiteral ? `"${s}"` : s

    const transform = () => {
      const start = i
      // For each component of this sequence, store its string representation and whether it's a literal.
      // We only need a flat structure here to apply repetition operators to the last item, and
      // to merge literals at the end (we're parsing grouped ( sequences ) recursively and don't treat '|' specially
      // (GBNF's syntax is luckily very close to regular expressions!)
      const seq: Array<[string, boolean]> = []

      const joinSeq = () => {
        const ret = []
        for (const [isLiteral, g] of groupBy(seq, (x) => x[1])) {
          if (isLiteral) {
            // Adjacent literals are merged into one.
            ret.push([[...g].map((x) => x[0]).join(''), true])
          } else {
            ret.push(...g)
          }
        }
        if (ret.length === 1) {
          return ret[0]
        }
        return [ret.map((x) => toRule(x)).join(' '), false]
      }

      while (i < length) {
        const c = pattern[i]
        if (c === '.') {
          seq.push([getDot(), false])
          i += 1
        } else if (c === '(') {
          i += 1
          if (i < length) {
            if (pattern[i] === '?') {
              throw new Error(
                `Unsupported pattern syntax "${pattern[i]}" at index ${i} of /${pattern}/`,
              )
            }
          }
          seq.push([`(${toRule(transform())})`, false])
        } else if (c === ')') {
          i += 1
          if (start <= 0 || pattern[start - 1] !== '(') {
            throw new Error(
              `Unbalanced parentheses; start = ${start}, i = ${i}, pattern = ${pattern}`,
            )
          }
          return joinSeq()
        } else if (c === '[') {
          // Character classes are copied verbatim, keeping escaped characters together.
          let squareBrackets = c
          i += 1
          while (i < length && pattern[i] !== ']') {
            if (pattern[i] === '\\') {
              squareBrackets += pattern.slice(i, i + 2)
              i += 2
            } else {
              squareBrackets += pattern[i]
              i += 1
            }
          }
          if (i >= length) {
            throw new Error(
              `Unbalanced square brackets; start = ${start}, i = ${i}, pattern = ${pattern}`,
            )
          }
          squareBrackets += ']'
          i += 1
          seq.push([squareBrackets, false])
        } else if (c === '|') {
          seq.push(['|', false])
          i += 1
        } else if (c === '*' || c === '+' || c === '?') {
          // Postfix operators apply to the last component.
          seq[seq.length - 1] = [
            toRule(seq[seq.length - 1] || ['', false]) + c,
            false,
          ]
          i += 1
        } else if (c === '{') {
          let curlyBrackets = c
          i += 1
          while (i < length && pattern[i] !== '}') {
            curlyBrackets += pattern[i]
            i += 1
          }
          if (i >= length) {
            throw new Error(
              `Unbalanced curly brackets; start = ${start}, i = ${i}, pattern = ${pattern}`,
            )
          }
          curlyBrackets += '}'
          i += 1
          const nums = curlyBrackets
            .slice(1, -1)
            .split(',')
            .map((s) => s.trim())
          let minTimes: number
          let maxTimes: number | undefined
          if (nums.length === 1) {
            minTimes = parseInt(nums[0] as string, 10)
            maxTimes = minTimes
          } else {
            if (nums.length !== 2) {
              throw new Error(`Invalid quantifier ${curlyBrackets}`)
            }
            minTimes = nums[0] ? parseInt(nums[0], 10) : 0
            // `{m,}` has no upper bound. `undefined` is the value buildRepetition
            // documents for "unbounded"; a numeric Infinity must not be passed on.
            maxTimes = nums[1] ? parseInt(nums[1], 10) : undefined
          }

          let [sub] = seq[seq.length - 1] || ['', false]
          const [, subIsLiteral] = seq[seq.length - 1] || ['', false]

          if (!subIsLiteral) {
            // Give the repeated expression its own rule so it is written only once.
            let id = subRuleIds[sub]
            if (id === undefined) {
              id = this._addRule(
                `${name}-${Object.keys(subRuleIds).length + 1}`,
                sub,
              )
              subRuleIds[sub] = id
            }
            sub = id
          }

          seq[seq.length - 1] = [
            buildRepetition(
              subIsLiteral ? `"${sub}"` : sub,
              minTimes,
              maxTimes,
              { itemRuleIsLiteral: subIsLiteral },
            ),
            false,
          ]
        } else {
          // Accumulate a run of literal characters.
          let literal = ''
          while (i < length) {
            if (pattern[i] === '\\' && i < length - 1) {
              const next = pattern[i + 1]
              if (ESCAPED_IN_REGEXPS_BUT_NOT_IN_LITERALS.has(next || '')) {
                // Drop the backslash: the character needs no escape in a GBNF literal.
                i += 1
                literal += pattern[i]
                i += 1
              } else {
                literal += pattern.slice(i, i + 2)
                i += 2
              }
            } else if (pattern[i] === '"') {
              literal += '\\"'
              i += 1
            } else if (
              !NON_LITERAL_SET.has(pattern[i] || '') &&
              (i === length - 1 ||
                literal === '' ||
                pattern[i + 1] === '.' ||
                !NON_LITERAL_SET.has(pattern[i + 1] || ''))
            ) {
              literal += pattern[i]
              i += 1
            } else {
              break
            }
          }
          if (literal !== '') {
            seq.push([literal, true])
          }
        }
      }

      return joinSeq()
    }

    return this._addRule(name, `"\\"" ${toRule(transform())} "\\"" space`)
  }

  /**
   * Returns the rule name for `ref`, visiting the referenced schema unless a
   * rule with the ref's last path segment already exists or the ref is
   * currently being resolved.
   *
   * @throws Error when `ref` was never registered in `this._refs`.
   */
  _resolveRef(ref: string): string {
    let refName = ref.split('/').pop() || ''
    if (!(refName in this._rules) && !this._refsBeingResolved.has(ref)) {
      const resolved = this._refs[ref]
      if (resolved === undefined) {
        // Without this check the visit below fails with an opaque TypeError.
        throw new Error(
          `Unresolved ref ${ref}: resolveRefs() must be run on the schema first`,
        )
      }
      this._refsBeingResolved.add(ref)
      refName = this.visit(resolved, refName)
      this._refsBeingResolved.delete(ref)
    }
    return refName
  }

  /**
   * Registers the grammar rules needed for `schema`.
   *
   * @param schema JSON schema node.
   * @param name Requested rule name; '' designates the root rule. A name that
   *   collides with a reserved rule name gets a trailing '-'.
   * @returns The name of the rule registered for this node.
   * @throws Error when the schema is not recognized.
   */
  visit(schema: any, name: string): string {
    const schemaType = schema.type
    const schemaFormat = schema.format
    // Reserved names get a '-' suffix, the empty name is the root rule, and
    // every other name is used as given.
    let ruleName = name
    if (name in RESERVED_NAMES) {
      ruleName = `${name}-`
    } else if (name === '') {
      ruleName = 'root'
    }

    const ref = schema.$ref
    if (ref !== undefined) {
      return this._addRule(ruleName, this._resolveRef(ref))
    } else if (schema.oneOf || schema.anyOf) {
      return this._addRule(
        ruleName,
        this._generateUnionRule(name, schema.oneOf || schema.anyOf),
      )
    } else if (Array.isArray(schemaType)) {
      // A list of types is a union of single-type schemas.
      return this._addRule(
        ruleName,
        this._generateUnionRule(
          name,
          schemaType.map((t) => ({ type: t })),
        ),
      )
    } else if ('const' in schema) {
      return this._addRule(ruleName, generateConstantRule(schema.const))
    } else if ('enum' in schema) {
      const rule = schema.enum
        .map((v: any) => generateConstantRule(v))
        .join(' | ')
      return this._addRule(ruleName, rule)
    } else if (
      (schemaType === undefined || schemaType === 'object') &&
      ('properties' in schema ||
        ('additionalProperties' in schema &&
          schema.additionalProperties !== true))
    ) {
      const required: Set<string> = new Set(schema.required || [])
      const properties = Object.entries(schema.properties ?? {})
      return this._addRule(
        ruleName,
        this._buildObjectRule(
          properties,
          required,
          name,
          schema.additionalProperties,
        ),
      )
    } else if (
      (schemaType === undefined || schemaType === 'object') &&
      'allOf' in schema
    ) {
      // Merge the properties of all components into one object rule.
      const required: Set<string> = new Set()
      const properties: Array<[string, any]> = []
      const addComponent = (compSchema: any, isRequired: boolean) => {
        if (compSchema.$ref !== undefined) {
          compSchema = this._refs[compSchema.$ref]
        }

        if ('properties' in compSchema) {
          for (const [propName, propSchema] of Object.entries(
            compSchema.properties,
          )) {
            properties.push([propName, propSchema])
            if (isRequired) {
              required.add(propName)
            }
          }
        }
      }

      for (const t of schema.allOf) {
        if ('anyOf' in t) {
          // Properties coming from an anyOf branch are optional.
          for (const tt of t.anyOf) {
            addComponent(tt, false)
          }
        } else {
          addComponent(t, true)
        }
      }

      return this._addRule(
        ruleName,
        this._buildObjectRule(
          properties,
          required,
          name,
          /* additionalProperties= */ false,
        ),
      )
    } else if (
      (schemaType === undefined || schemaType === 'array') &&
      ('items' in schema || 'prefixItems' in schema)
    ) {
      const items = schema.items ?? schema.prefixItems
      if (Array.isArray(items)) {
        // Tuple: one rule per position, separated by commas.
        const rules = items
          .map((item, i) =>
            this.visit(item, `${name ?? ''}${name ? '-' : ''}tuple-${i}`),
          )
          .join(' "," space ')
        return this._addRule(ruleName, `"[" space ${rules} "]" space`)
      } else {
        const itemRuleName = this.visit(
          items,
          `${name ?? ''}${name ? '-' : ''}item`,
        )
        const minItems = schema.minItems || 0
        const { maxItems } = schema
        return this._addRule(
          ruleName,
          `"[" space ${buildRepetition(itemRuleName, minItems, maxItems, {
            separatorRule: '"," space',
          })} "]" space`,
        )
      }
    } else if (
      (schemaType === undefined || schemaType === 'string') &&
      'pattern' in schema
    ) {
      return this._visitPattern(schema.pattern, ruleName)
    } else if (
      (schemaType === undefined || schemaType === 'string') &&
      /^uuid[1-5]?$/.test(schema.format || '')
    ) {
      return this._addPrimitive(
        ruleName === 'root' ? 'root' : schemaFormat,
        PRIMITIVE_RULES['uuid'],
      )
    } else if (
      (schemaType === undefined || schemaType === 'string') &&
      `${schema.format}-string` in STRING_FORMAT_RULES
    ) {
      const primName = `${schema.format}-string`
      return this._addRule(
        ruleName,
        this._addPrimitive(primName, STRING_FORMAT_RULES[primName]),
      )
    } else if (
      schemaType === 'string' &&
      ('minLength' in schema || 'maxLength' in schema)
    ) {
      const charRuleName = this._addPrimitive('char', PRIMITIVE_RULES['char'])
      const minLen = schema.minLength || 0
      const maxLen = schema.maxLength
      return this._addRule(
        ruleName,
        `"\\"" ${buildRepetition(charRuleName, minLen, maxLen)} "\\"" space`,
      )
    } else if (schemaType === 'object' || Object.keys(schema).length === 0) {
      // Unconstrained object, or an empty schema.
      return this._addRule(
        ruleName,
        this._addPrimitive('object', PRIMITIVE_RULES['object']),
      )
    } else {
      if (!(schemaType in PRIMITIVE_RULES)) {
        throw new Error(`Unrecognized schema: ${JSON.stringify(schema)}`)
      }
      // TODO: support minimum, maximum, exclusiveMinimum, exclusiveMaximum at least for zero
      return this._addPrimitive(
        ruleName === 'root' ? 'root' : schemaType,
        PRIMITIVE_RULES[schemaType],
      )
    }
  }

  /**
   * Registers a built-in rule under `name`, plus any of its dependencies that
   * are not registered yet.
   *
   * @returns The name under which the rule was registered.
   * @throws Error when `rule` or one of its dependencies is unknown.
   */
  _addPrimitive(name: string, rule: BuiltinRule | undefined) {
    if (!rule) {
      throw new Error(`Rule ${name} not known`)
    }
    const n = this._addRule(name, rule.content)
    for (const dep of rule.deps) {
      const depRule = PRIMITIVE_RULES[dep] || STRING_FORMAT_RULES[dep]
      if (!depRule) {
        throw new Error(`Rule ${dep} not known`)
      }
      if (!(dep in this._rules)) {
        this._addPrimitive(dep, depRule)
      }
    }
    return n
  }

  /**
   * Builds the GBNF body of an object rule.
   *
   * @param properties `[name, schema]` pairs of the declared properties.
   * @param required Names of the mandatory properties.
   * @param name Rule-name prefix for the helper rules created here.
   * @param additionalProperties `true` or a schema object to allow extra
   *   key/value pairs; any other value allows none.
   * @returns The rule body (not yet registered).
   */
  _buildObjectRule(
    properties: any[],
    required: Set<string>,
    name: string,
    additionalProperties: any,
  ) {
    const propOrder = this._propOrder
    // sort by position in prop_order (if specified) then by original order
    const sortedProps = properties
      .map(([k]) => k)
      .sort((a, b) => {
        // `??` rather than `||`: rank 0 is a valid position and must not be
        // replaced by Infinity.
        const orderA = propOrder[a] ?? Infinity
        const orderB = propOrder[b] ?? Infinity
        // Infinity - Infinity is NaN (falsy), so unranked pairs fall back to
        // their original order.
        return (
          orderA - orderB ||
          properties.findIndex(([k]) => k === a) -
            properties.findIndex(([k]) => k === b)
        )
      })

    // One `"name" space ":" space value` rule per property.
    const propKvRuleNames: { [key: string]: string } = {}
    for (const [propName, propSchema] of properties) {
      const propRuleName = this.visit(
        propSchema,
        `${name ?? ''}${name ? '-' : ''}${propName}`,
      )
      propKvRuleNames[propName] = this._addRule(
        `${name ?? ''}${name ? '-' : ''}${propName}-kv`,
        `${formatLiteral(
          JSON.stringify(propName),
        )} space ":" space ${propRuleName}`,
      )
    }
    const requiredProps = sortedProps.filter((k) => required.has(k))
    const optionalProps = sortedProps.filter((k) => !required.has(k))

    if (
      typeof additionalProperties === 'object' ||
      additionalProperties === true
    ) {
      // Extra key/value pairs are modelled as a pseudo-property named '*'.
      const subName = `${name ?? ''}${name ? '-' : ''}additional`
      const valueRule = this.visit(
        additionalProperties === true ? {} : additionalProperties,
        `${subName}-value`,
      )
      propKvRuleNames['*'] = this._addRule(
        `${subName}-kv`,
        `${this._addPrimitive(
          'string',
          PRIMITIVE_RULES['string'],
        )} ":" space ${valueRule}`,
      )
      optionalProps.push('*')
    }

    let rule = '"{" space '
    rule += requiredProps.map((k) => propKvRuleNames[k]).join(' "," space ')

    if (optionalProps.length > 0) {
      rule += ' ('
      if (requiredProps.length > 0) {
        rule += ' "," space ( '
      }

      // Expression for "property ks[0], optionally followed by later ones".
      const getRecursiveRefs = (ks: any[], firstIsOptional: boolean) => {
        const [k, ...rest] = ks
        const kvRuleName = propKvRuleNames[k]
        let res
        if (k === '*') {
          res = this._addRule(
            `${name ?? ''}${name ? '-' : ''}additional-kvs`,
            `${kvRuleName} ( "," space ${kvRuleName} )*`,
          )
        } else if (firstIsOptional) {
          res = `( "," space ${kvRuleName} )?`
        } else {
          res = kvRuleName
        }
        if (rest.length > 0) {
          res += ` ${this._addRule(
            `${name ?? ''}${name ? '-' : ''}${k}-rest`,
            getRecursiveRefs(rest, true) || '',
          )}`
        }
        return res
      }

      // One alternative per possible first optional property.
      rule += optionalProps
        .map((_: any, i: number) =>
          getRecursiveRefs(optionalProps.slice(i), false),
        )
        .join(' | ')
      if (requiredProps.length > 0) {
        rule += ' )'
      }
      rule += ' )?'
    }

    rule += ' "}" space'

    return rule
  }

  /** Renders all registered rules as `name ::= body` lines, sorted by name. */
  formatGrammar() {
    let grammar = ''
    for (const [name, rule] of Object.entries(this._rules).sort(([a], [b]) =>
      a.localeCompare(b),
    )) {
      grammar += `${name} ::= ${rule}\n`
    }
    return grammar
  }
}
|
823
|
+
|
824
|
+
/**
 * Converts a JSON schema into GBNF grammar text.
 *
 * With `allowFetch` set, `$ref` entries are resolved first and the grammar is
 * delivered through a Promise. Otherwise the schema is visited directly,
 * without a reference-resolution pass, and the grammar is returned synchronously.
 *
 * @param schema JSON schema to convert.
 * @param propOrder Optional rank per property name.
 * @param dotall Let `.` in patterns match any character.
 * @param allowFetch Resolve references (including remote ones) before converting.
 */
export const convertJsonSchemaToGrammar = ({
  schema,
  propOrder,
  dotall,
  allowFetch,
}: {
  schema: any
  propOrder?: PropOrder
  dotall?: boolean
  allowFetch?: boolean
}): string | Promise<string> => {
  const converter = new SchemaGrammarConverter({
    prop_order: propOrder,
    dotall,
    allow_fetch: allowFetch,
  })

  // Registers the rules starting from the root schema and renders them.
  const render = (): string => {
    converter.visit(schema, '')
    return converter.formatGrammar()
  }

  return allowFetch
    ? converter.resolveRefs(schema, '').then(() => render())
    : render()
}
|