zkjson 0.4.0 → 0.4.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/encoder-v2.js +9 -182
- package/package.json +1 -1
package/encoder-v2.js
CHANGED
@@ -395,178 +395,6 @@ function buildDic(data) {
|
|
395
395
|
return dp[0]
|
396
396
|
}
|
397
397
|
|
398
|
-
// Process each entry (in the sorted, deterministic order).
|
399
|
-
for (const entry of data) {
|
400
|
-
const key = entry.key
|
401
|
-
let compressed
|
402
|
-
if (key.length === 1) {
|
403
|
-
// For simple keys, copy as-is.
|
404
|
-
compressed = key.slice()
|
405
|
-
} else {
|
406
|
-
// Try element-by-element compression.
|
407
|
-
const repA = compressElementByElement(key)
|
408
|
-
const costA = computeCost(repA)
|
409
|
-
|
410
|
-
// Also try full segmentation.
|
411
|
-
const segRes = segmentKey(key)
|
412
|
-
if (segRes !== null) {
|
413
|
-
const repB = [segRes.seg] // Represent segmentation as a pointer.
|
414
|
-
const costB = segRes.cost
|
415
|
-
compressed = costB < costA ? repB : repA
|
416
|
-
} else compressed = repA
|
417
|
-
}
|
418
|
-
dict.push({ original: key, compressed })
|
419
|
-
}
|
420
|
-
|
421
|
-
// --- Step 4. Return the dictionary and key map.
|
422
|
-
// "dictionary" is an array of compressed keys.
|
423
|
-
// "keyMap" is the array of original keys (in the same, deterministic order).
|
424
|
-
return {
|
425
|
-
dictionary: dict.map(entry => {
|
426
|
-
return entry.compressed.length === 1 && !is(Array, entry.compressed[0])
|
427
|
-
? entry.compressed[0]
|
428
|
-
: entry.compressed
|
429
|
-
}),
|
430
|
-
keyMap: dict.map(entry => entry.original),
|
431
|
-
}
|
432
|
-
}
|
433
|
-
|
434
|
-
function buildDic(data) {
|
435
|
-
// --- Step 1. (Optional) Save original input order if needed.
|
436
|
-
data.forEach((entry, idx) => (entry._origIdx = idx))
|
437
|
-
|
438
|
-
// --- Step 2. Sort the data in "dictionary order."
|
439
|
-
// Primary: by key array length (shorter arrays come first).
|
440
|
-
// Secondary: for keys of equal length, by the total character length (ascending)
|
441
|
-
// so that, for example, ["jane"] (4 chars) comes before ["alice"] (5 chars).
|
442
|
-
// Tertiary: if still equal, compare element-by-element using natural order.
|
443
|
-
data.sort((a, b) => {
|
444
|
-
const keyA = a.key
|
445
|
-
const keyB = b.key
|
446
|
-
|
447
|
-
// Primary: Compare array lengths.
|
448
|
-
if (keyA.length !== keyB.length) return keyA.length - keyB.length
|
449
|
-
|
450
|
-
// Secondary: Compare total character lengths (ascending).
|
451
|
-
const totalA = keyA.reduce((acc, x) => acc + x.toString().length, 0)
|
452
|
-
const totalB = keyB.reduce((acc, x) => acc + x.toString().length, 0)
|
453
|
-
if (totalA !== totalB) return totalA - totalB
|
454
|
-
// Tertiary: Compare element-by-element using natural order.
|
455
|
-
for (let i = 0; i < keyA.length; i++) {
|
456
|
-
const elA = keyA[i]
|
457
|
-
const elB = keyB[i]
|
458
|
-
|
459
|
-
if (typeof elA === typeof elB) {
|
460
|
-
if (typeof elA === "number") {
|
461
|
-
if (elA !== elB) return elA - elB
|
462
|
-
} else if (typeof elA === "string") {
|
463
|
-
const cmp = elA.localeCompare(elB, undefined, { numeric: true })
|
464
|
-
if (cmp !== 0) return cmp
|
465
|
-
} else {
|
466
|
-
// Fallback: compare string representations.
|
467
|
-
const cmp = elA
|
468
|
-
.toString()
|
469
|
-
.localeCompare(elB.toString(), undefined, { numeric: true })
|
470
|
-
if (cmp !== 0) return cmp
|
471
|
-
}
|
472
|
-
} else {
|
473
|
-
// If types differ, compare string representations.
|
474
|
-
const cmp = elA
|
475
|
-
.toString()
|
476
|
-
.localeCompare(elB.toString(), undefined, { numeric: true })
|
477
|
-
if (cmp !== 0) return cmp
|
478
|
-
}
|
479
|
-
}
|
480
|
-
|
481
|
-
return 0
|
482
|
-
})
|
483
|
-
|
484
|
-
// --- Step 3. Build the dictionary.
|
485
|
-
// Each dictionary entry will be stored as an object with:
|
486
|
-
// - original: the original key (an array)
|
487
|
-
// - compressed: the computed compressed representation.
|
488
|
-
const dict = []
|
489
|
-
|
490
|
-
// Helper: For a given string, look for a previously defined simple key (an array of length 1).
|
491
|
-
function getPointerIndex(str) {
|
492
|
-
for (let i = 0; i < dict.length; i++) {
|
493
|
-
if (dict[i].original.length === 1 && dict[i].original[0] === str) return i
|
494
|
-
}
|
495
|
-
return -1
|
496
|
-
}
|
497
|
-
|
498
|
-
// Helper: Element-by-element compression.
|
499
|
-
// For each element in a composite key, if it is a string that already exists as a simple key,
|
500
|
-
// replace one or more consecutive occurrences with a pointer.
|
501
|
-
// A single occurrence becomes [dictIndex]; a group becomes [dictIndex, 0].
|
502
|
-
function compressElementByElement(key) {
|
503
|
-
const rep = []
|
504
|
-
let i = 0
|
505
|
-
while (i < key.length) {
|
506
|
-
const el = key[i]
|
507
|
-
if (typeof el === "string") {
|
508
|
-
const ptrIndex = getPointerIndex(el)
|
509
|
-
if (ptrIndex !== -1) {
|
510
|
-
let j = i
|
511
|
-
while (j < key.length && key[j] === el) {
|
512
|
-
j++
|
513
|
-
}
|
514
|
-
const groupLen = j - i
|
515
|
-
rep.push(groupLen === 1 ? [ptrIndex] : [ptrIndex, 0])
|
516
|
-
i = j
|
517
|
-
continue
|
518
|
-
}
|
519
|
-
}
|
520
|
-
rep.push(el)
|
521
|
-
i++
|
522
|
-
}
|
523
|
-
return rep
|
524
|
-
}
|
525
|
-
|
526
|
-
// Helper: Compute a "cost" for a given representation.
|
527
|
-
// Each literal (number or string) counts as 1; a pointer array counts as the number of numbers it holds.
|
528
|
-
function computeCost(rep) {
|
529
|
-
let cost = 0
|
530
|
-
for (const token of rep) cost += Array.isArray(token) ? token.length : 1
|
531
|
-
return cost
|
532
|
-
}
|
533
|
-
|
534
|
-
// Helper: Full segmentation compression.
|
535
|
-
// Try to segment the entire key as a concatenation of one or more previously defined dictionary entries.
|
536
|
-
// Uses dynamic programming over the key array.
|
537
|
-
// Returns an object { cost, seg } where seg is an array of dictionary indices.
|
538
|
-
function segmentKey(key) {
|
539
|
-
const n = key.length
|
540
|
-
const dp = Array(n + 1).fill(null)
|
541
|
-
dp[n] = { cost: 0, seg: [] }
|
542
|
-
|
543
|
-
for (let i = n - 1; i >= 0; i--) {
|
544
|
-
let best = null
|
545
|
-
// Try every dictionary entry.
|
546
|
-
for (let d = 0; d < dict.length; d++) {
|
547
|
-
const candidate = dict[d].original
|
548
|
-
const m = candidate.length
|
549
|
-
if (i + m <= n) {
|
550
|
-
let match = true
|
551
|
-
for (let k = 0; k < m; k++) {
|
552
|
-
if (key[i + k] !== candidate[k]) {
|
553
|
-
match = false
|
554
|
-
break
|
555
|
-
}
|
556
|
-
}
|
557
|
-
if (match && dp[i + m] !== null) {
|
558
|
-
const candidateCost = 1 + dp[i + m].cost // cost 1 for using this pointer.
|
559
|
-
if (best === null || candidateCost < best.cost) {
|
560
|
-
best = { cost: candidateCost, seg: [d].concat(dp[i + m].seg) }
|
561
|
-
}
|
562
|
-
}
|
563
|
-
}
|
564
|
-
}
|
565
|
-
dp[i] = best
|
566
|
-
}
|
567
|
-
return dp[0]
|
568
|
-
}
|
569
|
-
|
570
398
|
// Process each entry (in the sorted, deterministic order).
|
571
399
|
for (const entry of data) {
|
572
400
|
const key = entry.key
|
@@ -714,15 +542,15 @@ function mapDic(dic, len) {
|
|
714
542
|
while (dlen > 0) {
|
715
543
|
let type = dic.shift()
|
716
544
|
let elms = []
|
717
|
-
if (type ==
|
545
|
+
if (type == 7) {
|
718
546
|
let slen = dic.shift()
|
719
|
-
elms.push(
|
547
|
+
elms.push(slen)
|
720
548
|
for (let i = 0; i < slen; i++) elms.push(dic.shift())
|
721
549
|
_elms.push(elms)
|
722
|
-
} else if (type ==
|
550
|
+
} else if (type == 3) {
|
723
551
|
elms = concat(elms, [0, 0, dic.shift()])
|
724
552
|
_elms.push(elms)
|
725
|
-
} else if (type ==
|
553
|
+
} else if (type == 9) {
|
726
554
|
for (let v2 of _map[dic.shift()]) _elms.push(v2)
|
727
555
|
}
|
728
556
|
dlen--
|
@@ -743,25 +571,23 @@ function encodeDic(dict) {
|
|
743
571
|
for (let v2 of v) {
|
744
572
|
if (is(Array, v2)) {
|
745
573
|
len += v2.length
|
746
|
-
for (let v3 of v2)
|
747
|
-
elms = concat(elms, [5, v3])
|
748
|
-
}
|
574
|
+
for (let v3 of v2) elms = concat(elms, [9, v3])
|
749
575
|
} else {
|
750
576
|
len += 1
|
751
577
|
if (is(String, v2)) {
|
752
|
-
elms.push(
|
578
|
+
elms.push(7)
|
753
579
|
elms.push(v2.length)
|
754
580
|
elms = concat(
|
755
581
|
elms,
|
756
582
|
v2.split("").map(c => c.charCodeAt(0)),
|
757
583
|
)
|
758
584
|
} else {
|
759
|
-
elms = concat(elms, [
|
585
|
+
elms = concat(elms, [3, v2])
|
760
586
|
}
|
761
587
|
}
|
762
588
|
}
|
763
589
|
} else {
|
764
|
-
elms.push(
|
590
|
+
elms.push(7)
|
765
591
|
elms.push(v.length)
|
766
592
|
elms = concat(
|
767
593
|
elms,
|
@@ -772,6 +598,7 @@ function encodeDic(dict) {
|
|
772
598
|
}
|
773
599
|
return enc
|
774
600
|
}
|
601
|
+
|
775
602
|
function encode(json, nodic = false) {
|
776
603
|
let dic = null
|
777
604
|
let dictionary, keyMap
|