zkjson 0.4.0 → 0.4.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/encoder-v2.js +26 -191
- package/package.json +1 -1
package/encoder-v2.js
CHANGED
@@ -395,178 +395,6 @@ function buildDic(data) {
|
|
395
395
|
return dp[0]
|
396
396
|
}
|
397
397
|
|
398
|
-
// Process each entry (in the sorted, deterministic order).
|
399
|
-
for (const entry of data) {
|
400
|
-
const key = entry.key
|
401
|
-
let compressed
|
402
|
-
if (key.length === 1) {
|
403
|
-
// For simple keys, copy as-is.
|
404
|
-
compressed = key.slice()
|
405
|
-
} else {
|
406
|
-
// Try element-by-element compression.
|
407
|
-
const repA = compressElementByElement(key)
|
408
|
-
const costA = computeCost(repA)
|
409
|
-
|
410
|
-
// Also try full segmentation.
|
411
|
-
const segRes = segmentKey(key)
|
412
|
-
if (segRes !== null) {
|
413
|
-
const repB = [segRes.seg] // Represent segmentation as a pointer.
|
414
|
-
const costB = segRes.cost
|
415
|
-
compressed = costB < costA ? repB : repA
|
416
|
-
} else compressed = repA
|
417
|
-
}
|
418
|
-
dict.push({ original: key, compressed })
|
419
|
-
}
|
420
|
-
|
421
|
-
// --- Step 4. Return the dictionary and key map.
|
422
|
-
// "dictionary" is an array of compressed keys.
|
423
|
-
// "keyMap" is the array of original keys (in the same, deterministic order).
|
424
|
-
return {
|
425
|
-
dictionary: dict.map(entry => {
|
426
|
-
return entry.compressed.length === 1 && !is(Array, entry.compressed[0])
|
427
|
-
? entry.compressed[0]
|
428
|
-
: entry.compressed
|
429
|
-
}),
|
430
|
-
keyMap: dict.map(entry => entry.original),
|
431
|
-
}
|
432
|
-
}
|
433
|
-
|
434
|
-
function buildDic(data) {
|
435
|
-
// --- Step 1. (Optional) Save original input order if needed.
|
436
|
-
data.forEach((entry, idx) => (entry._origIdx = idx))
|
437
|
-
|
438
|
-
// --- Step 2. Sort the data in "dictionary order."
|
439
|
-
// Primary: by key array length (shorter arrays come first).
|
440
|
-
// Secondary: for keys of equal length, by the total character length (ascending)
|
441
|
-
// so that, for example, ["jane"] (4 chars) comes before ["alice"] (5 chars).
|
442
|
-
// Tertiary: if still equal, compare element-by-element using natural order.
|
443
|
-
data.sort((a, b) => {
|
444
|
-
const keyA = a.key
|
445
|
-
const keyB = b.key
|
446
|
-
|
447
|
-
// Primary: Compare array lengths.
|
448
|
-
if (keyA.length !== keyB.length) return keyA.length - keyB.length
|
449
|
-
|
450
|
-
// Secondary: Compare total character lengths (ascending).
|
451
|
-
const totalA = keyA.reduce((acc, x) => acc + x.toString().length, 0)
|
452
|
-
const totalB = keyB.reduce((acc, x) => acc + x.toString().length, 0)
|
453
|
-
if (totalA !== totalB) return totalA - totalB
|
454
|
-
// Tertiary: Compare element-by-element using natural order.
|
455
|
-
for (let i = 0; i < keyA.length; i++) {
|
456
|
-
const elA = keyA[i]
|
457
|
-
const elB = keyB[i]
|
458
|
-
|
459
|
-
if (typeof elA === typeof elB) {
|
460
|
-
if (typeof elA === "number") {
|
461
|
-
if (elA !== elB) return elA - elB
|
462
|
-
} else if (typeof elA === "string") {
|
463
|
-
const cmp = elA.localeCompare(elB, undefined, { numeric: true })
|
464
|
-
if (cmp !== 0) return cmp
|
465
|
-
} else {
|
466
|
-
// Fallback: compare string representations.
|
467
|
-
const cmp = elA
|
468
|
-
.toString()
|
469
|
-
.localeCompare(elB.toString(), undefined, { numeric: true })
|
470
|
-
if (cmp !== 0) return cmp
|
471
|
-
}
|
472
|
-
} else {
|
473
|
-
// If types differ, compare string representations.
|
474
|
-
const cmp = elA
|
475
|
-
.toString()
|
476
|
-
.localeCompare(elB.toString(), undefined, { numeric: true })
|
477
|
-
if (cmp !== 0) return cmp
|
478
|
-
}
|
479
|
-
}
|
480
|
-
|
481
|
-
return 0
|
482
|
-
})
|
483
|
-
|
484
|
-
// --- Step 3. Build the dictionary.
|
485
|
-
// Each dictionary entry will be stored as an object with:
|
486
|
-
// - original: the original key (an array)
|
487
|
-
// - compressed: the computed compressed representation.
|
488
|
-
const dict = []
|
489
|
-
|
490
|
-
// Helper: For a given string, look for a previously defined simple key (an array of length 1).
|
491
|
-
function getPointerIndex(str) {
|
492
|
-
for (let i = 0; i < dict.length; i++) {
|
493
|
-
if (dict[i].original.length === 1 && dict[i].original[0] === str) return i
|
494
|
-
}
|
495
|
-
return -1
|
496
|
-
}
|
497
|
-
|
498
|
-
// Helper: Element-by-element compression.
|
499
|
-
// For each element in a composite key, if it is a string that already exists as a simple key,
|
500
|
-
// replace one or more consecutive occurrences with a pointer.
|
501
|
-
// A single occurrence becomes [dictIndex]; a group becomes [dictIndex, 0].
|
502
|
-
function compressElementByElement(key) {
|
503
|
-
const rep = []
|
504
|
-
let i = 0
|
505
|
-
while (i < key.length) {
|
506
|
-
const el = key[i]
|
507
|
-
if (typeof el === "string") {
|
508
|
-
const ptrIndex = getPointerIndex(el)
|
509
|
-
if (ptrIndex !== -1) {
|
510
|
-
let j = i
|
511
|
-
while (j < key.length && key[j] === el) {
|
512
|
-
j++
|
513
|
-
}
|
514
|
-
const groupLen = j - i
|
515
|
-
rep.push(groupLen === 1 ? [ptrIndex] : [ptrIndex, 0])
|
516
|
-
i = j
|
517
|
-
continue
|
518
|
-
}
|
519
|
-
}
|
520
|
-
rep.push(el)
|
521
|
-
i++
|
522
|
-
}
|
523
|
-
return rep
|
524
|
-
}
|
525
|
-
|
526
|
-
// Helper: Compute a "cost" for a given representation.
|
527
|
-
// Each literal (number or string) counts as 1; a pointer array counts as the number of numbers it holds.
|
528
|
-
function computeCost(rep) {
|
529
|
-
let cost = 0
|
530
|
-
for (const token of rep) cost += Array.isArray(token) ? token.length : 1
|
531
|
-
return cost
|
532
|
-
}
|
533
|
-
|
534
|
-
// Helper: Full segmentation compression.
|
535
|
-
// Try to segment the entire key as a concatenation of one or more previously defined dictionary entries.
|
536
|
-
// Uses dynamic programming over the key array.
|
537
|
-
// Returns an object { cost, seg } where seg is an array of dictionary indices.
|
538
|
-
function segmentKey(key) {
|
539
|
-
const n = key.length
|
540
|
-
const dp = Array(n + 1).fill(null)
|
541
|
-
dp[n] = { cost: 0, seg: [] }
|
542
|
-
|
543
|
-
for (let i = n - 1; i >= 0; i--) {
|
544
|
-
let best = null
|
545
|
-
// Try every dictionary entry.
|
546
|
-
for (let d = 0; d < dict.length; d++) {
|
547
|
-
const candidate = dict[d].original
|
548
|
-
const m = candidate.length
|
549
|
-
if (i + m <= n) {
|
550
|
-
let match = true
|
551
|
-
for (let k = 0; k < m; k++) {
|
552
|
-
if (key[i + k] !== candidate[k]) {
|
553
|
-
match = false
|
554
|
-
break
|
555
|
-
}
|
556
|
-
}
|
557
|
-
if (match && dp[i + m] !== null) {
|
558
|
-
const candidateCost = 1 + dp[i + m].cost // cost 1 for using this pointer.
|
559
|
-
if (best === null || candidateCost < best.cost) {
|
560
|
-
best = { cost: candidateCost, seg: [d].concat(dp[i + m].seg) }
|
561
|
-
}
|
562
|
-
}
|
563
|
-
}
|
564
|
-
}
|
565
|
-
dp[i] = best
|
566
|
-
}
|
567
|
-
return dp[0]
|
568
|
-
}
|
569
|
-
|
570
398
|
// Process each entry (in the sorted, deterministic order).
|
571
399
|
for (const entry of data) {
|
572
400
|
const key = entry.key
|
@@ -632,7 +460,7 @@ function listKeys(v, key = [], keys = []) {
|
|
632
460
|
listKeys(v2, append(i, key), keys)
|
633
461
|
i++
|
634
462
|
}
|
635
|
-
} else if (typeof v
|
463
|
+
} else if (typeof v === "object" && v !== null) {
|
636
464
|
for (const k in v) listKeys(v[k], append(k, key), keys)
|
637
465
|
} else {
|
638
466
|
keys.push(key)
|
@@ -688,7 +516,8 @@ function applyDic(arr, dic) {
|
|
688
516
|
})
|
689
517
|
}
|
690
518
|
|
691
|
-
function encodePaths(data) {
|
519
|
+
function encodePaths(data, index = false) {
|
520
|
+
let i = 0
|
692
521
|
for (let v of data) {
|
693
522
|
let path = []
|
694
523
|
for (let v2 of v[0]) {
|
@@ -702,6 +531,8 @@ function encodePaths(data) {
|
|
702
531
|
}
|
703
532
|
}
|
704
533
|
v[0] = path
|
534
|
+
if (index) v.push(i)
|
535
|
+
i++
|
705
536
|
}
|
706
537
|
return data
|
707
538
|
}
|
@@ -714,15 +545,15 @@ function mapDic(dic, len) {
|
|
714
545
|
while (dlen > 0) {
|
715
546
|
let type = dic.shift()
|
716
547
|
let elms = []
|
717
|
-
if (type ==
|
548
|
+
if (type == 7) {
|
718
549
|
let slen = dic.shift()
|
719
|
-
elms.push(
|
550
|
+
elms.push(slen)
|
720
551
|
for (let i = 0; i < slen; i++) elms.push(dic.shift())
|
721
552
|
_elms.push(elms)
|
722
|
-
} else if (type ==
|
553
|
+
} else if (type == 3) {
|
723
554
|
elms = concat(elms, [0, 0, dic.shift()])
|
724
555
|
_elms.push(elms)
|
725
|
-
} else if (type ==
|
556
|
+
} else if (type == 9) {
|
726
557
|
for (let v2 of _map[dic.shift()]) _elms.push(v2)
|
727
558
|
}
|
728
559
|
dlen--
|
@@ -743,25 +574,23 @@ function encodeDic(dict) {
|
|
743
574
|
for (let v2 of v) {
|
744
575
|
if (is(Array, v2)) {
|
745
576
|
len += v2.length
|
746
|
-
for (let v3 of v2)
|
747
|
-
elms = concat(elms, [5, v3])
|
748
|
-
}
|
577
|
+
for (let v3 of v2) elms = concat(elms, [9, v3])
|
749
578
|
} else {
|
750
579
|
len += 1
|
751
580
|
if (is(String, v2)) {
|
752
|
-
elms.push(
|
581
|
+
elms.push(7)
|
753
582
|
elms.push(v2.length)
|
754
583
|
elms = concat(
|
755
584
|
elms,
|
756
585
|
v2.split("").map(c => c.charCodeAt(0)),
|
757
586
|
)
|
758
587
|
} else {
|
759
|
-
elms = concat(elms, [
|
588
|
+
elms = concat(elms, [3, v2])
|
760
589
|
}
|
761
590
|
}
|
762
591
|
}
|
763
592
|
} else {
|
764
|
-
elms.push(
|
593
|
+
elms.push(7)
|
765
594
|
elms.push(v.length)
|
766
595
|
elms = concat(
|
767
596
|
elms,
|
@@ -772,7 +601,9 @@ function encodeDic(dict) {
|
|
772
601
|
}
|
773
602
|
return enc
|
774
603
|
}
|
775
|
-
|
604
|
+
|
605
|
+
function encode(_json, nodic = false) {
|
606
|
+
let json = clone(_json)
|
776
607
|
let dic = null
|
777
608
|
let dictionary, keyMap
|
778
609
|
if (nodic !== true) {
|
@@ -780,10 +611,9 @@ function encode(json, nodic = false) {
|
|
780
611
|
if (dictionary.length > 0) dic = encodeDic(dictionary)
|
781
612
|
}
|
782
613
|
let enc = _encode(json)
|
783
|
-
|
784
|
-
|
785
|
-
|
786
|
-
enc.sort((a, b) => {
|
614
|
+
let _enc = clone(enc)
|
615
|
+
_enc = encodePaths(_enc, true)
|
616
|
+
_enc.sort((a, b) => {
|
787
617
|
const isUndefined = v => typeof v == "undefined"
|
788
618
|
const max = Math.max(a[0].length, b[0].length)
|
789
619
|
if (max > 0) {
|
@@ -809,10 +639,14 @@ function encode(json, nodic = false) {
|
|
809
639
|
}
|
810
640
|
return 0
|
811
641
|
})
|
642
|
+
if (dic) enc = applyDic(enc, keyMap)
|
643
|
+
enc = encodePaths(enc)
|
644
|
+
let enc2 = []
|
645
|
+
for (let v of _enc) enc2.push(enc[v[2]])
|
812
646
|
const _dic = dic ? [1, 0, 2, dictionary.length, ...dic] : []
|
813
647
|
return concat(
|
814
648
|
_dic,
|
815
|
-
|
649
|
+
enc2.reduce((arr, v) => arr.concat([...flattenPath(v[0]), ...v[1]]), []),
|
816
650
|
)
|
817
651
|
}
|
818
652
|
|
@@ -939,7 +773,8 @@ function decodeVal(arr) {
|
|
939
773
|
return val
|
940
774
|
}
|
941
775
|
|
942
|
-
function decode(
|
776
|
+
function decode(_arr) {
|
777
|
+
let arr = clone(_arr)
|
943
778
|
const decoded = _decode(arr)
|
944
779
|
let json =
|
945
780
|
decoded[0]?.[0]?.[0]?.[0] == 0 && decoded[0]?.[0]?.[0]?.[1] == 0 ? [] : {}
|