verifyhash 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +201 -0
- package/README.md +883 -0
- package/cli/abi/ContributionRegistry.json +881 -0
- package/cli/agent.js +2173 -0
- package/cli/anchor-artifact.js +853 -0
- package/cli/anchor.js +400 -0
- package/cli/claim.js +881 -0
- package/cli/core/agent-commit.js +448 -0
- package/cli/core/agent-session.js +598 -0
- package/cli/core/anchor-binding.js +663 -0
- package/cli/core/attestation.js +580 -0
- package/cli/core/evidence-plans.js +495 -0
- package/cli/core/fixtures/evidence-plans/baseline.json +19 -0
- package/cli/core/fulfill-intake.js +1082 -0
- package/cli/core/go-live-preflight.js +481 -0
- package/cli/core/license.js +534 -0
- package/cli/core/manifest.js +243 -0
- package/cli/core/packetseal.js +591 -0
- package/cli/core/registryArtifact.js +49 -0
- package/cli/core/revocation.js +539 -0
- package/cli/core/rfc3161.js +389 -0
- package/cli/core/timestamp.js +482 -0
- package/cli/core/trust-asof.js +479 -0
- package/cli/dataset.js +2950 -0
- package/cli/evidence.js +2227 -0
- package/cli/fulfill-webhook-http.js +438 -0
- package/cli/git.js +220 -0
- package/cli/hash.js +550 -0
- package/cli/identity.js +1072 -0
- package/cli/journal-cli.js +1110 -0
- package/cli/journal-log.js +454 -0
- package/cli/journal.js +334 -0
- package/cli/lineage.js +447 -0
- package/cli/list.js +287 -0
- package/cli/parcel.js +1509 -0
- package/cli/proof.js +578 -0
- package/cli/prove.js +300 -0
- package/cli/receipt.js +631 -0
- package/cli/registry.js +331 -0
- package/cli/reputation.js +344 -0
- package/cli/revocation.js +495 -0
- package/cli/serve-verify-http.js +298 -0
- package/cli/serve-verify.js +333 -0
- package/cli/show.js +339 -0
- package/cli/verify.js +383 -0
- package/cli/vh.js +3927 -0
- package/docs/ADOPT.md +183 -0
- package/docs/ADOPTION.json +11 -0
- package/docs/AGENTTRACE.md +247 -0
- package/docs/ANCHORING.md +167 -0
- package/docs/AUDIT.md +55 -0
- package/docs/CONFORMANCE.md +107 -0
- package/docs/DATALEDGER.md +638 -0
- package/docs/DECIDE.md +47 -0
- package/docs/DECISIONS-PENDING.md +27 -0
- package/docs/DEPLOY-PUBLIC-SITE.md +301 -0
- package/docs/ENGINE-LEDGER.json +12 -0
- package/docs/EVIDENCE.md +519 -0
- package/docs/GO-LIVE.md +66 -0
- package/docs/IDENTITY.md +123 -0
- package/docs/INDEPENDENT-VERIFICATION.md +377 -0
- package/docs/INTEGRITY-JOURNAL.md +337 -0
- package/docs/KEY-LIFECYCLE.md +179 -0
- package/docs/LICENSING.md +46 -0
- package/docs/LINEAGE.md +307 -0
- package/docs/LOOP-AUDIT-2026-07-03.json +580 -0
- package/docs/LOOP-HARDENING-PLAN.md +44 -0
- package/docs/MERKLE-LEAVES.md +113 -0
- package/docs/METRICS.jsonl +31 -0
- package/docs/MORNING.md +204 -0
- package/docs/PILOT.md +444 -0
- package/docs/PROOFPARCEL.md +227 -0
- package/docs/PROOFS.md +262 -0
- package/docs/RECEIPTS.md +341 -0
- package/docs/REPUTATION.md +158 -0
- package/docs/SDK.md +301 -0
- package/docs/STRATEGY-ARCHIVE.md +5055 -0
- package/docs/SUPERVISOR-RUNBOOK.md +52 -0
- package/docs/TRUST-BOUNDARIES.md +335 -0
- package/docs/TRUSTLEDGER.md +1976 -0
- package/docs/USAGE-BUDGET.json +121 -0
- package/docs/VERIFY-SERVICE.md +168 -0
- package/index.js +160 -0
- package/package.json +41 -0
- package/trustledger/build-standalone.js +796 -0
- package/trustledger/cli.js +3179 -0
- package/trustledger/close.js +391 -0
- package/trustledger/corpus.js +159 -0
- package/trustledger/dist/BUILD-PROVENANCE.json +99 -0
- package/trustledger/dist/trustledger-standalone.html +6197 -0
- package/trustledger/dist/trustledger-standalone.html.sha256 +1 -0
- package/trustledger/door-core.js +442 -0
- package/trustledger/fixtures/bank.csv +7 -0
- package/trustledger/fixtures/bank.malformed.csv +3 -0
- package/trustledger/fixtures/bank.noalias.csv +5 -0
- package/trustledger/fixtures/bank.ofx +34 -0
- package/trustledger/fixtures/bank.real.csv +5 -0
- package/trustledger/fixtures/corpus/_shared/prior-close.json +22 -0
- package/trustledger/fixtures/corpus/bank-book-mismatch--benign-twin/inputs.json +14 -0
- package/trustledger/fixtures/corpus/bank-book-mismatch--benign-twin/meta.json +7 -0
- package/trustledger/fixtures/corpus/bank-book-mismatch--out-of-trust/inputs.json +14 -0
- package/trustledger/fixtures/corpus/bank-book-mismatch--out-of-trust/meta.json +7 -0
- package/trustledger/fixtures/corpus/continuity-break--benign-twin/inputs.json +15 -0
- package/trustledger/fixtures/corpus/continuity-break--benign-twin/meta.json +7 -0
- package/trustledger/fixtures/corpus/continuity-break--out-of-trust/inputs.json +15 -0
- package/trustledger/fixtures/corpus/continuity-break--out-of-trust/meta.json +7 -0
- package/trustledger/fixtures/corpus/negative-tenant-ledger--benign-twin/inputs.json +13 -0
- package/trustledger/fixtures/corpus/negative-tenant-ledger--benign-twin/meta.json +7 -0
- package/trustledger/fixtures/corpus/negative-tenant-ledger--out-of-trust/inputs.json +13 -0
- package/trustledger/fixtures/corpus/negative-tenant-ledger--out-of-trust/meta.json +7 -0
- package/trustledger/fixtures/corpus/owner-overdraw--benign-twin/inputs.json +15 -0
- package/trustledger/fixtures/corpus/owner-overdraw--benign-twin/meta.json +7 -0
- package/trustledger/fixtures/corpus/owner-overdraw--out-of-trust/inputs.json +15 -0
- package/trustledger/fixtures/corpus/owner-overdraw--out-of-trust/meta.json +7 -0
- package/trustledger/fixtures/corpus/security-deposit-segregation--benign-twin/inputs.json +16 -0
- package/trustledger/fixtures/corpus/security-deposit-segregation--benign-twin/meta.json +7 -0
- package/trustledger/fixtures/corpus/security-deposit-segregation--out-of-trust/inputs.json +13 -0
- package/trustledger/fixtures/corpus/security-deposit-segregation--out-of-trust/meta.json +7 -0
- package/trustledger/fixtures/corpus/subledger-out-of-balance--benign-twin/inputs.json +13 -0
- package/trustledger/fixtures/corpus/subledger-out-of-balance--benign-twin/meta.json +7 -0
- package/trustledger/fixtures/corpus/subledger-out-of-balance--out-of-trust/inputs.json +13 -0
- package/trustledger/fixtures/corpus/subledger-out-of-balance--out-of-trust/meta.json +7 -0
- package/trustledger/fixtures/e2e/bank.aliased.csv +4 -0
- package/trustledger/fixtures/e2e/bank.csv +4 -0
- package/trustledger/fixtures/e2e/bank.nsf.csv +4 -0
- package/trustledger/fixtures/e2e/quickbooks.csv +6 -0
- package/trustledger/fixtures/e2e/quickbooks.nsf.csv +8 -0
- package/trustledger/fixtures/e2e/rentroll.csv +6 -0
- package/trustledger/fixtures/e2e/rentroll.nsf.csv +8 -0
- package/trustledger/fixtures/e2e/rentroll.short.csv +5 -0
- package/trustledger/fixtures/plans/baseline.json +25 -0
- package/trustledger/fixtures/plans/price-binding.example.json +27 -0
- package/trustledger/fixtures/policy/ambiguous-deposit-example.json +12 -0
- package/trustledger/fixtures/policy/baseline.json +19 -0
- package/trustledger/fixtures/policy/ca-example.json +12 -0
- package/trustledger/fixtures/policy/negative-tenant-ledger-example.json +12 -0
- package/trustledger/fixtures/policy/owner-overdraw-example.json +12 -0
- package/trustledger/fixtures/quickbooks.csv +7 -0
- package/trustledger/fixtures/quickbooks.real.csv +5 -0
- package/trustledger/fixtures/rentroll.csv +6 -0
- package/trustledger/fixtures/rentroll.real.csv +4 -0
- package/trustledger/ingest.js +1163 -0
- package/trustledger/lib/policy-bundled-loader.js +44 -0
- package/trustledger/lib/sha256-vendored.js +227 -0
- package/trustledger/license.js +563 -0
- package/trustledger/match.js +551 -0
- package/trustledger/plans.js +551 -0
- package/trustledger/policy.js +398 -0
- package/trustledger/public/index.html +512 -0
- package/trustledger/reconcile.js +1486 -0
- package/trustledger/report.js +887 -0
- package/trustledger/seal.js +854 -0
- package/trustledger/server.js +391 -0
- package/trustledger/valueproof.js +350 -0
package/cli/hash.js
ADDED
|
@@ -0,0 +1,550 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
|
|
3
|
+
// Content hashing for the verifyhash CLI.
|
|
4
|
+
//
|
|
5
|
+
// Two operations, both designed to line up exactly with the on-chain contract:
|
|
6
|
+
//
|
|
7
|
+
// * hashFile(path) -> keccak256 of the file's raw bytes. This is the same digest
|
|
8
|
+
// the contract sees: `keccak256(abi.encodePacked(content))` in Solidity. Anchoring
|
|
9
|
+
// this value and later proving against it requires byte-for-byte equality.
|
|
10
|
+
//
|
|
11
|
+
// * hashDir(path) -> a *stable, sorted-leaf* Merkle root whose proofs verify against
|
|
12
|
+
// ContributionRegistry.verifyLeaf. The tree is DOMAIN-SEPARATED (RFC 6962 /
|
|
13
|
+
// OpenZeppelin style) so that a crafted interior node can never be re-presented as a
|
|
14
|
+
// leaf (second-preimage resistance), AND every leaf is *path-bound* so the root commits
|
|
15
|
+
// to file NAMES as well as their content:
|
|
16
|
+
// - each file's content digest is c = keccak256(file bytes)
|
|
17
|
+
// - the per-file leaf VALUE is the path-bound digest
|
|
18
|
+
// pathLeaf = keccak256(DIR_LEAF_DOMAIN ++ relPath ++ 0x00 ++ c)
|
|
19
|
+
// (DIR_LEAF_DOMAIN is `domainPrefix`; the 0x00 byte separates the variable-length
|
|
20
|
+
// relPath from the fixed-length content digest so no (relPath, c) pair can be
|
|
21
|
+
// re-segmented into a different (relPath', c') pair — an unambiguous encoding).
|
|
22
|
+
// - that pathLeaf is then domain-tagged for the tree:
|
|
23
|
+
// leafHash(pathLeaf) = keccak256(LEAF_TAG ++ pathLeaf)
|
|
24
|
+
// - an interior node is nodeHash(a,b) = keccak256(NODE_TAG ++ min(a,b) ++ max(a,b))
|
|
25
|
+
// The on-chain verifyLeaf applies LEAF_TAG to whatever 32-byte value it is handed and
|
|
26
|
+
// folds with NODE_TAG. The CLI hands it the pathLeaf (NOT the bare content digest), so
|
|
27
|
+
// a root produced here is exactly the root the contract reconstructs from a pathLeaf +
|
|
28
|
+
// proof. The two conventions are byte-identical; the contract needs no change.
|
|
29
|
+
//
|
|
30
|
+
// WHAT THE ROOT COMMITS TO. Because the path is hashed into every leaf, the directory
|
|
31
|
+
// root commits to the full set of (relPath, content) pairs — both the names and the
|
|
32
|
+
// bytes. Renaming a file (same bytes, new path) changes that file's pathLeaf and hence
|
|
33
|
+
// the root; moving a file between directories changes its relPath and hence the root;
|
|
34
|
+
// editing a byte changes c and hence the root. Two trees share a root iff they contain
|
|
35
|
+
// the identical set of files at the identical relative paths with identical content.
|
|
36
|
+
//
|
|
37
|
+
// "Stable" means the root does not depend on filesystem enumeration order: leaves are
|
|
38
|
+
// sorted before the tree is built, so the same set of files always yields the same root.
|
|
39
|
+
// Relative paths are normalized to forward slashes so the root is identical regardless
|
|
40
|
+
// of the host OS path separator.
|
|
41
|
+
|
|
42
|
+
const fs = require("fs");
|
|
43
|
+
const path = require("path");
|
|
44
|
+
const { keccak256, concat, toUtf8Bytes } = require("ethers");
|
|
45
|
+
const { keccak256: streamingKeccak256 } = require("js-sha3");
|
|
46
|
+
|
|
47
|
+
/**
 * Compute the keccak256 digest of a file's raw bytes in a single pass over an in-memory copy.
 *
 * The entire file is loaded with one read and hashed in one shot, so the digest depends only on
 * the bytes on disk. A zero-length file produces the keccak256 of empty input.
 *
 * @param {string} filePath path of the file to hash
 * @returns {string} 0x-prefixed bytes32 hex digest
 */
function hashFile(filePath) {
  // No encoding argument: readFileSync hands back a Buffer (length 0 for an empty file).
  const fileBytes = fs.readFileSync(filePath);
  const digest = keccak256(fileBytes);
  return digest;
}
|
|
58
|
+
|
|
59
|
+
/**
 * Compute the keccak256 digest of bytes already held in memory.
 *
 * Lets callers and tests hash content without going through the filesystem; for the same bytes
 * the result equals what hashFile returns for a file containing them.
 *
 * @param {Buffer|Uint8Array|string} bytes a Buffer/Uint8Array, or a 0x hex string
 * @returns {string} 0x-prefixed bytes32 hex digest
 */
function hashBytes(bytes) {
  const digest = keccak256(bytes);
  return digest;
}
|
|
68
|
+
|
|
69
|
+
// Read a file in fixed-size chunks rather than slurping it whole. 1 MiB balances syscall overhead
|
|
70
|
+
// against peak memory: a multi-gigabyte dataset file is hashed with at most ~1 MiB resident at a time.
|
|
71
|
+
const STREAM_CHUNK_BYTES = 1024 * 1024; // 1 MiB: size of the reusable read buffer in hashFileStream
|
|
72
|
+
|
|
73
|
+
/**
 * Compute the keccak256 digest of a file by feeding it through an incremental hasher one chunk at
 * a time, so memory use is bounded by a single STREAM_CHUNK_BYTES buffer rather than the file size.
 *
 * The file descriptor is always closed, even if a read throws. A file with no bytes never calls
 * `update`, so the result is the digest of empty input.
 *
 * @param {string} filePath path of the file to hash
 * @returns {string} 0x-prefixed bytes32 hex digest
 */
function hashFileStream(filePath) {
  const hasher = streamingKeccak256.create();
  const descriptor = fs.openSync(filePath, "r");
  try {
    // One buffer is reused for every read; its contents beyond `got` are stale and must be ignored.
    const chunk = Buffer.allocUnsafe(STREAM_CHUNK_BYTES);
    // Position `null` means "continue from the descriptor's current offset".
    let got = fs.readSync(descriptor, chunk, 0, STREAM_CHUNK_BYTES, null);
    while (got !== 0) {
      // Hash only the freshly read prefix (subarray is a view, not a copy).
      hasher.update(chunk.subarray(0, got));
      got = fs.readSync(descriptor, chunk, 0, STREAM_CHUNK_BYTES, null);
    }
  } finally {
    fs.closeSync(descriptor);
  }
  return `0x${hasher.hex()}`;
}
|
|
104
|
+
|
|
105
|
+
// Domain tags, byte-identical to ContributionRegistry's LEAF_TAG / NODE_TAG. These keep a leaf
|
|
106
|
+
// hash, an interior-node hash, and a bare content digest in three disjoint value spaces, so an
|
|
107
|
+
// interior node can never be replayed as a leaf (second-preimage resistance).
|
|
108
|
+
const LEAF_TAG = "0x00"; // hex for the single byte that leafHash prepends to a leaf value
|
|
109
|
+
const NODE_TAG = "0x01"; // hex for the single byte that nodeHash prepends to a sorted child pair
|
|
110
|
+
|
|
111
|
+
// Domain prefix for path-bound directory leaves (the `domainPrefix` in the leaf formula). A fixed,
|
|
112
|
+
// versioned ASCII tag so a directory pathLeaf lives in its own value space: it can never collide
|
|
113
|
+
// with a bare content digest, an on-chain anchor of a single file, or a leaf from a future scheme.
|
|
114
|
+
// Bump the version suffix if the leaf encoding ever changes, to keep old and new roots disjoint.
|
|
115
|
+
const DIR_LEAF_DOMAIN_STR = "verifyhash/dir-leaf/v1"; // human-readable tag; never hashed into a leaf directly, only via its keccak256
|
|
116
|
+
const DIR_LEAF_DOMAIN = keccak256(toUtf8Bytes(DIR_LEAF_DOMAIN_STR)); // keccak256 of the tag's UTF-8 bytes: the 32-byte fixed-length prefix pathLeaf puts first in its preimage
|
|
117
|
+
|
|
118
|
+
// Separator byte between the variable-length relPath and the fixed-length content digest.
|
|
119
|
+
const PATH_SEP = "0x00"; // hex for one zero byte; pathLeaf places it between the relPath bytes and the content digest
|
|
120
|
+
|
|
121
|
+
/**
 * Convert a relative path into the canonical form used inside leaves: the host's path separator
 * is replaced by "/", and a single leading "./" (if present) is dropped.
 *
 * Only the host separator (`path.sep`) is rewritten, and only one leading "./" is removed.
 *
 * @param {string} relPath relative path using the host's separator
 * @returns {string} the same path with forward slashes and no leading "./"
 */
function toPosixRel(relPath) {
  const segments = relPath.split(path.sep);
  const slashed = segments.join("/");
  if (slashed.startsWith("./")) {
    return slashed.slice(2);
  }
  return slashed;
}
|
|
131
|
+
|
|
132
|
+
/**
 * Compute the path-bound leaf value for one file:
 *   pathLeaf(relPath, c) = keccak256(DIR_LEAF_DOMAIN ++ utf8(relPath) ++ 0x00 ++ c)
 *
 * The relative path is normalized with toPosixRel first, so the same file yields the same leaf
 * regardless of the host's path separator. Because the path is part of the preimage, the same
 * content stored under a different name produces a different leaf. The preimage layout is a
 * fixed-length domain prefix, the variable-length path, the PATH_SEP byte, then the content digest.
 *
 * The returned value is still untagged: buildTree applies leafHash (LEAF_TAG) on top of it when it
 * forms the bottom layer of the tree.
 *
 * @param {string} relPath file path relative to the hashed root
 * @param {string} contentDigest 0x bytes32, the keccak256 of the file's bytes
 * @returns {string} 0x bytes32 path-bound leaf value
 */
function pathLeaf(relPath, contentDigest) {
  const canonicalPath = toPosixRel(relPath);
  const pathBytes = toUtf8Bytes(canonicalPath);
  const preimage = concat([DIR_LEAF_DOMAIN, pathBytes, PATH_SEP, contentDigest]);
  return keccak256(preimage);
}
|
|
153
|
+
|
|
154
|
+
/**
 * Tag a 32-byte value as a tree leaf:
 *   leafHash(c) = keccak256(LEAF_TAG ++ c)
 *
 * The LEAF_TAG prefix differs from the NODE_TAG prefix used by nodeHash, so a leaf preimage and an
 * interior-node preimage never start with the same byte. The input is whatever 32-byte value the
 * caller wants at the bottom of the tree (for directory trees in this file, a pathLeaf).
 *
 * @param {string} c 0x bytes32 value to tag
 * @returns {string} 0x bytes32 tagged leaf
 */
function leafHash(c) {
  const taggedPreimage = concat([LEAF_TAG, c]);
  return keccak256(taggedPreimage);
}
|
|
166
|
+
|
|
167
|
+
/**
 * Hash two child values into their parent, independent of argument order:
 *   nodeHash(a, b) = keccak256(NODE_TAG ++ min(a, b) ++ max(a, b))
 *
 * The children are ordered by their numeric value (each hex string parsed as an unsigned integer)
 * before hashing, so nodeHash(a, b) === nodeHash(b, a). When the two values are equal, `a` is
 * placed first.
 *
 * @param {string} a 0x bytes32
 * @param {string} b 0x bytes32
 * @returns {string} 0x bytes32 parent hash
 */
function nodeHash(a, b) {
  const aGoesFirst = BigInt(a) <= BigInt(b);
  const lower = aGoesFirst ? a : b;
  const upper = aGoesFirst ? b : a;
  return keccak256(concat([NODE_TAG, lower, upper]));
}
|
|
180
|
+
|
|
181
|
+
/**
 * Recursively collect every regular file under `dirPath`.
 *
 * Each returned path is `dirPath` joined with the entry names below it, so the paths are absolute
 * only when `dirPath` itself is absolute. Entries are reported in directory-enumeration order,
 * with each subdirectory's files appearing at the position of that subdirectory (depth-first).
 *
 * Only entries whose directory-entry type is "file" are returned and only "directory" entries are
 * descended into; symlinks, sockets and FIFOs are skipped because they have no stable content hash.
 *
 * Results are appended to one shared array rather than merged with `push(...subtree)`: spreading a
 * very large subtree as call arguments can exceed the engine's argument limit and throw a
 * RangeError, which would make big dataset directories unhashable.
 *
 * @param {string} dirPath directory to walk
 * @returns {string[]} file paths (dirPath-prefixed)
 * @throws {Error} whatever fs.readdirSync throws (e.g. missing directory, permission denied)
 */
function listFiles(dirPath) {
  const out = [];
  const walk = (dir) => {
    const entries = fs.readdirSync(dir, { withFileTypes: true });
    for (const entry of entries) {
      const full = path.join(dir, entry.name);
      if (entry.isDirectory()) {
        walk(full);
      } else if (entry.isFile()) {
        out.push(full);
      }
      // Anything else (symlink, socket, fifo, device) is intentionally ignored.
    }
  };
  walk(dirPath);
  return out;
}
|
|
201
|
+
|
|
202
|
+
/**
 * Build a sorted-leaf, domain-tagged Merkle tree and return all of its layers, bottom-up.
 *
 * Steps:
 *   1. Copy `leaves` and sort the copy ascending by numeric value, so the result does not depend
 *      on the order the caller supplied them in. This sorted, UNTAGGED array is returned as
 *      `sortedLeaves` for index lookups.
 *   2. Map every sorted value through leafHash to form the bottom layer, `layers[0]`.
 *   3. Repeatedly pair neighbours with nodeHash until a single value remains. A node left without
 *      a neighbour at the end of an odd-length layer is paired with itself, so every layer
 *      contributes exactly one sibling to every proof (see proofForIndex).
 *
 * A single input leaf yields a one-layer tree whose root is leafHash(leaf).
 *
 * @param {string[]} leaves array of 0x bytes32 values (for directory trees, path-bound leaves)
 * @returns {{ root: string, layers: string[][], sortedLeaves: string[] }}
 * @throws {Error} when `leaves` is empty
 */
function buildTree(leaves) {
  if (leaves.length === 0) {
    throw new Error("cannot build a Merkle tree from zero leaves");
  }

  const byValueAscending = (a, b) => {
    const left = BigInt(a);
    const right = BigInt(b);
    if (left < right) return -1;
    if (left > right) return 1;
    return 0;
  };
  const sortedLeaves = leaves.slice().sort(byValueAscending);

  // The bottom layer holds the TAGGED leaves; everything above is folded from it.
  const layers = [sortedLeaves.map((value) => leafHash(value))];
  let current = layers[0];
  while (current.length > 1) {
    const parents = [];
    for (let i = 0; i < current.length; i += 2) {
      const left = current[i];
      // No right-hand neighbour: the lone node is hashed against itself.
      const right = i + 1 < current.length ? current[i + 1] : left;
      parents.push(nodeHash(left, right));
    }
    layers.push(parents);
    current = parents;
  }

  const topLayer = layers[layers.length - 1];
  return { root: topLayer[0], layers, sortedLeaves };
}
|
|
250
|
+
|
|
251
|
+
/**
 * Produce the Merkle proof for the leaf at `index` in `layers` (as returned by buildTree).
 *
 * The proof lists one sibling per level, from the leaf layer up to (but excluding) the root
 * layer. At each level the sibling is the other member of the node's pair; when the node is the
 * unpaired last element of an odd-length level, buildTree hashed it with itself, so the node's own
 * value is emitted as its sibling. Every proof therefore has exactly `layers.length - 1` entries.
 *
 * `layers[0]` is the tagged-leaf layer, so the emitted values are the tagged hashes, not the raw
 * leaf values.
 *
 * The index is validated up front. Without the check, a negative, fractional or too-large index
 * would silently produce a proof containing `undefined` entries instead of failing.
 *
 * @param {string[][]} layers tree layers, bottom-up
 * @param {number} index position of the leaf in the *sorted* leaf layer (`layers[0]`)
 * @returns {string[]} proof (array of 0x bytes32), empty for a single-leaf tree
 * @throws {RangeError} when `index` is not an integer within `layers[0]`
 */
function proofForIndex(layers, index) {
  const leafCount = Array.isArray(layers) && layers.length > 0 ? layers[0].length : 0;
  if (!Number.isInteger(index) || index < 0 || index >= leafCount) {
    throw new RangeError(
      `leaf index out of range: ${String(index)} (tree has ${leafCount} leaves)`
    );
  }

  const proof = [];
  let idx = index;
  for (let l = 0; l < layers.length - 1; l++) {
    const lvl = layers[l];
    // Partner within the pair: even positions look right, odd positions look left.
    const sibling = idx % 2 === 0 ? idx + 1 : idx - 1;
    // Out-of-range partner means this is the lone last node, which was paired with itself.
    proof.push(sibling < lvl.length ? lvl[sibling] : lvl[idx]);
    idx = Math.floor(idx / 2);
  }
  return proof;
}
|
|
280
|
+
|
|
281
|
+
/**
 * Hash a directory into a sorted-leaf Merkle root that commits to both file names and content.
 *
 * Every regular file found by listFiles becomes one entry `{ path, abs, content }`, where `path`
 * is the file's location relative to `dirPath` in forward-slash form, `abs` is the path listFiles
 * reported, and `content` is the file's full bytes. The entries are passed to hashEntries, which
 * derives the path-bound leaves, the tree, and the lookup helpers.
 *
 * All file contents are held in memory at once while the entries are built; hashDirStream is the
 * variant that avoids this.
 *
 * @param {string} dirPath directory to hash
 * @returns {{
 *   root: string,
 *   leaves: { path: string, leaf: string, contentHash: string }[],   // per-file, sorted by leaf
 *   proofFor: (relOrAbsPathOrLeaf: string) => string[],
 *   leafFor: (relOrAbsPath: string) => string,                       // path-bound leaf for a file
 * }}
 * @throws {Error} when no files are found under `dirPath`
 */
function hashDir(dirPath) {
  const absFiles = listFiles(dirPath);
  if (absFiles.length === 0) {
    throw new Error(`no files found under directory: ${dirPath}`);
  }

  // Build the entry list hashEntries expects; it is the shared core that turns entries into a root.
  const entries = [];
  for (const abs of absFiles) {
    const rel = toPosixRel(path.relative(dirPath, abs));
    entries.push({ path: rel, abs, content: fs.readFileSync(abs) });
  }
  return hashEntries(entries);
}
|
|
316
|
+
|
|
317
|
+
/**
 * Hash a directory into a sorted-leaf Merkle root plus per-file manifest, streaming each file
 * instead of loading it whole.
 *
 * For every regular file found by listFiles, the content digest comes from hashFileStream and the
 * tree leaf from pathLeaf(relPath, contentHash), the same leaf formula hashEntries uses. Only the
 * relative path and the two 32-byte hashes are kept per file, so memory grows with the number of
 * files, not with their sizes.
 *
 * Unlike hashDir, the result carries no `proofFor`/`leafFor` helpers.
 *
 * @param {string} dirPath directory to walk recursively (non-regular entries skipped by listFiles)
 * @returns {{
 *   root: string,
 *   leaves: { path: string, contentHash: string, leaf: string }[],   // sorted by leaf, ascending
 * }}
 * @throws {Error} when no files are found under `dirPath`
 */
function hashDirStream(dirPath) {
  const absFiles = listFiles(dirPath);
  if (absFiles.length === 0) {
    throw new Error(`no files found under directory: ${dirPath}`);
  }

  // One file at a time: stream its digest, derive its leaf, keep only the small record.
  const records = [];
  for (const abs of absFiles) {
    const rel = toPosixRel(path.relative(dirPath, abs));
    const contentHash = hashFileStream(abs);
    records.push({ path: rel, contentHash, leaf: pathLeaf(rel, contentHash) });
  }

  const { root } = buildTree(records.map((record) => record.leaf));

  // Order the manifest by ascending leaf value, the same ordering buildTree applies internally.
  const byLeafAscending = (a, b) => {
    const left = BigInt(a.leaf);
    const right = BigInt(b.leaf);
    if (left < right) return -1;
    if (left > right) return 1;
    return 0;
  };
  const leaves = records
    .slice()
    .sort(byLeafAscending)
    .map(({ path: relPath, contentHash, leaf }) => ({ path: relPath, contentHash, leaf }));

  return { root, leaves };
}
|
|
369
|
+
|
|
370
|
+
/**
 * Build the directory-root result from an explicit list of file entries. This is the shared core
 * that hashDir delegates to after walking the filesystem.
 *
 * For each entry `{ path, content, abs? }`:
 *   - `path` is normalized with toPosixRel,
 *   - `contentHash` is the keccak256 of `content`,
 *   - `leaf` is pathLeaf(path, contentHash), the value the tree is built from.
 * The leaves are handed to buildTree, and the per-file metadata is sorted the same way (ascending
 * leaf value) so that position i in `leaves` corresponds to position i in the tree's bottom layer.
 *
 * `abs` is optional and is used only so `proofFor`/`leafFor` can accept an absolute path. A string
 * `abs` is resolved to a normalized absolute path when the entry is ingested, and an absolute
 * lookup target is resolved the same way before comparison. This matters because hashDir records
 * `abs` as `dirPath` joined with the file name, which is a RELATIVE string whenever `dirPath` is
 * relative; comparing raw strings would then never match an absolute target. Resolving also makes
 * un-normalized targets (e.g. containing "..") match.
 *
 * @param {{ path: string, content: Buffer|Uint8Array, abs?: string }[]} entries
 * @returns {{
 *   root: string,
 *   leaves: { path: string, leaf: string, contentHash: string }[],   // sorted by leaf, ascending
 *   proofFor: (relOrAbsPathOrLeaf: string) => string[],
 *   leafFor: (relOrAbsPath: string) => string,
 * }}
 * @throws {Error} when `entries` is missing or empty; `proofFor`/`leafFor` throw when the target
 *   is not part of the tree
 */
function hashEntries(entries) {
  if (!entries || entries.length === 0) {
    throw new Error("cannot build a directory root from zero files");
  }

  // Compute (path, contentHash, leaf) per entry. The tree leaf is the PATH-BOUND digest so the
  // root commits to names as well as content; the bare contentHash is kept for display.
  const pairs = entries.map((e) => {
    const rel = toPosixRel(e.path);
    const contentHash = hashBytes(e.content);
    return {
      path: rel,
      // Resolve now, against the working directory in effect when the entries were built.
      abs: typeof e.abs === "string" ? path.resolve(e.abs) : e.abs,
      contentHash,
      leaf: pathLeaf(rel, contentHash),
    };
  });

  const { root, layers, sortedLeaves } = buildTree(pairs.map((p) => p.leaf));

  // Sort the metadata with the same comparator buildTree uses for the bare leaves, so indices
  // into `sortedPairs` line up with indices into the tree's leaf layer.
  const sortedPairs = pairs.slice().sort((a, b) => {
    const x = BigInt(a.leaf);
    const y = BigInt(b.leaf);
    return x < y ? -1 : x > y ? 1 : 0;
  });

  // Resolve a target to its index in the sorted leaf layer, or -1 when it is not in the tree.
  // Accepted forms: a 0x-prefixed 64-hex-digit leaf value, an absolute path, or a relative path.
  function indexFor(target) {
    if (/^0x[0-9a-fA-F]{64}$/.test(target)) {
      // Compare numerically so hex letter case does not matter.
      const wanted = BigInt(target);
      return sortedLeaves.findIndex((l) => BigInt(l) === wanted);
    }
    if (path.isAbsolute(target)) {
      const resolvedTarget = path.resolve(target);
      return sortedPairs.findIndex((p) => p.abs === resolvedTarget);
    }
    const normTarget = toPosixRel(target);
    return sortedPairs.findIndex((p) => p.path === normTarget);
  }

  function proofFor(target) {
    const index = indexFor(target);
    if (index < 0) throw new Error(`target not found in directory tree: ${target}`);
    return proofForIndex(layers, index);
  }

  function leafFor(target) {
    const index = indexFor(target);
    if (index < 0) throw new Error(`target not found in directory tree: ${target}`);
    return sortedPairs[index].leaf;
  }

  return {
    root,
    leaves: sortedPairs.map((p) => ({
      path: p.path,
      leaf: p.leaf,
      contentHash: p.contentHash,
    })),
    proofFor,
    leafFor,
  };
}
|
|
453
|
+
|
|
454
|
+
/**
|
|
455
|
+
* Hash a directory into a stable Merkle root over EXACTLY the files git tracks at `ref` (default
|
|
456
|
+
* HEAD), reading their bytes from the WORKING TREE. This is the engine behind `vh hash <path> --git`.
|
|
457
|
+
*
|
|
458
|
+
* The tracked set is enumerated from the commit's tree (`git ls-tree`, via cli/git.js), so untracked
|
|
459
|
+
* junk in the work tree (`node_modules/`, `.env`, unstaged scratch files) is IGNORED — the root
|
|
460
|
+
* depends only on which files git tracks. Each tracked file's bytes are read from the work tree and
|
|
461
|
+
* fed through the IDENTICAL pathLeaf/buildTree/leafHash/nodeHash convention as `hashDir`, with the
|
|
462
|
+
* git path bound into each leaf, so the resulting root is byte-identical to (and verifiable by) the
|
|
463
|
+
* contract's verifyLeaf — no new leaf scheme, no contract change.
|
|
464
|
+
*
|
|
465
|
+
* Errors explicitly (never silently falls back to a filesystem walk):
|
|
466
|
+
* - `dirPath` not in a git work tree -> error (via repoRoot),
|
|
467
|
+
* - unknown `ref` -> error (via resolveCommit),
|
|
468
|
+
* - zero tracked files -> actionable error (cannot build a tree from zero leaves).
|
|
469
|
+
* A tracked file that is missing from the work tree (e.g. `git rm` without commit) is reported as a
|
|
470
|
+
* clear error rather than silently skipped, so the root always reflects the full tracked set.
|
|
471
|
+
*
|
|
472
|
+
* @param {string} dirPath a directory inside the repo
|
|
473
|
+
* @param {{ ref?: string }} [opts] `ref` to enumerate (default HEAD)
|
|
474
|
+
* @returns {{
|
|
475
|
+
* root: string,
|
|
476
|
+
* commit: string,
|
|
477
|
+
* scope: string,
|
|
478
|
+
* leaves: { path: string, leaf: string, contentHash: string }[],
|
|
479
|
+
* proofFor: (relOrAbsPathOrLeaf: string) => string[],
|
|
480
|
+
* leafFor: (relOrAbsPath: string) => string,
|
|
481
|
+
* }}
|
|
482
|
+
*/
|
|
483
|
+
function hashGit(dirPath, opts = {}) {
|
|
484
|
+
// Lazy-require so cli/hash.js stays usable (and unit-testable) without git on the host unless the
|
|
485
|
+
// --git path is actually taken.
|
|
486
|
+
const git = require("./git");
|
|
487
|
+
const root = git.repoRoot(dirPath); // errors clearly if dirPath is not in a git work tree
|
|
488
|
+
const commit = git.resolveCommit(dirPath, opts.ref); // errors clearly on an unknown ref
|
|
489
|
+
// Repo-relative scope (the operator's vantage point) recorded as an untrusted provenance hint.
|
|
490
|
+
const scope = git.repoRelativeScope(root, dirPath);
|
|
491
|
+
const tracked = git.listTrackedFiles(dirPath, opts.ref); // sorted repo-relative POSIX paths
|
|
492
|
+
if (tracked.length === 0) {
|
|
493
|
+
throw new Error(
|
|
494
|
+
`git tracks zero files at ${opts.ref || "HEAD"} (${commit.slice(0, 12)}); ` +
|
|
495
|
+
`nothing to hash. Commit at least one file, or hash without --git.`
|
|
496
|
+
);
|
|
497
|
+
}
|
|
498
|
+
const entries = tracked.map((rel) => {
|
|
499
|
+
const abs = path.join(root, rel);
|
|
500
|
+
let content;
|
|
501
|
+
try {
|
|
502
|
+
content = fs.readFileSync(abs);
|
|
503
|
+
} catch (e) {
|
|
504
|
+
throw new Error(
|
|
505
|
+
`tracked file is missing from the work tree: ${rel}\n` +
|
|
506
|
+
` (git lists it at ${commit.slice(0, 12)} but it could not be read: ${e.message})`
|
|
507
|
+
);
|
|
508
|
+
}
|
|
509
|
+
return { path: rel, abs, content };
|
|
510
|
+
});
|
|
511
|
+
const result = hashEntries(entries);
|
|
512
|
+
return { ...result, commit, scope };
|
|
513
|
+
}
|
|
514
|
+
|
|
515
|
+
/**
 * Hash whatever lives at `targetPath`, choosing the strategy from what `fs.statSync` reports:
 * a regular file is hashed with `hashFile`, a directory with `hashDir`. Anything else is rejected.
 *
 * @param {string} targetPath  path to a regular file or a directory
 * @returns {{ kind: "file"|"dir", root: string, leaves?: {path:string,leaf:string}[] }}
 *   `leaves` is present only for directories and is passed through from `hashDir` unchanged.
 * @throws {Error} when the path is neither a regular file nor a directory; errors raised by
 *   `fs.statSync` (for example for a nonexistent path) propagate as-is.
 */
function hashPath(targetPath) {
  const info = fs.statSync(targetPath);

  // Single file: the file's hash is reported as the root.
  if (info.isFile()) {
    return { kind: "file", root: hashFile(targetPath) };
  }

  // Neither a file nor a directory: refuse rather than guess.
  if (!info.isDirectory()) {
    throw new Error(`not a regular file or directory: ${targetPath}`);
  }

  // Directory: keep only the root and the leaf list from the directory walk.
  const walked = hashDir(targetPath);
  return { kind: "dir", root: walked.root, leaves: walked.leaves };
}
|
|
531
|
+
|
|
532
|
+
// Public surface of this module (CommonJS). The key set and order are left exactly as they were;
// each name refers to a function or constant declared earlier in this file.
module.exports = {
  hashFile,
  hashFileStream,
  hashBytes,
  hashDir,
  hashDirStream,
  hashEntries,
  hashGit,
  hashPath,
  leafHash,
  nodeHash,
  pathLeaf,
  toPosixRel,
  buildTree,
  proofForIndex,
  listFiles,
  DIR_LEAF_DOMAIN,
  DIR_LEAF_DOMAIN_STR,
};
|