@waku/sds 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json ADDED
@@ -0,0 +1,91 @@
1
+ {
2
+ "name": "@waku/sds",
3
+ "version": "0.0.1",
4
+ "description": "Scalable Data Sync implementation for the browser. Based on https://github.com/vacp2p/rfc-index/blob/main/vac/raw/sds.md",
5
+ "types": "./dist/index.d.ts",
6
+ "module": "./dist/index.js",
7
+ "exports": {
8
+ ".": {
9
+ "types": "./dist/index.d.ts",
10
+ "import": "./dist/index.js"
11
+ }
12
+ },
13
+ "typesVersions": {
14
+ "*": {
15
+ "*": [
16
+ "*",
17
+ "dist/*",
18
+ "dist/*/index"
19
+ ]
20
+ }
21
+ },
22
+ "type": "module",
23
+ "author": "Waku Team",
24
+ "homepage": "https://github.com/waku-org/js-waku/tree/master/packages/scalable-data-sync#readme",
25
+ "repository": {
26
+ "type": "git",
27
+ "url": "https://github.com/waku-org/js-waku.git"
28
+ },
29
+ "bugs": {
30
+ "url": "https://github.com/waku-org/js-waku/issues"
31
+ },
32
+ "license": "MIT OR Apache-2.0",
33
+ "keywords": [
34
+ "waku",
35
+ "decentralized",
36
+ "secure",
37
+ "communication",
38
+ "web3",
39
+ "ethereum",
40
+ "dapps",
41
+ "privacy"
42
+ ],
43
+ "scripts": {
44
+ "build": "run-s build:**",
45
+ "build:esm": "tsc",
46
+ "build:bundle": "rollup --config rollup.config.js",
47
+ "fix": "run-s fix:*",
48
+ "fix:lint": "eslint src *.js --fix",
49
+ "check": "run-s check:*",
50
+ "check:lint": "eslint src *.js",
51
+ "check:spelling": "cspell \"{README.md,src/**/*.ts}\"",
52
+ "check:tsc": "tsc -p tsconfig.dev.json",
53
+ "prepublish": "npm run build",
54
+ "reset-hard": "git clean -dfx -e .idea && git reset --hard && npm i && npm run build",
55
+ "test": "NODE_ENV=test run-s test:*",
56
+ "test:node": "NODE_ENV=test TS_NODE_PROJECT=./tsconfig.dev.json mocha"
57
+ },
58
+ "engines": {
59
+ "node": ">=20"
60
+ },
61
+ "dependencies": {
62
+ "@noble/hashes": "^1.7.1",
63
+ "@waku/message-hash": "^0.1.17",
64
+ "@waku/proto": "^0.0.8",
65
+ "@waku/utils": "^0.0.21",
66
+ "chai": "^5.1.2"
67
+ },
68
+ "devDependencies": {
69
+ "@rollup/plugin-commonjs": "^25.0.7",
70
+ "@rollup/plugin-json": "^6.0.0",
71
+ "@rollup/plugin-node-resolve": "^15.2.3",
72
+ "@waku/build-utils": "*",
73
+ "allure-commandline": "^2.27.0",
74
+ "allure-mocha": "^2.9.2",
75
+ "cspell": "^8.6.1",
76
+ "fast-check": "^3.19.0",
77
+ "mocha-multi-reporters": "^1.5.1",
78
+ "npm-run-all": "^4.1.5",
79
+ "rollup": "^4.12.0"
80
+ },
81
+ "files": [
82
+ "dist",
83
+ "bundle",
84
+ "src/**/*.ts",
85
+ "!**/*.spec.*",
86
+ "!**/*.json",
87
+ "CHANGELOG.md",
88
+ "LICENSE",
89
+ "README.md"
90
+ ]
91
+ }
package/src/bloom.ts ADDED
@@ -0,0 +1,146 @@
1
+ import { hashN } from "./nim_hashn/nim_hashn.mjs";
2
+ import { getMOverNBitsForK } from "./probabilities.js";
3
+
4
/**
 * Sizing parameters for a {@link BloomFilter}.
 */
export interface BloomFilterOptions {
  // The expected maximum number of elements for which this BloomFilter is sized.
  capacity: number;

  // The desired false-positive rate (between 0 and 1).
  errorRate: number;

  // (Optional) The exact number of hash functions, if the user wants to override the automatic calculation.
  kHashes?: number;

  // (Optional) Force a specific number of bits per element instead of using a table or optimal formula.
  // Only taken into account when kHashes is provided as well (>= 1).
  forceNBitsPerElem?: number;
}

// Size in bytes of one storage word of the filter.
const sizeOfInt = 8;
// Number of bits held by each entry of `BloomFilter.data`.
const bitsPerWord = sizeOfInt * 8;

/**
 * A probabilistic data structure that tracks memberships in a set.
 * Supports time and space efficient lookups, but may return false-positives.
 * Can never return false-negatives.
 * A bloom filter can tell us if an element is:
 * - Definitely not in the set
 * - Potentially in the set (with a probability depending on the false-positive rate)
 *
 * Bits are stored in `data` as 64-bit words; bit `h` lives in word
 * `floor(h / 64)` at offset `h % 64`.
 */
export class BloomFilter {
  public totalBits: number;
  public data: Array<bigint> = [];
  public kHashes: number;
  public errorRate: number;

  public options: BloomFilterOptions;

  private hashN: (item: string, n: number, maxValue: number) => number;

  /**
   * @param options - Sizing parameters (capacity, error rate, optional overrides).
   * @param hashN - Function returning the n-th hash of an item; it is called with
   *   `totalBits` as `maxValue` and its result is used as a bit index.
   * @throws If `kHashes` is given without `forceNBitsPerElem` and the lookup
   *   performed by `getMOverNBitsForK` fails.
   */
  public constructor(
    options: BloomFilterOptions,
    hashN: (item: string, n: number, maxValue: number) => number
  ) {
    this.options = options;

    let nBitsPerElem: number;
    let k = options.kHashes ?? 0;
    const forceNBitsPerElem = options.forceNBitsPerElem ?? 0;

    if (k < 1) {
      // Calculate optimal k based on target error rate
      const bitsPerElem = Math.ceil(
        -1.0 * (Math.log(options.errorRate) / Math.pow(Math.log(2), 2))
      );
      k = Math.round(Math.log(2) * bitsPerElem);
      nBitsPerElem = Math.round(bitsPerElem);
    } else {
      // Use specified k if possible
      if (forceNBitsPerElem < 1) {
        // Use lookup table
        nBitsPerElem = getMOverNBitsForK(k, options.errorRate);
      } else {
        nBitsPerElem = forceNBitsPerElem;
      }
    }

    const mBits = options.capacity * nBitsPerElem;
    // One word more than floor(mBits / 64). This count defines the serialized
    // length, so it is intentionally left unchanged.
    const mInts = 1 + Math.floor(mBits / bitsPerWord);

    this.totalBits = mBits;
    this.data = new Array<bigint>(mInts);
    this.data.fill(BigInt(0));
    this.kHashes = k;
    this.hashN = hashN;
    this.errorRate = options.errorRate;
  }

  /**
   * Computes the `kHashes` bit positions for an item by calling
   * `hashN(item, i, totalBits)` for every `i` in `[0, kHashes)`.
   */
  public computeHashes(item: string): number[] {
    const hashes = new Array<number>(this.kHashes);
    for (let i = 0; i < this.kHashes; i++) {
      hashes[i] = this.hashN(item, i, this.totalBits);
    }
    return hashes;
  }

  // Adds an item to the bloom filter by computing its hash values
  // and setting corresponding bits in "data".
  public insert(item: string): void {
    for (const h of this.computeHashes(item)) {
      const intAddress = Math.floor(h / bitsPerWord);
      const mask = BigInt(1) << BigInt(h % bitsPerWord);
      this.data[intAddress] = this.data[intAddress] | mask;
    }
  }

  // Checks if the item is potentially in the bloom filter.
  // The method is guaranteed to return "true" for items that were inserted,
  // but might also return "true" for items that were never inserted
  // (purpose of false-positive probability).
  public lookup(item: string): boolean {
    for (const h of this.computeHashes(item)) {
      const intAddress = Math.floor(h / bitsPerWord);
      const mask = BigInt(1) << BigInt(h % bitsPerWord);
      // A single unset bit proves the item was never inserted.
      if ((this.data[intAddress] & mask) === BigInt(0)) {
        return false;
      }
    }
    return true;
  }

  /**
   * Serializes the filter words as consecutive big-endian unsigned 64-bit
   * integers (8 bytes per entry of `data`).
   */
  public toBytes(): Uint8Array {
    const buffer = new ArrayBuffer(this.data.length * sizeOfInt);
    const view = new DataView(buffer);
    for (let i = 0; i < this.data.length; i++) {
      // Unsigned write mirrors the unsigned read in fromBytes.
      view.setBigUint64(i * sizeOfInt, this.data[i], false);
    }
    return new Uint8Array(buffer);
  }

  /**
   * Rebuilds a filter from bytes produced by {@link BloomFilter.toBytes}.
   *
   * @param bytes - Serialized filter words (big-endian, 8 bytes each). May be a
   *   view into a larger buffer; only the viewed region is read.
   * @param options - Options used to size the filter; they determine how many
   *   words are read, so they should match those of the serialized filter.
   * @param hashN - Hash function for the reconstructed filter.
   * @throws RangeError if `bytes` holds fewer bytes than the filter requires.
   */
  public static fromBytes(
    bytes: Uint8Array,
    options: BloomFilterOptions,
    hashN: (item: string, n: number, maxValue: number) => number
  ): BloomFilter {
    const bloomFilter = new BloomFilter(options, hashN);
    const requiredBytes = bloomFilter.data.length * sizeOfInt;
    if (bytes.byteLength < requiredBytes) {
      throw new RangeError(
        `Bloom filter requires ${requiredBytes} bytes but only ${bytes.byteLength} were provided`
      );
    }
    // Honour byteOffset/byteLength: `bytes` may be a slice of a larger buffer.
    const view = new DataView(bytes.buffer, bytes.byteOffset, bytes.byteLength);
    for (let i = 0; i < bloomFilter.data.length; i++) {
      bloomFilter.data[i] = view.getBigUint64(i * sizeOfInt, false);
    }
    return bloomFilter;
  }
}
134
+
135
/**
 * A {@link BloomFilter} bound to the `hashN` implementation imported by this
 * module, so callers only have to supply the sizing options.
 */
export class DefaultBloomFilter extends BloomFilter {
  public constructor(options: BloomFilterOptions) {
    super(options, hashN);
  }

  /**
   * Rebuilds a filter from its serialized bytes using the module's `hashN`.
   *
   * @param bytes - Bytes as produced by `toBytes()`.
   * @param options - Options used to size the filter.
   * @returns A genuine `DefaultBloomFilter` instance (so `instanceof` checks
   *   hold), carrying the decoded bit words.
   */
  public static fromBytes(
    bytes: Uint8Array,
    options: BloomFilterOptions
  ): DefaultBloomFilter {
    // Decode with the base implementation, then move the words into an
    // instance of this class rather than returning the plain BloomFilter.
    const decoded = BloomFilter.fromBytes(bytes, options, hashN);
    const filter = new DefaultBloomFilter(options);
    filter.data = decoded.data;
    return filter;
  }
}
package/src/index.ts ADDED
@@ -0,0 +1,3 @@
1
// Public entry point of the package: re-exports the Bloom filter implementation.
export { BloomFilter } from "./bloom.js";
@@ -0,0 +1,11 @@
1
+ /**
2
+ * Get the nth hash using the double hashing technique from:
3
+ * http://www.eecs.harvard.edu/~kirsch/pubs/bbbf/rsa.pdf
4
+ *
5
+ * Based on https://github.com/waku-org/nim-sds/blob/5df71ad3eaf68172cef39a2e1838ddd871b03b5d/src/bloom.nim#L17
6
+ *
7
+ * @param item - The string to hash.
8
+ * @param n - The index of the hash to compute (the "nth" hash of the double-hashing sequence).
9
+ * @param maxValue - The upper bound for the returned hash; callers pass the filter's total number of bits and use the result as a bit index.
10
+ */
11
+ export function hashN(item: string, n: number, maxValue: number): number;
@@ -0,0 +1,168 @@
1
+ // This file contains the probability tables used to determine the optimal number of
2
+ // hash functions (k) and bits per element (m/n) for a Bloom filter.
3
+ //
4
+ // These are used to determine how to construct a Bloom filter that can perform
5
+ // lookups with false-positive rate low enough to be satisfactory.
6
+
7
/**
 * Represents the error rates for a given number of hash functions (k) across
 * different (m/n) ratios (i.e., bits per element).
 */
type TErrorForK = Float32Array;

/**
 * An array where each index corresponds to a value of k (the number of hash functions),
 * and each element is a vector of false-positive rates for varying bits-per-element ratios.
 * Example:
 * ```ts
 * // Probability of a false positive upon lookup when using 1 hash function (k=1)
 * // and 15 bits per element (mOverN=15):
 * const falsePositiveRate = kErrors[1][15];
 * ```
 */
type TAllErrorRates = Array<TErrorForK>;

/**
 * Table of false positive rates for values of k from 0 to 12, and bits-per-element
 * ratios ranging from 0 up to 32. Each Float32Array is indexed by mOverN,
 * so kErrors[k][mOverN] gives the estimated false-positive probability.
 *
 * Row 0 is a single placeholder entry; rows 1 to 12 each hold 33 entries
 * (mOverN = 0..32) and are non-increasing in mOverN. Entries of 1.0 mark
 * ratios for which no useful rate is tabulated.
 *
 * These values mirror commonly used reference data found in Bloom filter literature,
 * such as:
 * https://pages.cs.wisc.edu/~cao/papers/summary-cache/node8.html
 * https://dl.acm.org/doi/pdf/10.1145/362686.362692
 */
// prettier-ignore
export const kErrors: TAllErrorRates = [
  // k = 0
  new Float32Array([1.0]),

  // k = 1
  new Float32Array([1.0, 1.0, 0.3930000000, 0.2830000000, 0.2210000000, 0.1810000000,
    0.1540000000, 0.1330000000, 0.1180000000, 0.1050000000, 0.0952000000,
    0.0869000000, 0.0800000000, 0.0740000000, 0.0689000000, 0.0645000000,
    0.0606000000, 0.0571000000, 0.0540000000, 0.0513000000, 0.0488000000,
    0.0465000000, 0.0444000000, 0.0425000000, 0.0408000000, 0.0392000000,
    0.0377000000, 0.0364000000, 0.0351000000, 0.0339000000, 0.0328000000,
    0.0317000000, 0.0308000000]),

  // k = 2
  new Float32Array([1.0, 1.0, 0.4000000000, 0.2370000000, 0.1550000000, 0.1090000000,
    0.0804000000, 0.0618000000, 0.0489000000, 0.0397000000, 0.0329000000,
    0.0276000000, 0.0236000000, 0.0203000000, 0.0177000000, 0.0156000000,
    0.0138000000, 0.0123000000, 0.0111000000, 0.0099800000, 0.0090600000,
    0.0082500000, 0.0075500000, 0.0069400000, 0.0063900000, 0.0059100000,
    0.0054800000, 0.0051000000, 0.0047500000, 0.0044400000, 0.0041600000,
    0.0039000000, 0.0036700000]),

  // k = 3 (a duplicated run of four values after mOverN = 15 has been removed)
  new Float32Array([1.0, 1.0, 1.0, 0.2530000000, 0.1470000000, 0.0920000000,
    0.0609000000, 0.0423000000, 0.0306000000, 0.0228000000, 0.0174000000,
    0.0136000000, 0.0108000000, 0.0087500000, 0.0071800000, 0.0059600000,
    0.0050000000, 0.0042300000, 0.0036200000, 0.0031200000, 0.0027000000,
    0.0023600000, 0.0020700000, 0.0018300000, 0.0016200000, 0.0014500000,
    0.0012900000, 0.0011600000, 0.0010500000, 0.0009490000, 0.0008620000,
    0.0007850000, 0.0007170000]),

  // k = 4
  new Float32Array([1.0, 1.0, 1.0, 1.0, 0.1600000000, 0.0920000000, 0.0561000000, 0.0359000000,
    0.0240000000, 0.0166000000, 0.0118000000, 0.0086400000, 0.0064600000,
    0.0049200000, 0.0038100000, 0.0030000000, 0.0023900000, 0.0019300000,
    0.0015800000, 0.0013000000, 0.0010800000, 0.0009050000, 0.0007640000,
    0.0006490000, 0.0005550000, 0.0004780000, 0.0004130000, 0.0003590000,
    0.0003140000, 0.0002760000, 0.0002430000, 0.0002150000, 0.0001910000]),

  // k = 5
  new Float32Array([1.0, 1.0, 1.0, 1.0, 1.0, 0.1010000000, 0.0578000000, 0.0347000000,
    0.0217000000, 0.0141000000, 0.0094300000, 0.0065000000, 0.0045900000,
    0.0033200000, 0.0024400000, 0.0018300000, 0.0013900000, 0.0010700000,
    0.0008390000, 0.0006630000, 0.0005300000, 0.0004270000, 0.0003470000,
    0.0002850000, 0.0002350000, 0.0001960000, 0.0001640000, 0.0001380000,
    0.0001170000, 0.0000996000, 0.0000853000, 0.0000733000, 0.0000633000]),

  // k = 6
  new Float32Array([1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0638000000, 0.0364000000, 0.0216000000,
    0.0133000000, 0.0084400000, 0.0055200000, 0.0037100000, 0.0025500000,
    0.0017900000, 0.0012800000, 0.0009350000, 0.0006920000, 0.0005190000,
    0.0003940000, 0.0003030000, 0.0002360000, 0.0001850000, 0.0001470000,
    0.0001170000, 0.0000944000, 0.0000766000, 0.0000626000, 0.0000515000,
    0.0000426000, 0.0000355000, 0.0000297000, 0.0000250000]),

  // k = 7
  new Float32Array([1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0229000000, 0.0135000000, 0.0081900000,
    0.0051300000, 0.0032900000, 0.0021700000, 0.0014600000, 0.0010000000,
    0.0007020000, 0.0004990000, 0.0003600000, 0.0002640000, 0.0001960000,
    0.0001470000, 0.0001120000, 0.0000856000, 0.0000663000, 0.0000518000,
    0.0000408000, 0.0000324000, 0.0000259000, 0.0000209000, 0.0000169000,
    0.0000138000, 0.0000113000]),

  // k = 8
  new Float32Array([1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
    1.0, 0.0145000000, 0.0084600000, 0.0050900000, 0.0031400000, 0.0019900000,
    0.0012900000, 0.0008520000, 0.0005740000, 0.0003940000, 0.0002750000,
    0.0001940000, 0.0001400000, 0.0001010000, 0.0000746000, 0.0000555000,
    0.0000417000, 0.0000316000, 0.0000242000, 0.0000187000, 0.0000146000,
    0.0000114000, 0.0000090100, 0.0000071600, 0.0000057300]),

  // k = 9
  new Float32Array([1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0053100000, 0.0031700000,
    0.0019400000, 0.0012100000, 0.0007750000, 0.0005050000, 0.0003350000,
    0.0002260000, 0.0001550000, 0.0001080000, 0.0000759000, 0.0000542000,
    0.0000392000, 0.0000286000, 0.0000211000, 0.0000157000, 0.0000118000,
    0.0000089600, 0.0000068500, 0.0000052800, 0.0000041000, 0.0000032000]),

  // k = 10
  new Float32Array([1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0033400000,
    0.0019800000, 0.0012000000, 0.0007440000, 0.0004700000, 0.0003020000,
    0.0001980000, 0.0001320000, 0.0000889000, 0.0000609000, 0.0000423000,
    0.0000297000, 0.0000211000, 0.0000152000, 0.0000110000, 0.0000080700,
    0.0000059700, 0.0000044500, 0.0000033500, 0.0000025400, 0.0000019400]),

  // k = 11
  new Float32Array([1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
    0.0021000000, 0.0012400000, 0.0007470000, 0.0004590000, 0.0002870000,
    0.0001830000, 0.0001180000, 0.0000777000, 0.0000518000, 0.0000350000,
    0.0000240000, 0.0000166000, 0.0000116000, 0.0000082300, 0.0000058900,
    0.0000042500, 0.0000031000, 0.0000022800, 0.0000016900, 0.0000012600]),

  // k = 12
  new Float32Array([1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
    0.0007780000, 0.0004660000, 0.0002840000, 0.0001760000, 0.0001110000,
    0.0000712000, 0.0000463000, 0.0000305000, 0.0000204000, 0.0000138000,
    0.0000094200, 0.0000065200, 0.0000045600, 0.0000032200, 0.0000022900,
    0.0000016500, 0.0000012000, 0.0000008740]),
]
122
+
123
// Error message thrown when no table row is available for the requested k.
export const KTooLargeError = "K must be <= 12";
// Error message thrown when no tabulated ratio reaches the requested error rate.
export const NoSuitableRatioError =
  "Specified value of k and error rate not achievable using less than 4 bytes / element.";

/**
 * Given a number of hash functions (k) and a target false-positive rate (targetError),
 * determines the minimum (m/n) bits-per-element that satisfies the error threshold.
 *
 * In the context of a Bloom filter:
 * - m is the total number of bits in the filter.
 * - n is the number of elements you expect to insert.
 * Thus, (m/n) describes how many bits are assigned per inserted element.
 *
 * Example:
 * ```ts
 * // We want to use 3 hash functions (k=3) and a false-positive rate of 1% (targetError=0.01).
 * const mOverN = getMOverNBitsForK(3, 0.01);
 * // The function scans kErrors[3] starting at m/n = 2 and returns the first index whose
 * // rate is strictly below the target. kErrors[3][12] (0.0108) is still too high, while
 * // kErrors[3][13] (0.00875) is the first value below 0.01.
 * console.log(mOverN); // 13
 * ```
 *
 * @param k - The number of hash functions.
 * @param targetError - The desired maximum false-positive rate.
 * @param probabilityTable - An optional table of false-positive probabilities indexed by k.
 * @returns The smallest (m/n) bit ratio for which the false-positive rate is below targetError.
 * @throws Error with message `KTooLargeError` if k is outside 0..12 or the table has no
 *   row for k (e.g. a non-integer k or a shorter custom table).
 * @throws Error with message `NoSuitableRatioError` if no tabulated ratio is below targetError.
 */
export function getMOverNBitsForK(
  k: number,
  targetError: number,
  probabilityTable = kErrors
): number {
  const errorsForK = probabilityTable[k];
  // Also reject a missing row so callers get the documented error instead of a
  // TypeError from indexing `undefined`.
  if (k < 0 || k > 12 || errorsForK === undefined) {
    throw new Error(KTooLargeError);
  }

  // Ratios below 2 bits per element are never considered.
  for (let mOverN = 2; mOverN < errorsForK.length; mOverN++) {
    if (errorsForK[mOverN] < targetError) {
      return mOverN;
    }
  }

  throw new Error(NoSuitableRatioError);
}