@iscc/wasm 0.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +79 -0
- package/iscc_wasm.d.ts +280 -0
- package/iscc_wasm.js +1283 -0
- package/iscc_wasm_bg.wasm +0 -0
- package/package.json +19 -0
package/README.md
ADDED
|
@@ -0,0 +1,79 @@
|
|
|
1
|
+
# @iscc/wasm
|
|
2
|
+
|
|
3
|
+
[CI](https://github.com/iscc/iscc-lib/actions/workflows/ci.yml)
|
|
4
|
+
[npm](https://www.npmjs.com/package/@iscc/wasm)
|
|
5
|
+
[License: Apache-2.0](https://opensource.org/licenses/Apache-2.0)
|
|
6
|
+
|
|
7
|
+
> **Experimental:** This library is in early development (v0.0.x). APIs may change without notice.
|
|
8
|
+
> Not recommended for production use yet.
|
|
9
|
+
|
|
10
|
+
Browser-compatible WebAssembly bindings for
|
|
11
|
+
[ISO 24138:2024](https://www.iso.org/standard/77899.html) -- International Standard Content Code
|
|
12
|
+
(ISCC). Built with Rust and [wasm-bindgen](https://rustwasm.github.io/wasm-bindgen/) for use in
|
|
13
|
+
browsers and Node.js.
|
|
14
|
+
|
|
15
|
+
## What is ISCC
|
|
16
|
+
|
|
17
|
+
The ISCC is a similarity-preserving fingerprint and identifier for digital media assets. ISCCs are
|
|
18
|
+
generated algorithmically from digital content, just like cryptographic hashes. However, instead of
|
|
19
|
+
using a single cryptographic hash function to identify data only, the ISCC uses various algorithms
|
|
20
|
+
to create a composite identifier that exhibits similarity-preserving properties (soft hash).
|
|
21
|
+
|
|
22
|
+
## Installation
|
|
23
|
+
|
|
24
|
+
```bash
|
|
25
|
+
npm install @iscc/wasm
|
|
26
|
+
```
|
|
27
|
+
|
|
28
|
+
## Quick Start
|
|
29
|
+
|
|
30
|
+
```javascript
|
|
31
|
+
import init, {
|
|
32
|
+
gen_meta_code_v0
|
|
33
|
+
} from "@iscc/wasm";
|
|
34
|
+
|
|
35
|
+
await init();
|
|
36
|
+
const iscc = gen_meta_code_v0("ISCC Test Document!");
|
|
37
|
+
console.log(`Meta-Code: ${iscc}`);
|
|
38
|
+
```
|
|
39
|
+
|
|
40
|
+
Both browser and Node.js targets are supported.
|
|
41
|
+
|
|
42
|
+
## API Overview
|
|
43
|
+
|
|
44
|
+
### Code Generators
|
|
45
|
+
|
|
46
|
+
| Function | Description |
|
|
47
|
+
| ---------------------- | -------------------------------------------- |
|
|
48
|
+
| `gen_meta_code_v0` | Generate a Meta-Code from metadata fields |
|
|
49
|
+
| `gen_text_code_v0` | Generate a Text-Code from plain text |
|
|
50
|
+
| `gen_image_code_v0` | Generate an Image-Code from pixel data |
|
|
51
|
+
| `gen_audio_code_v0` | Generate an Audio-Code from Chromaprint data |
|
|
52
|
+
| `gen_video_code_v0` | Generate a Video-Code from frame signatures |
|
|
53
|
+
| `gen_mixed_code_v0` | Generate a Mixed-Code from Content-Codes |
|
|
54
|
+
| `gen_data_code_v0` | Generate a Data-Code from raw bytes |
|
|
55
|
+
| `gen_instance_code_v0` | Generate an Instance-Code from raw bytes |
|
|
56
|
+
| `gen_iscc_code_v0` | Generate a composite ISCC-CODE |
|
|
57
|
+
|
|
58
|
+
All code generators return ISCC strings directly.
|
|
59
|
+
|
|
60
|
+
### Utilities
|
|
61
|
+
|
|
62
|
+
- **Text processing:** `text_clean`, `text_remove_newlines`, `text_trim`, `text_collapse`
|
|
63
|
+
- **Algorithm primitives:** `alg_simhash`, `alg_minhash_256`, `alg_cdc_chunks`, `sliding_window`
|
|
64
|
+
- **Soft hashing:** `soft_hash_video_v0`
|
|
65
|
+
- **Encoding:** `encode_base64`
|
|
66
|
+
- **Codec:** `iscc_decompose`
|
|
67
|
+
- **Streaming:** `DataHasher`, `InstanceHasher` classes for incremental processing
|
|
68
|
+
- **Diagnostics:** `conformance_selftest`
|
|
69
|
+
|
|
70
|
+
## Links
|
|
71
|
+
|
|
72
|
+
- [Documentation](https://lib.iscc.codes)
|
|
73
|
+
- [Repository](https://github.com/iscc/iscc-lib)
|
|
74
|
+
- [ISCC Specification (ISO 24138)](https://www.iso.org/standard/77899.html)
|
|
75
|
+
- [ISCC Foundation](https://iscc.io)
|
|
76
|
+
|
|
77
|
+
## License
|
|
78
|
+
|
|
79
|
+
Apache-2.0
|
package/iscc_wasm.d.ts
ADDED
|
@@ -0,0 +1,280 @@
|
|
|
1
|
+
/* tslint:disable */
|
|
2
|
+
/* eslint-disable */
|
|
3
|
+
|
|
4
|
+
/**
|
|
5
|
+
* Streaming Data-Code generator.
|
|
6
|
+
*
|
|
7
|
+
* Incrementally processes data with content-defined chunking and MinHash
|
|
8
|
+
* to produce results identical to `gen_data_code_v0`. Follows the
|
|
9
|
+
* `new() → update() → finalize()` pattern.
|
|
10
|
+
*/
|
|
11
|
+
export class DataHasher {
|
|
12
|
+
free(): void;
|
|
13
|
+
[Symbol.dispose](): void;
|
|
14
|
+
/**
|
|
15
|
+
* Consume the hasher and produce a Data-Code ISCC string.
|
|
16
|
+
*
|
|
17
|
+
* After calling `finalize`, subsequent calls to `update` or `finalize`
|
|
18
|
+
* will throw. Default `bits` is 64.
|
|
19
|
+
*/
|
|
20
|
+
finalize(bits?: number | null): string;
|
|
21
|
+
/**
|
|
22
|
+
* Create a new `DataHasher`.
|
|
23
|
+
*/
|
|
24
|
+
constructor();
|
|
25
|
+
/**
|
|
26
|
+
* Push data into the hasher.
|
|
27
|
+
*/
|
|
28
|
+
update(data: Uint8Array): void;
|
|
29
|
+
}
|
|
30
|
+
|
|
31
|
+
/**
|
|
32
|
+
* Streaming Instance-Code generator.
|
|
33
|
+
*
|
|
34
|
+
* Incrementally hashes data with BLAKE3 to produce results identical
|
|
35
|
+
* to `gen_instance_code_v0`. Follows the
|
|
36
|
+
* `new() → update() → finalize()` pattern.
|
|
37
|
+
*/
|
|
38
|
+
export class InstanceHasher {
|
|
39
|
+
free(): void;
|
|
40
|
+
[Symbol.dispose](): void;
|
|
41
|
+
/**
|
|
42
|
+
* Consume the hasher and produce an Instance-Code ISCC string.
|
|
43
|
+
*
|
|
44
|
+
* After calling `finalize`, subsequent calls to `update` or `finalize`
|
|
45
|
+
* will throw. Default `bits` is 64.
|
|
46
|
+
*/
|
|
47
|
+
finalize(bits?: number | null): string;
|
|
48
|
+
/**
|
|
49
|
+
* Create a new `InstanceHasher`.
|
|
50
|
+
*/
|
|
51
|
+
constructor();
|
|
52
|
+
/**
|
|
53
|
+
* Push data into the hasher.
|
|
54
|
+
*/
|
|
55
|
+
update(data: Uint8Array): void;
|
|
56
|
+
}
|
|
57
|
+
|
|
58
|
+
/**
|
|
59
|
+
* Split data into content-defined chunks using gear rolling hash.
|
|
60
|
+
*
|
|
61
|
+
* Returns a JS array of `Uint8Array` chunks. At least one chunk is always
|
|
62
|
+
* returned (empty bytes for empty input). When `utf32` is true, aligns cut
|
|
63
|
+
* points to 4-byte boundaries. Default `avg_chunk_size` is 1024.
|
|
64
|
+
*/
|
|
65
|
+
export function alg_cdc_chunks(data: Uint8Array, utf32: boolean, avg_chunk_size?: number | null): Uint8Array[];
|
|
66
|
+
|
|
67
|
+
/**
|
|
68
|
+
* Compute a 256-bit MinHash digest from 32-bit integer features.
|
|
69
|
+
*
|
|
70
|
+
* Uses 64 universal hash functions with bit-interleaved compression to
|
|
71
|
+
* produce a 32-byte similarity-preserving digest.
|
|
72
|
+
*/
|
|
73
|
+
export function alg_minhash_256(features: Uint32Array): Uint8Array;
|
|
74
|
+
|
|
75
|
+
/**
|
|
76
|
+
* Compute a SimHash from a sequence of equal-length hash digests.
|
|
77
|
+
*
|
|
78
|
+
* Accepts a JS array of `Uint8Array` values. Returns a similarity-preserving
|
|
79
|
+
* hash whose length matches the input digest length. Returns 32 zero bytes
|
|
80
|
+
* for empty input. Throws on mismatched digest lengths.
|
|
81
|
+
*/
|
|
82
|
+
export function alg_simhash(hash_digests: ReadonlyArray<Uint8Array>): Uint8Array;
|
|
83
|
+
|
|
84
|
+
/**
|
|
85
|
+
* Encode bytes as base64url (RFC 4648 §5, no padding).
|
|
86
|
+
*
|
|
87
|
+
* Returns a URL-safe base64 encoded string without padding characters.
|
|
88
|
+
*/
|
|
89
|
+
export function encode_base64(data: Uint8Array): string;
|
|
90
|
+
|
|
91
|
+
/**
|
|
92
|
+
* Generate an Audio-Code from a Chromaprint feature vector.
|
|
93
|
+
*
|
|
94
|
+
* Produces an ISCC Content-Code for audio from signed integer
|
|
95
|
+
* Chromaprint fingerprint features using multi-stage SimHash.
|
|
96
|
+
*/
|
|
97
|
+
export function gen_audio_code_v0(cv: Int32Array, bits?: number | null): string;
|
|
98
|
+
|
|
99
|
+
/**
|
|
100
|
+
* Generate a Data-Code from raw byte data.
|
|
101
|
+
*
|
|
102
|
+
* Produces an ISCC Data-Code by splitting data into content-defined
|
|
103
|
+
* chunks and applying MinHash for similarity hashing.
|
|
104
|
+
*/
|
|
105
|
+
export function gen_data_code_v0(data: Uint8Array, bits?: number | null): string;
|
|
106
|
+
|
|
107
|
+
/**
|
|
108
|
+
* Generate an Image-Code from pixel data.
|
|
109
|
+
*
|
|
110
|
+
* Produces an ISCC Content-Code for images from 1024 grayscale pixels
|
|
111
|
+
* (32x32) using a DCT-based perceptual hash.
|
|
112
|
+
*/
|
|
113
|
+
export function gen_image_code_v0(pixels: Uint8Array, bits?: number | null): string;
|
|
114
|
+
|
|
115
|
+
/**
|
|
116
|
+
* Generate an Instance-Code from raw byte data.
|
|
117
|
+
*
|
|
118
|
+
* Produces an ISCC Instance-Code by hashing the complete byte stream
|
|
119
|
+
* with BLAKE3. Returns the ISCC string with "ISCC:" prefix.
|
|
120
|
+
*/
|
|
121
|
+
export function gen_instance_code_v0(data: Uint8Array, bits?: number | null): string;
|
|
122
|
+
|
|
123
|
+
/**
|
|
124
|
+
* Generate a composite ISCC-CODE from individual unit codes.
|
|
125
|
+
*
|
|
126
|
+
* Combines multiple ISCC unit codes into a single composite ISCC-CODE.
|
|
127
|
+
* Requires at least Data-Code and Instance-Code. Accepts a JS array of strings.
|
|
128
|
+
*/
|
|
129
|
+
export function gen_iscc_code_v0(codes: ReadonlyArray<string>, wide?: boolean | null): string;
|
|
130
|
+
|
|
131
|
+
/**
|
|
132
|
+
* Generate a Meta-Code from name and optional metadata.
|
|
133
|
+
*
|
|
134
|
+
* Produces an ISCC Meta-Code by hashing the provided name, description,
|
|
135
|
+
* and metadata fields using the SimHash algorithm.
|
|
136
|
+
*/
|
|
137
|
+
export function gen_meta_code_v0(name: string, description?: string | null, meta?: string | null, bits?: number | null): string;
|
|
138
|
+
|
|
139
|
+
/**
|
|
140
|
+
* Generate a Mixed-Code from multiple Content-Code strings.
|
|
141
|
+
*
|
|
142
|
+
* Produces a Mixed Content-Code by combining multiple ISCC Content-Codes
|
|
143
|
+
* of different types using SimHash. Accepts a JS array of strings.
|
|
144
|
+
*/
|
|
145
|
+
export function gen_mixed_code_v0(codes: ReadonlyArray<string>, bits?: number | null): string;
|
|
146
|
+
|
|
147
|
+
/**
|
|
148
|
+
* Generate a Text-Code from plain text content.
|
|
149
|
+
*
|
|
150
|
+
* Produces an ISCC Content-Code for text using MinHash-based
|
|
151
|
+
* similarity hashing.
|
|
152
|
+
*/
|
|
153
|
+
export function gen_text_code_v0(text: string, bits?: number | null): string;
|
|
154
|
+
|
|
155
|
+
/**
|
|
156
|
+
* Generate a Video-Code from frame signature data.
|
|
157
|
+
*
|
|
158
|
+
* Produces an ISCC Content-Code for video from MPEG-7 frame
|
|
159
|
+
* signature vectors using WTA-Hash. Accepts a JS array of arrays of i32.
|
|
160
|
+
*/
|
|
161
|
+
export function gen_video_code_v0(frame_sigs: ReadonlyArray<ReadonlyArray<number>>, bits?: number | null): string;
|
|
162
|
+
|
|
163
|
+
/**
|
|
164
|
+
* Decompose a composite ISCC-CODE into individual ISCC-UNITs.
|
|
165
|
+
*
|
|
166
|
+
* Accepts a normalized ISCC-CODE or concatenated ISCC-UNIT sequence.
|
|
167
|
+
* The optional "ISCC:" prefix is stripped before decoding.
|
|
168
|
+
* Returns an array of base32-encoded ISCC-UNIT strings (without prefix).
|
|
169
|
+
*/
|
|
170
|
+
export function iscc_decompose(iscc_code: string): string[];
|
|
171
|
+
|
|
172
|
+
/**
|
|
173
|
+
* Generate sliding window n-grams from a string.
|
|
174
|
+
*
|
|
175
|
+
* Returns overlapping substrings of `width` Unicode characters, advancing
|
|
176
|
+
* by one character at a time. Throws if width is less than 2.
|
|
177
|
+
*/
|
|
178
|
+
export function sliding_window(seq: string, width: number): string[];
|
|
179
|
+
|
|
180
|
+
/**
|
|
181
|
+
* Compute a similarity-preserving hash from video frame signatures.
|
|
182
|
+
*
|
|
183
|
+
* Accepts a JS array of arrays of `i32`. Returns raw bytes of length
|
|
184
|
+
* `bits / 8`. Default `bits` is 64. Throws if `frame_sigs` is empty.
|
|
185
|
+
*/
|
|
186
|
+
export function soft_hash_video_v0(frame_sigs: ReadonlyArray<ReadonlyArray<number>>, bits?: number | null): Uint8Array;
|
|
187
|
+
|
|
188
|
+
/**
|
|
189
|
+
* Clean and normalize text for display.
|
|
190
|
+
*
|
|
191
|
+
* Applies NFKC normalization, removes control characters (except newlines),
|
|
192
|
+
* normalizes `\r\n` to `\n`, collapses consecutive empty lines, and strips
|
|
193
|
+
* leading/trailing whitespace.
|
|
194
|
+
*/
|
|
195
|
+
export function text_clean(text: string): string;
|
|
196
|
+
|
|
197
|
+
/**
|
|
198
|
+
* Normalize and simplify text for similarity hashing.
|
|
199
|
+
*
|
|
200
|
+
* Applies NFD normalization, lowercasing, removes whitespace and characters
|
|
201
|
+
* in Unicode categories C (control), M (mark), and P (punctuation), then
|
|
202
|
+
* recombines with NFKC normalization.
|
|
203
|
+
*/
|
|
204
|
+
export function text_collapse(text: string): string;
|
|
205
|
+
|
|
206
|
+
/**
|
|
207
|
+
* Remove newlines and collapse whitespace to single spaces.
|
|
208
|
+
*
|
|
209
|
+
* Converts multi-line text into a single normalized line.
|
|
210
|
+
*/
|
|
211
|
+
export function text_remove_newlines(text: string): string;
|
|
212
|
+
|
|
213
|
+
/**
|
|
214
|
+
* Trim text so its UTF-8 encoded size does not exceed `nbytes`.
|
|
215
|
+
*
|
|
216
|
+
* Multi-byte characters that would be split are dropped entirely.
|
|
217
|
+
* Leading/trailing whitespace is stripped from the result.
|
|
218
|
+
*/
|
|
219
|
+
export function text_trim(text: string, nbytes: number): string;
|
|
220
|
+
|
|
221
|
+
export type InitInput = RequestInfo | URL | Response | BufferSource | WebAssembly.Module;
|
|
222
|
+
|
|
223
|
+
export interface InitOutput {
|
|
224
|
+
readonly memory: WebAssembly.Memory;
|
|
225
|
+
readonly __wbg_datahasher_free: (a: number, b: number) => void;
|
|
226
|
+
readonly __wbg_instancehasher_free: (a: number, b: number) => void;
|
|
227
|
+
readonly alg_cdc_chunks: (a: number, b: number, c: number, d: number, e: number) => void;
|
|
228
|
+
readonly alg_minhash_256: (a: number, b: number, c: number) => void;
|
|
229
|
+
readonly alg_simhash: (a: number, b: number) => void;
|
|
230
|
+
readonly datahasher_finalize: (a: number, b: number, c: number) => void;
|
|
231
|
+
readonly datahasher_new: () => number;
|
|
232
|
+
readonly datahasher_update: (a: number, b: number, c: number, d: number) => void;
|
|
233
|
+
readonly encode_base64: (a: number, b: number, c: number) => void;
|
|
234
|
+
readonly gen_audio_code_v0: (a: number, b: number, c: number, d: number) => void;
|
|
235
|
+
readonly gen_data_code_v0: (a: number, b: number, c: number, d: number) => void;
|
|
236
|
+
readonly gen_image_code_v0: (a: number, b: number, c: number, d: number) => void;
|
|
237
|
+
readonly gen_instance_code_v0: (a: number, b: number, c: number, d: number) => void;
|
|
238
|
+
readonly gen_iscc_code_v0: (a: number, b: number, c: number) => void;
|
|
239
|
+
readonly gen_meta_code_v0: (a: number, b: number, c: number, d: number, e: number, f: number, g: number, h: number) => void;
|
|
240
|
+
readonly gen_mixed_code_v0: (a: number, b: number, c: number) => void;
|
|
241
|
+
readonly gen_text_code_v0: (a: number, b: number, c: number, d: number) => void;
|
|
242
|
+
readonly gen_video_code_v0: (a: number, b: number, c: number) => void;
|
|
243
|
+
readonly instancehasher_finalize: (a: number, b: number, c: number) => void;
|
|
244
|
+
readonly instancehasher_new: () => number;
|
|
245
|
+
readonly instancehasher_update: (a: number, b: number, c: number, d: number) => void;
|
|
246
|
+
readonly iscc_decompose: (a: number, b: number, c: number) => void;
|
|
247
|
+
readonly sliding_window: (a: number, b: number, c: number, d: number) => void;
|
|
248
|
+
readonly soft_hash_video_v0: (a: number, b: number, c: number) => void;
|
|
249
|
+
readonly text_clean: (a: number, b: number, c: number) => void;
|
|
250
|
+
readonly text_collapse: (a: number, b: number, c: number) => void;
|
|
251
|
+
readonly text_remove_newlines: (a: number, b: number, c: number) => void;
|
|
252
|
+
readonly text_trim: (a: number, b: number, c: number, d: number) => void;
|
|
253
|
+
readonly __wbindgen_export: (a: number, b: number) => number;
|
|
254
|
+
readonly __wbindgen_export2: (a: number, b: number, c: number, d: number) => number;
|
|
255
|
+
readonly __wbindgen_export3: (a: number) => void;
|
|
256
|
+
readonly __wbindgen_add_to_stack_pointer: (a: number) => number;
|
|
257
|
+
readonly __wbindgen_export4: (a: number, b: number, c: number) => void;
|
|
258
|
+
}
|
|
259
|
+
|
|
260
|
+
export type SyncInitInput = BufferSource | WebAssembly.Module;
|
|
261
|
+
|
|
262
|
+
/**
|
|
263
|
+
* Instantiates the given `module`, which can either be bytes or
|
|
264
|
+
* a precompiled `WebAssembly.Module`.
|
|
265
|
+
*
|
|
266
|
+
* @param {{ module: SyncInitInput }} module - Passing `SyncInitInput` directly is deprecated.
|
|
267
|
+
*
|
|
268
|
+
* @returns {InitOutput}
|
|
269
|
+
*/
|
|
270
|
+
export function initSync(module: { module: SyncInitInput } | SyncInitInput): InitOutput;
|
|
271
|
+
|
|
272
|
+
/**
|
|
273
|
+
* If `module_or_path` is {RequestInfo} or {URL}, makes a request and
|
|
274
|
+
* for everything else, calls `WebAssembly.instantiate` directly.
|
|
275
|
+
*
|
|
276
|
+
* @param {{ module_or_path: InitInput | Promise<InitInput> }} module_or_path - Passing `InitInput` directly is deprecated.
|
|
277
|
+
*
|
|
278
|
+
* @returns {Promise<InitOutput>}
|
|
279
|
+
*/
|
|
280
|
+
export default function __wbg_init (module_or_path?: { module_or_path: InitInput | Promise<InitInput> } | InitInput | Promise<InitInput>): Promise<InitOutput>;
|