xitdb 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,9 @@
1
+ {
2
+ "permissions": {
3
+ "allow": [
4
+ "Bash(bun test:*)",
5
+ "Bash(bun run tsc:*)",
6
+ "Bash(bunx tsc:*)"
7
+ ]
8
+ }
9
+ }
package/README.md ADDED
@@ -0,0 +1,403 @@
1
+ xitdb is an immutable database written in TypeScript.
2
+
3
+ * Each transaction efficiently creates a new "copy" of the database, and past copies can still be read from.
4
+ * It supports writing to a file as well as purely in-memory use.
5
+ * No query engine of any kind. You just write data structures (primarily an `ArrayList` and `HashMap`) that can be nested arbitrarily.
6
+ * No dependencies besides the JavaScript standard library.
7
+ * This project is a port of the [original Zig version](https://github.com/radarroark/xitdb) and the [Java version](https://github.com/radarroark/xitdb-java).
8
+
9
+ This database was originally made for the [xit version control system](https://github.com/radarroark/xit), but I bet it has a lot of potential for other projects. The combination of being immutable and having an API similar to in-memory data structures is pretty powerful. Consider using it [instead of SQLite](https://gist.github.com/radarroark/03a0724484e1111ef4c05d72a935c42c) for your TypeScript projects: it's simpler, it's pure TypeScript, and it creates no impedance mismatch with your program the way SQL databases do.
10
+
11
+ * [Example](#example)
12
+ * [Initializing a Database](#initializing-a-database)
13
+ * [Types](#types)
14
+ * [Cloning and Undoing](#cloning-and-undoing)
15
+ * [Large Byte Arrays](#large-byte-arrays)
16
+ * [Iterators](#iterators)
17
+ * [Hashing](#hashing)
18
+
19
+ ## Example
20
+
21
+ In this example, we create a new database, write some data in a transaction, and read the data afterwards.
22
+
23
+ ```typescript
24
+ // init the db
25
+ using core = await CoreBufferedFile.create('main.db');
26
+ const hasher = new Hasher('SHA-1');
27
+ const db = await Database.create(core, hasher);
28
+
29
+ // to get the benefits of immutability, the top-level data structure
30
+ // must be an ArrayList, so each transaction is stored as an item in it
31
+ const history = await WriteArrayList.create(await db.rootCursor());
32
+
33
+ // this is how a transaction is executed. we call history.appendContext,
34
+ // providing it with the most recent copy of the db and a context
35
+ // function. the context function will run before the transaction has
36
+ // completed. this function is where we can write changes to the db.
37
+ // if any error happens in it, the transaction will not complete and
38
+ // the db will be unaffected.
39
+ //
40
+ // after this transaction, the db will look like this if represented
41
+ // as JSON (in reality the format is binary):
42
+ //
43
+ // {"foo": "foo",
44
+ // "bar": "bar",
45
+ // "fruits": ["apple", "pear", "grape"],
46
+ // "people": [
47
+ // {"name": "Alice", "age": 25},
48
+ // {"name": "Bob", "age": 42}
49
+ // ]}
50
+ await history.appendContext(await history.getSlot(-1), async (cursor) => {
51
+ const moment = await WriteHashMap.create(cursor);
52
+
53
+ await moment.putByString('foo', new Bytes('foo'));
54
+ await moment.putByString('bar', new Bytes('bar'));
55
+
56
+ const fruitsCursor = await moment.putCursorByString('fruits');
57
+ const fruits = await WriteArrayList.create(fruitsCursor);
58
+ await fruits.append(new Bytes('apple'));
59
+ await fruits.append(new Bytes('pear'));
60
+ await fruits.append(new Bytes('grape'));
61
+
62
+ const peopleCursor = await moment.putCursorByString('people');
63
+ const people = await WriteArrayList.create(peopleCursor);
64
+
65
+ const aliceCursor = await people.appendCursor();
66
+ const alice = await WriteHashMap.create(aliceCursor);
67
+ await alice.putByString('name', new Bytes('Alice'));
68
+ await alice.putByString('age', new Uint(25));
69
+
70
+ const bobCursor = await people.appendCursor();
71
+ const bob = await WriteHashMap.create(bobCursor);
72
+ await bob.putByString('name', new Bytes('Bob'));
73
+ await bob.putByString('age', new Uint(42));
74
+ });
75
+
76
+ // get the most recent copy of the database, like a moment
77
+ // in time. the -1 index will return the last item in the list.
78
+ const momentCursor = await history.getCursor(-1);
79
+ const moment = await ReadHashMap.create(momentCursor!);
80
+
81
+ // we can read the value of "foo" from the map by getting
82
+ // the cursor to "foo" and then calling readBytes on it
83
+ const fooCursor = await moment.getCursorByString('foo');
84
+ const fooValue = await fooCursor!.readBytes(MAX_READ_BYTES);
85
+ console.log(new TextDecoder().decode(fooValue)); // "foo"
86
+
87
+ // to get the "fruits" list, we get the cursor to it and
88
+ // then pass it to the ReadArrayList constructor
89
+ const fruitsCursor = await moment.getCursorByString('fruits');
90
+ const fruits = new ReadArrayList(fruitsCursor!);
91
+ console.log(await fruits.count()); // 3
92
+
93
+ // now we can get the first item from the fruits list and read it
94
+ const appleCursor = await fruits.getCursor(0);
95
+ const appleValue = await appleCursor!.readBytes(MAX_READ_BYTES);
96
+ console.log(new TextDecoder().decode(appleValue)); // "apple"
97
+ ```
98
+
99
+ ## Initializing a Database
100
+
101
+ A `Database` is initialized with an implementation of the `Core` interface, which determines how the I/O is done. There are three implementations of `Core` in this library: `CoreBufferedFile`, `CoreFile`, and `CoreMemory`.
102
+
103
+ * `CoreBufferedFile` databases, like in the example above, write to a file while using an in-memory buffer to dramatically improve performance. This is highly recommended if you want to create a file-based database.
104
+ * `CoreFile` databases use no buffering when reading and writing data. This is almost never necessary but it's useful as a benchmark comparison with `CoreBufferedFile` databases.
105
+ * `CoreMemory` databases work completely in memory.
106
+
107
+ Usually, you want to use a top-level `ArrayList` like in the example above, because that allows you to store a reference to each copy of the database (which I call a "moment"). This is how it supports transactions, despite not having any rollback journal or write-ahead log. It's an append-only database, so the data you are writing is invisible to any reader until the very last step, when the top-level list's header is updated.
108
+
109
+ You can also use a top-level `HashMap`, which is useful for ephemeral databases where immutability or transaction safety isn't necessary. Since xitdb supports in-memory databases, you could use it as an over-the-wire serialization format. Much like "Cap'n Proto", xitdb has no encoding/decoding step: you just give the buffer to xitdb and it can immediately read from it.
110
+
111
+ ## Types
112
+
113
+ In xitdb there are a variety of immutable data structures that you can nest arbitrarily:
114
+
115
+ * `HashMap` contains key-value pairs stored with a hash
116
+ * `HashSet` is like a `HashMap` that only sets the keys; it is useful when only checking for membership
117
+ * `CountedHashMap` and `CountedHashSet` are just a `HashMap` and `HashSet` that maintain a count of their contents
118
+ * `ArrayList` is a growable array
119
+ * `LinkedArrayList` is like an `ArrayList` that can also be efficiently sliced and concatenated
120
+
121
+ All data structures use the hash array mapped trie, invented by Phil Bagwell. The `LinkedArrayList` is based on his later work on RRB trees. These data structures were originally made immutable and widely available by Rich Hickey in Clojure. To my knowledge, they haven't been available in any open source database until xitdb.
122
+
123
+ There are also scalar types you can store in the above-mentioned data structures:
124
+
125
+ * `Bytes` is a byte array
126
+ * `Uint` is an unsigned 64-bit int
127
+ * `Int` is a signed 64-bit int
128
+ * `Float` is a 64-bit float
129
+
130
+ You may also want to define custom types. For example, you may want to store a big integer that can't fit in 64 bits. You could just store this with `Bytes`, but when reading the byte array there wouldn't be any indication that it should be interpreted as a big integer.
131
+
132
+ In xitdb, you can optionally store a format tag with a byte array. A format tag is a 2-byte value that is stored alongside the byte array. Readers can use it to decide how to interpret the byte array. Here's an example of storing a random 256-bit number with `bi` as the format tag:
133
+
134
+ ```typescript
135
+ const randomBytes = new Uint8Array(32);
136
+ crypto.getRandomValues(randomBytes);
137
+ await moment.putByString('random-number', new Bytes(randomBytes, new TextEncoder().encode('bi')));
138
+ ```
139
+
140
+ Then, you can read it like this:
141
+
142
+ ```typescript
143
+ const randomNumberCursor = await moment.getCursorByString('random-number');
144
+ const randomNumber = await randomNumberCursor!.readBytesObject(MAX_READ_BYTES);
145
+ console.log(new TextDecoder().decode(randomNumber.formatTag!)); // "bi"
146
+ const randomBigInt = randomNumber.value;
147
+ ```
148
+
149
+ There are many types you may want to store this way. Maybe an ISO-8601 date like `2026-01-01T18:55:48Z` could be stored with `dt` as the format tag. It's also great for storing custom objects. Just define the object, serialize it as a byte array using whatever mechanism you wish, and store it with a format tag. Keep in mind that format tags can be *any* 2 bytes, so there are 65536 possible format tags.
150
+
151
+ ## Cloning and Undoing
152
+
153
+ A powerful feature of immutable data is fast cloning. Any data structure can be instantly cloned and changed without affecting the original. Starting with the example code above, we can make a new transaction that creates a "food" list based on the existing "fruits" list:
154
+
155
+ ```typescript
156
+ await history.appendContext(await history.getSlot(-1), async (cursor) => {
157
+ const moment = await WriteHashMap.create(cursor);
158
+
159
+ const fruitsCursor = await moment.getCursorByString('fruits');
160
+ const fruits = new ReadArrayList(fruitsCursor!);
161
+
162
+ // create a new key called "food" whose initial value is
163
+ // based on the "fruits" list
164
+ const foodCursor = await moment.putCursorByString('food');
165
+ await foodCursor.write(fruits.slot());
166
+
167
+ const food = await WriteArrayList.create(foodCursor);
168
+ await food.append(new Bytes('eggs'));
169
+ await food.append(new Bytes('rice'));
170
+ await food.append(new Bytes('fish'));
171
+ });
172
+
173
+ const momentCursor = await history.getCursor(-1);
174
+ const moment = await ReadHashMap.create(momentCursor!);
175
+
176
+ // the food list includes the fruits
177
+ const foodCursor = await moment.getCursorByString('food');
178
+ const food = new ReadArrayList(foodCursor!);
179
+ console.log(await food.count()); // 6
180
+
181
+ // ...but the fruits list hasn't been changed
182
+ const fruitsCursor = await moment.getCursorByString('fruits');
183
+ const fruits = new ReadArrayList(fruitsCursor!);
184
+ console.log(await fruits.count()); // 3
185
+ ```
186
+
187
+ Before we continue, let's save the latest history index, so we can revert to this moment of the database later:
188
+
189
+ ```typescript
190
+ const historyIndex = (await history.count()) - 1;
191
+ ```
192
+
193
+ There's one catch you'll run into when cloning. If we try cloning a data structure that was created in the same transaction, it doesn't seem to work:
194
+
195
+ ```typescript
196
+ await history.appendContext(await history.getSlot(-1), async (cursor) => {
197
+ const moment = await WriteHashMap.create(cursor);
198
+
199
+ const bigCitiesCursor = await moment.putCursorByString('big-cities');
200
+ const bigCities = await WriteArrayList.create(bigCitiesCursor);
201
+ await bigCities.append(new Bytes('New York, NY'));
202
+ await bigCities.append(new Bytes('Los Angeles, CA'));
203
+
204
+ // create a new key called "cities" whose initial value is
205
+ // based on the "big-cities" list
206
+ const citiesCursor = await moment.putCursorByString('cities');
207
+ await citiesCursor.write(bigCities.slot());
208
+
209
+ const cities = await WriteArrayList.create(citiesCursor);
210
+ await cities.append(new Bytes('Charleston, SC'));
211
+ await cities.append(new Bytes('Louisville, KY'));
212
+ });
213
+
214
+ const momentCursor = await history.getCursor(-1);
215
+ const moment = await ReadHashMap.create(momentCursor!);
216
+
217
+ // the cities list contains all four
218
+ const citiesCursor = await moment.getCursorByString('cities');
219
+ const cities = new ReadArrayList(citiesCursor!);
220
+ console.log(await cities.count()); // 4
221
+
222
+ // ...but so does big-cities! we did not intend to mutate this
223
+ const bigCitiesCursor = await moment.getCursorByString('big-cities');
224
+ const bigCities = new ReadArrayList(bigCitiesCursor!);
225
+ console.log(await bigCities.count()); // 4
226
+ ```
227
+
228
+ `big-cities` was mutated because all data in a given transaction is temporarily mutable. This is a very important optimization, but in this case, it's not what we want.
229
+
230
+ To show how to fix this, let's first undo the transaction we just made. Here we use the `historyIndex` we saved before to revert to the older database moment:
231
+
232
+ ```typescript
233
+ await history.append((await history.getSlot(historyIndex))!);
234
+ ```
235
+
236
+ This time, after making the "big cities" list, we call `freeze`, which tells xitdb to consider all data made so far in the transaction to be immutable. After that, we can clone it into the "cities" list and it will work the way we wanted:
237
+
238
+ ```typescript
239
+ await history.appendContext(await history.getSlot(-1), async (cursor) => {
240
+ const moment = await WriteHashMap.create(cursor);
241
+
242
+ const bigCitiesCursor = await moment.putCursorByString('big-cities');
243
+ const bigCities = await WriteArrayList.create(bigCitiesCursor);
244
+ await bigCities.append(new Bytes('New York, NY'));
245
+ await bigCities.append(new Bytes('Los Angeles, CA'));
246
+
247
+ // freeze here, so big-cities won't be mutated
248
+ cursor.db.freeze();
249
+
250
+ // create a new key called "cities" whose initial value is
251
+ // based on the "big-cities" list
252
+ const citiesCursor = await moment.putCursorByString('cities');
253
+ await citiesCursor.write(bigCities.slot());
254
+
255
+ const cities = await WriteArrayList.create(citiesCursor);
256
+ await cities.append(new Bytes('Charleston, SC'));
257
+ await cities.append(new Bytes('Louisville, KY'));
258
+ });
259
+
260
+ const momentCursor = await history.getCursor(-1);
261
+ const moment = await ReadHashMap.create(momentCursor!);
262
+
263
+ // the cities list contains all four
264
+ const citiesCursor = await moment.getCursorByString('cities');
265
+ const cities = new ReadArrayList(citiesCursor!);
266
+ console.log(await cities.count()); // 4
267
+
268
+ // and big-cities only contains the original two
269
+ const bigCitiesCursor = await moment.getCursorByString('big-cities');
270
+ const bigCities = new ReadArrayList(bigCitiesCursor!);
271
+ console.log(await bigCities.count()); // 2
272
+ ```
273
+
274
+ ## Large Byte Arrays
275
+
276
+ When reading and writing large byte arrays, you probably don't want to have all of their contents in memory at once. To incrementally write to a byte array, just get a writer from a cursor:
277
+
278
+ ```typescript
279
+ const longTextCursor = await moment.putCursorByString('long-text');
280
+ const cursorWriter = await longTextCursor.writer();
281
+ for (let i = 0; i < 50; i++) {
282
+ await cursorWriter.write(new TextEncoder().encode('hello, world\n'));
283
+ }
284
+ await cursorWriter.finish(); // remember to call this!
285
+ ```
286
+
287
+ If you need to set a format tag for the byte array, put it in the `formatTag` field of the writer before you call `finish`.
288
+
289
+ To read a byte array incrementally, get a reader from a cursor:
290
+
291
+ ```typescript
292
+ const longTextCursor = await moment.getCursorByString('long-text');
293
+ const cursorReader = await longTextCursor!.reader();
294
+ const content = new Uint8Array(Number(await longTextCursor!.count()));
295
+ await cursorReader.readFully(content);
296
+ const lines = new TextDecoder().decode(content).split('\n').filter(l => l.length > 0);
297
+ console.log(lines.length); // 50
298
+ ```
299
+
300
+ ## Iterators
301
+
302
+ All data structures support iteration. Here's an example of iterating over an `ArrayList` and printing all of the keys and values of each `HashMap` contained in it:
303
+
304
+ ```typescript
305
+ const peopleCursor = await moment.getCursorByString('people');
306
+ const people = new ReadArrayList(peopleCursor!);
307
+
308
+ const peopleIter = people.iterator();
309
+ await peopleIter.init();
310
+ while (await peopleIter.hasNext()) {
311
+ const personCursor = await peopleIter.next();
312
+ const person = await ReadHashMap.create(personCursor!);
313
+ const personIter = person.iterator();
314
+ await personIter.init();
315
+ while (await personIter.hasNext()) {
316
+ const kvPairCursor = await personIter.next();
317
+ const kvPair = await kvPairCursor!.readKeyValuePair();
318
+
319
+ const key = new TextDecoder().decode(await kvPair.keyCursor.readBytes(MAX_READ_BYTES));
320
+
321
+ switch (kvPair.valueCursor.slot().tag) {
322
+ case Tag.SHORT_BYTES:
323
+ case Tag.BYTES:
324
+ console.log(`${key}: ${new TextDecoder().decode(await kvPair.valueCursor.readBytes(MAX_READ_BYTES))}`);
325
+ break;
326
+ case Tag.UINT:
327
+ console.log(`${key}: ${kvPair.valueCursor.readUint()}`);
328
+ break;
329
+ case Tag.INT:
330
+ console.log(`${key}: ${kvPair.valueCursor.readInt()}`);
331
+ break;
332
+ case Tag.FLOAT:
333
+ console.log(`${key}: ${kvPair.valueCursor.readFloat()}`);
334
+ break;
335
+ }
336
+ }
337
+ }
338
+ ```
339
+
340
+ The above code iterates over `people`, which is an `ArrayList`, and for each person (which is a `HashMap`), it iterates over each of its key-value pairs.
341
+
342
+ The iteration of the `HashMap` looks the same with `HashSet`, `CountedHashMap`, and `CountedHashSet`. When iterating, you call `readKeyValuePair` on the cursor and can read the `keyCursor` and `valueCursor` from it. In maps, `put` sets the key and value. In sets, `put` only sets the key; the value will always have a tag type of `NONE`.
343
+
344
+ ## Hashing
345
+
346
+ The hashing data structures will create the hash for you when you call methods like `putByString` or `getCursorByString` and provide the key as a string. If you want to do the hashing yourself, there are methods like `put` and `getCursor` that take a `Uint8Array` as the key, which should be the hash that you computed.
347
+
348
+ When initializing a database, you tell xitdb how to hash with the `Hasher`. If you're using SHA-1, it will look like this:
349
+
350
+ ```typescript
351
+ using core = await CoreBufferedFile.create('main.db');
352
+ const hasher = new Hasher('SHA-1');
353
+ const db = await Database.create(core, hasher);
354
+ ```
355
+
356
+ The size of the hash in bytes will be stored in the database's header. If you try opening it later with a hashing algorithm that has the wrong hash size, it will throw an exception. If you are unsure what hash size the database uses, this creates a chicken-and-egg problem. You can read the header before initializing the database like this:
357
+
358
+ ```typescript
359
+ await core.seek(0);
360
+ const header = await Header.read(core);
361
+ console.log(header.hashSize); // 20
362
+ ```
363
+
364
+ The hash size alone does not disambiguate hashing algorithms, though. In addition, xitdb reserves four bytes in the header that you can use to put the name of the algorithm. To set it, pass the hash id as the second argument to the `Hasher` constructor:
365
+
366
+ ```typescript
367
+ const hasher = new Hasher('SHA-1', Hasher.stringToId('sha1'));
368
+ ```
369
+
370
+ The hash id is only written to the database header when it is first initialized. When you open it later, the hash id in the `Hasher` is ignored. You can read the hash id of an existing database like this:
371
+
372
+ ```typescript
373
+ await core.seek(0);
374
+ const header = await Header.read(core);
375
+ console.log(Hasher.idToString(header.hashId)); // "sha1"
376
+ ```
377
+
378
+ If you want to use SHA-256, I recommend using `sha2` as the hash id. You can then distinguish between SHA-256 and SHA-512 using the hash size, like this:
379
+
380
+ ```typescript
381
+ let hasher: Hasher;
382
+ const hashIdStr = Hasher.idToString(header.hashId);
383
+
384
+ switch (hashIdStr) {
385
+ case 'sha1':
386
+ hasher = new Hasher('SHA-1', header.hashId);
387
+ break;
388
+ case 'sha2':
389
+ switch (header.hashSize) {
390
+ case 32:
391
+ hasher = new Hasher('SHA-256', header.hashId);
392
+ break;
393
+ case 64:
394
+ hasher = new Hasher('SHA-512', header.hashId);
395
+ break;
396
+ default:
397
+ throw new Error('Invalid hash size');
398
+ }
399
+ break;
400
+ default:
401
+ throw new Error('Invalid hash algorithm');
402
+ }
403
+ ```
package/bun.lock ADDED
@@ -0,0 +1,24 @@
1
+ {
2
+ "lockfileVersion": 1,
3
+ "configVersion": 1,
4
+ "workspaces": {
5
+ "": {
6
+ "name": "xitdb",
7
+ "devDependencies": {
8
+ "@types/bun": "latest",
9
+ "typescript": "^5.3.0",
10
+ },
11
+ },
12
+ },
13
+ "packages": {
14
+ "@types/bun": ["@types/bun@1.3.6", "", { "dependencies": { "bun-types": "1.3.6" } }, "sha512-uWCv6FO/8LcpREhenN1d1b6fcspAB+cefwD7uti8C8VffIv0Um08TKMn98FynpTiU38+y2dUO55T11NgDt8VAA=="],
15
+
16
+ "@types/node": ["@types/node@25.0.9", "", { "dependencies": { "undici-types": "~7.16.0" } }, "sha512-/rpCXHlCWeqClNBwUhDcusJxXYDjZTyE8v5oTO7WbL8eij2nKhUeU89/6xgjU7N4/Vh3He0BtyhJdQbDyhiXAw=="],
17
+
18
+ "bun-types": ["bun-types@1.3.6", "", { "dependencies": { "@types/node": "*" } }, "sha512-OlFwHcnNV99r//9v5IIOgQ9Uk37gZqrNMCcqEaExdkVq3Avwqok1bJFmvGMCkCE0FqzdY8VMOZpfpR3lwI+CsQ=="],
19
+
20
+ "typescript": ["typescript@5.9.3", "", { "bin": { "tsc": "bin/tsc", "tsserver": "bin/tsserver" } }, "sha512-jl1vZzPDinLr9eUt3J/t7V6FgNEw9QjvBPdysz9KfQDD41fQrC2Y4vKQdiaUpFT4bXlb1RHhLpp8wtm6M5TgSw=="],
21
+
22
+ "undici-types": ["undici-types@7.16.0", "", {}, "sha512-Zz+aZWSj8LE6zoxD+xrjh4VfkIG8Ya6LvYkZqtUQGJPZjYl53ypCaUwWqo7eI0x66KBGeRo+mlBEkMSeSZ38Nw=="],
23
+ }
24
+ }
package/bunfig.toml ADDED
@@ -0,0 +1 @@
1
+ # Bun configuration for xitdb-ts