querysub 0.178.0 → 0.181.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (51) hide show
  1. package/.VSCodeCounter/2025-07-01_18-19-28/details.md +233 -0
  2. package/.VSCodeCounter/2025-07-01_18-19-28/diff-details.md +15 -0
  3. package/.VSCodeCounter/2025-07-01_18-19-28/diff.csv +2 -0
  4. package/.VSCodeCounter/2025-07-01_18-19-28/diff.md +19 -0
  5. package/.VSCodeCounter/2025-07-01_18-19-28/diff.txt +22 -0
  6. package/.VSCodeCounter/2025-07-01_18-19-28/results.csv +220 -0
  7. package/.VSCodeCounter/2025-07-01_18-19-28/results.json +1 -0
  8. package/.VSCodeCounter/2025-07-01_18-19-28/results.md +60 -0
  9. package/.VSCodeCounter/2025-07-01_18-19-28/results.txt +281 -0
  10. package/package.json +4 -4
  11. package/spec.txt +299 -26
  12. package/src/-c-identity/IdentityController.ts +3 -2
  13. package/src/-d-trust/NetworkTrust2.ts +7 -2
  14. package/src/-f-node-discovery/NodeDiscovery.ts +6 -3
  15. package/src/-h-path-value-serialize/PathValueSerializer.ts +3 -1
  16. package/src/0-path-value-core/PathValueCommitter.ts +2 -2
  17. package/src/0-path-value-core/PathValueController.ts +6 -2
  18. package/src/0-path-value-core/archiveLocks/ArchiveLocks2.ts +34 -9
  19. package/src/0-path-value-core/pathValueArchives.ts +7 -0
  20. package/src/0-path-value-core/pathValueCore.ts +1 -1
  21. package/src/1-path-client/RemoteWatcher.ts +1 -1
  22. package/src/1-path-client/pathValueClientWatcher.ts +93 -12
  23. package/src/2-proxy/PathValueProxyWatcher.ts +196 -56
  24. package/src/2-proxy/pathValueProxy.ts +22 -0
  25. package/src/3-path-functions/PathFunctionRunner.ts +4 -27
  26. package/src/3-path-functions/syncSchema.ts +2 -1
  27. package/src/4-deploy/deployMain.ts +3 -4
  28. package/src/4-deploy/edgeClientWatcher.tsx +13 -16
  29. package/src/4-deploy/edgeNodes.ts +3 -3
  30. package/src/4-deploy/git.ts +29 -5
  31. package/src/4-dom/qreact.tsx +372 -73
  32. package/src/4-querysub/Querysub.ts +83 -9
  33. package/src/5-diagnostics/FullscreenModal.tsx +8 -5
  34. package/src/5-diagnostics/Modal.tsx +7 -5
  35. package/src/5-diagnostics/qreactDebug.tsx +22 -4
  36. package/src/archiveapps/archiveJoinEntry.ts +7 -5
  37. package/src/config.ts +5 -0
  38. package/src/diagnostics/logs/diskLogger.ts +9 -4
  39. package/src/diagnostics/misc-pages/SnapshotViewer.tsx +7 -3
  40. package/src/diagnostics/watchdog.ts +7 -2
  41. package/src/functional/onNextPaint.ts +93 -0
  42. package/src/functional/throttleRerender.tsx +15 -0
  43. package/src/library-components/Input.tsx +2 -1
  44. package/src/library-components/InputLabel.tsx +5 -5
  45. package/src/library-components/ShowMore.tsx +0 -1
  46. package/src/library-components/URLParam.ts +6 -0
  47. package/src/library-components/drag.ts +37 -8
  48. package/src/library-components/errorNotifications.tsx +19 -16
  49. package/src/misc/extractType.ts +1 -1
  50. package/src/path.ts +3 -0
  51. package/src/user-implementation/userData.ts +7 -1
package/spec.txt CHANGED
@@ -23,6 +23,305 @@ TIMING: Transactions, currently about ~1.5ms per transaction + 10us per write an
23
23
  TIMING: Function calls, about ~1ms-3ms for a trivial function, if it can be batched with similar functions
24
24
  - Slower if we need to sync new paths
25
25
 
26
+
27
+ Fixed function props / function closure parsing
28
+ - Presently, if a parent re-renders, and it passed a lambda to a component, that component will always have changed props. HOWEVER, this is very inefficient!
29
+ 1) Process the file to convert lambdas to provide debug information which includes all of the values they close upon (as properties set on the lambda itself). Also an id, unique to that specific piece of code.
30
+ 2) When evaluating if props changed, if the lambda id is the same, and the closed upon variables are ===, then... it's the same thing, so consider the prop unchanged
31
+ 3) Test if all of our "updateOperation" lambdas in SideOpConfig.tsx work with this. I THINK updateOperation is the same, but if it isn't... we can always recognize nested lambdas.
32
+
33
+ Option to not use permissions checks locks
34
+ 1) Create a way to have any code run without locks (we might already have this?)
35
+ 2) Add a flag in schemas to either specify all functions, or certain functions should run permissions without locks.
36
+ - Also something to specify a function SHOULD run permissions with locks
37
+ - Using the ArchiveViewer verify "heartbeat" goes from 25 to 0 locks
38
+ - If you step into pathValueClientWatcher.ts:setValues in FunctionRunner, you can see the actual paths that are used.
39
+ - Benchmark a simple function to see how much faster this is, and how much less memory we use
40
+ - If it doesn't matter... maybe remove the feature?
41
+
42
+ More corruption resistance files
43
+ - Add a new serialization format, via versioning in the settings
44
+ - Each datum will be length prefixed WITH a special sentinel AND with a checksum for the datum data
45
+ - The sentinel can depend on the length (but otherwise be constant)
46
+ - We can have a recovery mode (automatically attempted if the checksums are wrong?) where we scan the file for length+sentinel pairs (where the sentinel matches the checksum), see which ones have a valid checksum, and then use an algorithm to decide which ones to use
47
+ - Maybe we sort by success rate, which is [-numberOfValidChecksumsOverlapped, length], which is very likely to work EVEN if there are values which have the right checksum, but are actually corrupted.
48
+ - As long as the checksums are random enough, so... we should never use any small values for checksums, etc.
49
+ - Record the value layout in the settings, so we can generate arbitrary values, even if we've never seen that layout before.
50
+ - A series of parallel object arrays, with each object having flags (which indicate which values exist), values which always exist, etc
51
+ - Value types will be string, float64, byte, Buffer[]
52
+ - and... we might as well add support for short, int, float, and uint (uint is a good way to store a guid, via storing 8 uint variables).
53
+
54
+
55
+
56
+
57
+
58
+ Schema/binary PathValues accesses
59
+
60
+ Base code
61
+ reader
62
+ let viewTime = 0;
63
+ for (let user of global().users) {
64
+ viewTime = user.viewTime;
65
+ }
66
+ return viewTime;
67
+ writer
68
+ for (let [video, viewTime] of viewedVideos) {
69
+ global().users[userId].viewTime += viewTime;
70
+ }
71
+
72
+ Schema optimization
73
+ writer
74
+ let changeStream = new SchemaPath(() => global().users[wildcard].viewTime);
75
+ for (let [video, viewTime] of viewedVideos) {
76
+ changeStream.write(userId, viewTime);
77
+ }
78
+ reader
79
+ let schemaWatcher = new SchemaPath(() => global().users[wildcard].viewTime);
80
+ watch(() => {
81
+ let viewTime = 0;
82
+ for (let [value, userId] of schemaWatcher.getValues()) {
83
+ viewTime += value;
84
+ }
85
+ return viewTime;
86
+ });
87
+ reader specific path
88
+ // I guess we have to support gets as well. It should still be faster, as the schema is pre-defined
89
+ let viewTimeWatcher = new SchemaPath(() => global().users[wildcard].viewTime);
90
+ let factorWatcher = new SchemaPath(() => global().users[wildcard].factor);
91
+ watch(() => {
92
+ let viewTime = 0;
93
+ for (let [value, userId] of schemaWatcher.getValues()) {
94
+ let factor = factorWatcher.get(userId);
95
+ viewTime += value * factor;
96
+ }
97
+ return viewTime;
98
+ });
99
+ delta reader (eventually)
100
+ // This can easily be extrapolated to just getting deltas
101
+ let deltaWatcher = new DeltaWatcher(() => global().users[wildcard].viewTime);
102
+ let viewTime = 0;
103
+ onDelta(() => {
104
+ for (let [newValue, prevValue, userId] of deltaWatcher.getChanges()) {
105
+ viewTime += newValue - prevValue;
106
+ }
107
+ return viewTime;
108
+ });
109
+
110
+ - Can be binary, or not.
111
+ - Streams can work with non-streams, and vice versa.
112
+ - The streams need to be setup in a tree, so we can efficiently check for watchers of them
113
+ - We also need to support partial key watching. Often we will watch a few keys (and then within them, maybe all keys at another level).
114
+
115
+ 1) START by supporting write streams (but NOT read schemas), as this allows us to define our schemas.
116
+ - This will give us a big chunk, which we will pass around (even passing around arrays of chunks).
117
+ - The core will break this apart somewhat, with an object per schema, and then a tree of maps for the dynamicValues inside of it (and global lookups for the locks and values)
118
+ - We will never create this from PathValues, instead, we will append values to specific schemas as we build it
119
+ - And... locks need to be kept track of as well
120
+ - So we need a global "captureWrites", to set the state
121
+ - It will return a function, which will take parameter to finish the writes?
122
+ - Or... something. We need to look at proxyWatcher and see what the best way to do this is. I think it might check the values before finishing them?
123
+ - The schema builder functions (on the object returned by defining the schema), will then internally add to a lookup in the globally capturing state (keys by the schema seqNum)
124
+ - Directly adding to the PathValue for that schema
125
+ - A lot of fields will only be set on finish
126
+ - I think we still need to support the helper flags (.valid). Which is actually fine, even if it's binary. It's fairly easy to flip flags in binary data...
127
+ - When we need to reason about PathValues independently we can provide some kind of pointer that can be used in conjunction with the chunk
128
+ - Although most of the time we will just provide iterators to iterate over all the PathValues?
129
+ - I guess eventually the pointer could be serializable too, so that we could select PathValues with a Chunk + Buffer containing pointers, so we entirely manage our own memory. But... probably not for a while, as { schemaId: number; index: number }[] should be VERY efficient to allocate and store, especially if it isn't persisted.
130
+ // PathValue
131
+ {
132
+ schema: ({
133
+ type: "constant";
134
+ key: string | number;
135
+ } | {
136
+ type: "dynamic";
137
+ })[];
138
+ values: {
139
+ dynamicValues: (string | number)[];
140
+ // Pointer to position in value Buffer
141
+ setValue: Pointer;
142
+ // SeqNum to ReadLockGroup (as often many values will have the same set of ReadLocks)
143
+ readLocks: Pointer;
144
+ ...more fields for various PathValue fields
145
+ // NOTE: ReadLocks need to reference another binary structure
146
+ // - Probably via a locally unique seqNum, which is remapped upon receiving data over the network
147
+ }[];
148
+ }[]
149
+ // ReadLockGroups
150
+ {
151
+ seqNum: number;
152
+ lockSeqNums: {
153
+ schemaSeqNum: number;
154
+ seqNum: number;
155
+ }[];
156
+ }[]
157
+ // ReadLocks
158
+ {
159
+ seqNum: number;
160
+ schema: ({
161
+ type: "constant";
162
+ key: string | number;
163
+ } | {
164
+ type: "dynamic";
165
+ })[];
166
+ locks: {
167
+ seqNum: number;
168
+ dynamicValues: (string | number)[];
169
+ startTime, endTime, flags, ...;
170
+ }[];
171
+ }[]
172
+ // Values
173
+ {
174
+ seqNum: number;
175
+ // arbitrary binary data
176
+ }[]
177
+ 2) Isolate this object structure, so it is only manipulated or read via helpers, so we can change it to use a binary format later on
178
+ - This means adding functions to go to/from binary, even at the start, and just having it go to PathValue and use PathValueSerializer for now.
179
+ - We will have unique id (a number, locally unique) per schema, so we can very quickly tell if anything might match
180
+ - OH! use global objects for schema defs, so equivalent schema defs share an object. This lets us store flags for them!
181
+ - Not the whole schema, just the def part
182
+ - Just a pointer as well
183
+ - If we have conflicts with other schemas (now or in the future), this needs to point to the shared data for the combined schema.
184
+ - Expose the shared objects as createdSharedAccessor<Object>(key) => { get(schema); set(schema, value) }
185
+ 3) THEN, use this structure throughout, replacing PathValues in the entire application with lists of these chunks
186
+ - We'll probably store in the core in an entirely splayed manner. Maybe the schema, then a tree of dynamic values with the leaves being the set value?
187
+ - The set value will contain much of the PathValue, excluding the path
188
+ - Update the core code to store data in schemas
189
+ - If schemas conflict, we take the more general union of them. Annoying, but this should be rare, as schemas aren't recursive.
190
+ - Ex, "x.*.z" and "x.y.*" NEED to be combined to "x.*.*", at least for storage.
191
+ - AND, we need to store the original schemas, and have them MAP to the combined schema!
192
+ - So we end up with a non-schema data storage, and a bunch of schemas
193
+ - TWO ways to access data in schemas
194
+ - If you have a schema, you just directly lookup your data in the shared schema data, which is owned by the core, but every schema points to it
195
+ - If you don't have a schema...
196
+ - We need a lookup of schemas as values with wildcard (I guess empty keys equals a wildcard? Hmm...)
197
+ - I think we disallow reading empty keys? If not... maybe we should? They are already used as wildcard watches anyways...
198
+ - Check each part of the path, with wildcard filling the rest "x.y.z" => ["*.*.*", "x.*.*", "x.y.*", "x.y.z"]
199
+ - OH! That's not enough. Because... "x.*.z" is a valid path, which won't match any of them.
200
+ - So... maybe a sorted list?
201
+ - Should be reasonably fast...
202
+ 4) Then update it to be binary
203
+ - On write
204
+ - And on network traversal
205
+ - I think PathValueSerializer needs to be rewritten. Basically, it will be given schemas ahead of time, sometimes (or maybe always, as we might as well always encode in our base schema), and then encode the paths more efficiently.
206
+ - OH! ReadLocks need some kind of schema as well. Hmm... I guess, they can use the schema of their readers?
207
+ 5) ALSO, create some kind of global string lookup? A lot of string keys will be the same, and it's easier to manipulate/store numbers
208
+ - We will need to provide definition when we send them over the wire though
209
+ - THIS is IMPORTANT! Without this our strings become inefficient, and the fastest code would involve mapping all strings (such as userIds), to numbers at an application level, which is a lot of work that the framework should really handle...
210
+ - Maybe not for ALL string keys? Hmm...
211
+ - OH! And... they only need to be unique per schema? Hmm...
212
+
213
+ - Test with structures like "components.*.x" (but lots of different x values)
214
+ - Hmm... the big speed issue is that the component ids don't map to an array. Although, in theory, we could do that, remapping ids to indexes. And then... reuse indexes on gc, so they become direct offsets into binary memory.
215
+ - Then... I guess we can remember the last mapping, and so quickly go from string to index, with just a === on the strings (which should just be a pointer comparison), which... and even though the index is dynamic, that's still basically just a pointer. So... we could recover a lot of the speed
216
+ - Maybe we should try this out in a mini-benchmark, with a greatly simplified structure (values constant size, no extra fields, all schemas are just 3 long and have similar structure)?
217
+ - AH! Have the schema context wrapper (so schema reads/write get registered), give us a context object, which it gives back for the same type of access.
218
+ - THEN, we can have state per watcher, and... maybe per watcher+schema. Then we can use this to cache the last keys at indexes per schema, per watcher... and for component accesses... this will always be the same, making the mapping instant every time
219
+ 6) Support reading from schemas as well
220
+ - If the data is already stored in the schema format, querying in this format should be very fast
221
+ - We will need a shorter read binary format, which has a schema and dynamic values
222
+ 7) AUTOMATIC schema definition generation from createSchema (replacing the code), and replacing (where possible) the uses of the schema to use the schema definition
223
+ - Probably the hardest part, but... the most important. Without this our application code becomes too difficult to use
224
+ - Nested accesses become difficult, but... not impossible.
225
+ - Object assignment needs to become global
226
+ Local assignment
227
+ let list = data().list;
228
+ for (let datum of newData) {
229
+ list[datum.key] = datum.value;
230
+ }
231
+ Global assignment
232
+ let list = data().list;
233
+ for (let datum of newData) {
234
+ data().list[datum.key] = datum.value;
235
+ }
236
+ Schema based
237
+ let list = data().list;
238
+ for (let datum of newData) {
239
+ listSchema.write(datum.key, datum.value);
240
+ }
241
+
242
+ Reads are more likely to use highly nested assignment... even cross function
243
+ - Cross function stuff is hard.
244
+ - We might need to specialize functions?
245
+ - As in, per type of possible schema input object, make a new function, and still leave a non-schema function.
246
+ - Actually... this should be fairly easy. We aren't going to use === on the functions... ever? And if we do... then I guess it'll fail (or they can mark them as non-specializable, or... we can detect if === is ever used on the function).
247
+
248
+ 8) Delta watchers
249
+ - If we store change reasons in the schema format... then we can query deltas for free
250
+ - Stored in the packed format.
251
+
252
+ Global nested object state for latest values
253
+ - Can have multiple global objects, maybe one per collection / schema?
254
+ - We still need to register reads
255
+ - If we can get code to not write duplicate reads (which a lot of code can be verified to not do anyways), then tracking the reads becomes very simple with our binary schema data, basically just involving writing numbers (or even just the start/end of ranges) to an output Buffer.
256
+
257
+ Code transformation for accesses
258
+ - Add code which can parse a function, find any schema accesses (this requires walking the type tree, and maybe checking other files), and transform the code to call data.q() functions instead (if the schema is a schema2 schema)
259
+ - Add support for __callerFileName__, etc so this function can determine the caller file, so it can know how to parse it.
260
+ - At first do this via a stand-alone function call?
261
+ - And then make it a parameter on the schema?
262
+ - OR, should we transform the code directly?
263
+ - If we do it at runtime, we have to figure out capturing scope again. This is hard, and breaks static variables, but... maybe that's okay?
264
+ - We could always warn about static variables
265
+ - For any non-static variables (constants, functions, etc), we will need to duplicate them, and anything they access, etc.
266
+ - THIS is actually REALLY nice. We can add a file name for this so it can be discoverable in devtools, or, the user can set `debugger;`, at which point, they will see a file which just has the function and anything it depends on. This makes debugging easier, as it removes everything not involved in that function (and more importantly, shows everything that IS involved, which could be surprising, as you might miss the fact that certain functions are being called, but if their definition is there you are far less likely to miss it).
267
+
268
+ Code transformation to turn types into schemas?
269
+ - If we could do this... we could get fast behavior from normal typescript code
270
+ - We would need an out in case the code accesses the schema in a non-schema type way (just for accesses, the backend should already be able to handle this).
271
+
272
+ Binary ValuePaths, which are never decoded
273
+ 1) Allowing accessing a value in a way which returns the intermediate deserialized object (the Buffer[], strings lookup, etc) PLUS the id (or an object?).
274
+ - Call it, ValuePathBinary
275
+ 2) Allow using ValuePathBinary as an input to SchemaPath parameters, updating the SchemaPath code to directly copy the binary data
276
+ 3) Create a function which exclusively accesses ValuePathBinary, using it for nested sets (getting a value, using that to index another value, etc)
277
+ - Verify we are never actually decoding the values
278
+ 4) Hack together something where we can NOT decode strings in the ValuePath serializer, sometimes, as our special function will never use strings directly.
279
+ 5) Benchmark to see the benefit (probably via memory pressure), or not decoding the strings?
280
+
281
+ Automatic commit wrapping of local synced writes
282
+ - If we write or read from local synced state... and are not in a transaction, we should start a transaction, which extends beyond the end of the call, and is only stopped:
283
+ a) After Promise().finally()
284
+ b) When any non-local synced state is accessed (which will cause us to throw as well)
285
+ c) When any transactions are explicitly started
286
+ - This resolves the issue of having to constantly wrap state writes, which is just annoying
287
+ - We SHOULD wrap this into a singleton local watcher which we re-use for local events ("once" watchers). This will make events faster, and should make it easier to implement.
288
+ ALSO
289
+ - Allow accessing local state values without being in a synced state?
290
+
291
+ Server crash log protection
292
+ - Up to 10 times per hour, synchronously write unclassified logs to special single log blocks
293
+ - We will delete these special blocks when we finally write the logs
294
+ - This way if we get a fatal error, the console.error before we crash (which HOPEFULLY we get), will let us know what went wrong.
295
+ - OR... maybe not... maybe the server manager should just watch for crashes and log the most recent stdout/stderr logs?
296
+
297
+ PathValueController direct remote database support
298
+ --ext-remote "D:/code/other/loader.ts"
299
+ {
300
+ /** undefined means it doesn't need to be loaded remotely */
301
+ loadRemoteValue(path): Promise<unknown | undefined> | undefined;
302
+ }
303
+ We'll probably have a timeout, just providing epochValues if loadRemoteValue takes more than a few seconds
304
+ We COULD allow for core to be resolved to the same thing independent of version (by making the core register itself globally, and all cores to use the same thing).
305
+ - AND, we could do the same thing for a few other files, such as clientWatcher, etc
306
+ - This would allow clientWatcher.setValues to be called explicitly
307
+ - This MIGHT be useful, but... then again... it adds a lot of complexity
308
+
309
+ Hot render throttle
310
+ - We want all inputs to be hot, BUT, to not render on each keydown. So... we need to delay rendering triggering?
311
+ - BUT, we run into issues if a button then runs, which expects to have the most recent data closed upon? Ugh...
312
+
313
+ Auto commit mode
314
+ - If anything is accessed outside of synced state, enter a commit, and end that commit when:
315
+ a) Promise.resolve().finally
316
+ b) We try to end any other commit
317
+ - Should be a Querysub config value which is on by default
318
+ - VERY useful clientside
319
+ - We need to support "lazyClose" watcher in proxyWatcher, that DOESN'T reset the runningWatcher UNTIL it is explicitly reset, OR, until another watcher tries to run
320
+ - We need to detect synchronized accesses in proxyWatcher and start this special type of commit
321
+ - We need to know if we are in this state, and THROW if we access any non-synced paths
322
+ - This is mostly for non-local paths
323
+ - Throwing WHEN the paths are accessed is required, otherwise the error location is too annoying
324
+
26
325
  Watcher diff mode
27
326
  - Running stress tests (satTest.ts) with a single watcher shows that the slowest part is our function watcher, specifically all of the parts that don't operate in a delta mode (ex, setWatches).
28
327
  - If we made a watcher fully support a delta mode (which is fairly easy, as it converts all the changes to deltas anyways), the watcher would likely work MUCH faster.
@@ -126,14 +425,6 @@ Turn on and test backblaze storage
126
425
  - Test starting a server on another machine
127
426
  - After setting up the .json keys files, it... should just work?
128
427
 
129
- Undo support
130
- - Not needed for games, but needed for every other application
131
- - Probably by marking certain functions as "undoable", then showing (in the app UI), a list of "versions" (time points) at a certain path (showing all the changes under a path), and letting the user pick a point. Anything that is undoable can just be reverted, creating new writes, everything that isn't undoable... well, any non-undoable changes cause the undo to not be able to go that far
132
- - ALSO, anything that depends on writes has to be undoable as well, and... also undone (ideally nothing outside the undo scope would depend on it)
133
- - Alternatively, dependencies could be made "undoIgnored", which would allow them to stick around, ignoring that fact that their write has been clobbered.
134
- - OR, maybe we just ignore dependencies, as... undoing adds new writes, so anything that is undoable has to be something where reversing is fine?
135
- - OR, we made multiple undo modes, "safe", and "unsafe"? Or different undo flags, for failing on dependencies, or not failing? Hmm...
136
-
137
428
  Anti-rejection code (isn't REQUIRED to make the database useful, so we should wait. Would be pretty slick though... Ideally this can all be extensions that have no or only modular impact on the core functions, or even no or modular impact on the proxy? Although that might not be possible...)
138
429
  Summary
139
430
  Excess work (N^2) due to rejections
@@ -253,24 +544,6 @@ Anti-rejection code (isn't REQUIRED to make the database useful, so we should wa
253
544
 
254
545
  ++ Makes system a lot faster, by reducing rejections that don't change the value, and by increasing the efficiency of comparisons
255
546
 
256
- Explicit schema objects accesses
257
- - Allow defining a strict schema, with defines the names of both the keys and values, with fixed values just being implicit to the schema, and not requiring a key or any storage.
258
- - Would access values via the schema, which changes how to PathValue is interpretted
259
- - ALSO, we encode the value using the schema, so that when it is both encoded and interpretted with the schema, the operation becomes VERY fast.
260
- - ONLY for wire transfer, not archival storage
261
- - We can probably encode the schema when transferring it, storing all values of the same schema in a row, making it highly compresssed, and efficient to decode.
262
- - Could be used for function calls, which would make permission checks faster
263
- - Could help permission checks too
264
- - Would have to support multiple levels, so you can check the root permission check, etc (if all the levels use the schema object, this then becomes very efficient)
265
- - By default key schemas would derive from the Data schema
266
- - If values weren't serialized using the schema we can quickly fallback to using the path+value
267
- - We need a string wrapper so we can pass PathValue and then use our schema to access it the path on it
268
- - We also need to make ALL PathValue.path accesses use this helper function, even if they aren't providing a schema.
269
- - Right now we have 52 references to PathValue.path, so... this is very doable. Of course this just covers compatibility, but we would also want to allow not even decoding the path, such as in cases where we are just storing it in a Map and checking for ===.
270
- - A way to access paths to get a value that is not the path, but will preserve === (just threadwide is fine)
271
- - Using a number would be plausible. IF the path comes from a schema we can make the first 32 bits be associated with the schema, which can make finding a new number a lot faster.
272
- - AND THEN... the end goal, is to have functions which ONLY use schemas, and then transpile them to a schema-full language (such as C#, or even C).
273
-
274
547
  Automatic large value storage separation
275
548
  - If a value is > ~10MB, when we write it to archives, we should ALWAYS break it into another file
276
549
  - Have this decided via a flag on the PathValue, which can be dynamically set via size, or explicitly set.
@@ -7,7 +7,7 @@
7
7
  import debugbreak from "debugbreak";
8
8
  import { SocketFunction } from "socket-function/SocketFunction";
9
9
  import { CallerContext } from "socket-function/SocketFunctionTypes";
10
- import { cache, lazy } from "socket-function/src/caching";
10
+ import { cache, cacheWeak, lazy } from "socket-function/src/caching";
11
11
  import { getClientNodeId, getNodeId, getNodeIdDomain, getNodeIdIP, getNodeIdLocation, isClientNodeId } from "socket-function/src/nodeCache";
12
12
  import { getCommonName, getIdentityCA, getMachineId, getPublicIdentifier, getThreadKeyCert, parseCert, sign, validateCertificate, verify } from "../-a-auth/certs";
13
13
  import { getShortNumber } from "../bits";
@@ -175,7 +175,8 @@ const IdentityController = SocketFunction.register(
175
175
  })
176
176
  );
177
177
 
178
- const changeIdentityOnce = cache(async function changeIdentityOnce(connectionId: { nodeId: string }) {
178
+ // IMPORTANT! We need to cache per connection, not per nodeId, so caching based on connectionId is required!
179
+ const changeIdentityOnce = cacheWeak(async function changeIdentityOnce(connectionId: { nodeId: string }) {
179
180
  let nodeId = connectionId.nodeId;
180
181
  let threadKeyCert = getThreadKeyCert();
181
182
  let issuer = getIdentityCA();
@@ -6,13 +6,14 @@ import { SocketFunctionHook } from "socket-function/SocketFunctionTypes";
6
6
  import { SocketFunction } from "socket-function/SocketFunction";
7
7
  import { IdentityController_getMachineId } from "../-c-identity/IdentityController";
8
8
  import { cache, lazy } from "socket-function/src/caching";
9
- import { getNodeIdDomainMaybeUndefined, getNodeIdLocation } from "socket-function/src/nodeCache";
9
+ import { getNodeIdDomainMaybeUndefined, getNodeIdIP, getNodeIdLocation } from "socket-function/src/nodeCache";
10
10
  import { trustCertificate } from "socket-function/src/certStore";
11
11
  import { isClient, isServer } from "../config2";
12
12
  import debugbreak from "debugbreak";
13
- import { devDebugbreak, getDomain, isDevDebugbreak, isPublic } from "../config";
13
+ import { devDebugbreak, getDomain, isDevDebugbreak, isPublic, isRecovery } from "../config";
14
14
  import { formatTime } from "socket-function/src/formatting/format";
15
15
  import { runInSerial } from "socket-function/src/batching";
16
+ import { Querysub } from "../4-querysub/QuerysubController";
16
17
 
17
18
  // Cache the untrust list, to prevent bugs from causing too many backend reads (while also allowing
18
19
  // bad servers which make request before their trust is verified from staying broken).
@@ -34,6 +35,10 @@ export const requiresNetworkTrustHook: SocketFunctionHook = async config => {
34
35
  if (isClient()) {
35
36
  return;
36
37
  }
38
+ let caller = SocketFunction.getCaller();
39
+ if (getNodeIdIP(caller.nodeId) === "127.0.0.1" && isRecovery()) {
40
+ return;
41
+ }
37
42
  let machineId = IdentityController_getMachineId(SocketFunction.getCaller());
38
43
  let trusted = await isTrusted(machineId);
39
44
  if (!trusted) {
@@ -112,9 +112,12 @@ export async function getAllNodeIds() {
112
112
 
113
113
  // NOTE: We always wait for the time sync before returning any node ids. Because if we try to talk
114
114
  // to remote nodes with a massively out of sync time, they will likely reject our messages!
115
- await measureBlock(async () => {
116
- await waitForFirstTimeSync();
117
- }, "waitForFirstTimeSync");
115
+ let promise = waitForFirstTimeSync();
116
+ if (promise) {
117
+ await measureBlock(async () => {
118
+ await promise;
119
+ }, "waitForFirstTimeSync");
120
+ }
118
121
 
119
122
  if (nodeOverrides) {
120
123
  return nodeOverrides;
@@ -612,7 +612,9 @@ class PathValueSerializer {
612
612
  await delay("paintLoop");
613
613
  }
614
614
  }
615
- await delay("paintLoop");
615
+ if (stringBuffers.length > 1) {
616
+ await delay("paintLoop");
617
+ }
616
618
  }
617
619
  strings = stringArrays.flat();
618
620
  }
@@ -123,7 +123,7 @@ class PathValueCommitter {
123
123
  }
124
124
 
125
125
  private broadcastValues = batchFunction(
126
- { delay: 10, throttleWindow: 500 },
126
+ { delay: 10, throttleWindow: 500, noMeasure: true },
127
127
  async function internal_forwardWrites(valuesBatched: Set<PathValue>[]) {
128
128
  let values = new Set(valuesBatched.flatMap(x => Array.from(x)));
129
129
 
@@ -226,7 +226,7 @@ class PathValueCommitter {
226
226
  * at a rate faster than that will just pointlessly add lag.
227
227
  */
228
228
  public ingestRemoteValues = batchFunction(
229
- { delay: 16, throttleWindow: 1000, name: "ingestRemoteValues" },
229
+ { delay: 16, throttleWindow: 1000, name: "ingestRemoteValues", noMeasure: true },
230
230
  async (batched: {
231
231
  pathValues: PathValue[],
232
232
  parentsSynced?: string[];
@@ -211,7 +211,8 @@ export const PathValueController = SocketFunction.register(
211
211
  "PathValueController-1e062e2c-81c9-497b-b414-a46d0a4c2313",
212
212
  new PathValueControllerBase(),
213
213
  () => ({
214
- forwardWrites: {},
214
+ forwardWrites: {
215
+ },
215
216
 
216
217
  watchLockValid: {},
217
218
  onValidChange: {},
@@ -226,6 +227,9 @@ export const PathValueController = SocketFunction.register(
226
227
  }),
227
228
  () => ({
228
229
  hooks: [requiresNetworkTrustHook],
229
- })
230
+ }),
231
+ {
232
+ noFunctionMeasure: !isNode(),
233
+ }
230
234
  );
231
235
 
@@ -313,6 +313,7 @@ class TransactionLocker {
313
313
  return path;
314
314
  }
315
315
  private async deleteDataFile(key: string, reason: string): Promise<void> {
316
+ console.log(red(`Deleting data file ${key}, because ${reason}`));
316
317
  //await this.storage.setValue(key + ".reason", Buffer.from(reason));
317
318
  // Delete file, and confirmation as well
318
319
  await this.storage.deleteKey(key);
@@ -370,6 +371,8 @@ class TransactionLocker {
370
371
  let value = Buffer.from(JSON.stringify(strippedTransaction));
371
372
  await this.storage.setValue(key, value);
372
373
  }
374
+
375
+ private lastFilesRead: FileInfo[] | undefined;
373
376
  private async readDataState(): Promise<{
374
377
  rawDataFiles: FileInfo[];
375
378
  /** Confirmed FileInfos are === the FileInfos in rawDataFiles */
@@ -384,6 +387,16 @@ class TransactionLocker {
384
387
  let bufferCache = new Map<string, Buffer>();
385
388
  const tryToRead = async () => {
386
389
  let files = await this.storage.getKeys();
390
+ if (this.lastFilesRead) {
391
+ let prevFiles = new Set(this.lastFilesRead.map(a => a.file));
392
+ let newFiles = files.filter(a => !prevFiles.has(a.file));
393
+ let pastTime = Date.now() - ARCHIVE_PROPAGATION_TIME * 10;
394
+ let veryBadFiles = newFiles.filter(x => x.createTime < pastTime);
395
+ if (veryBadFiles.length > 0) {
396
+ console.error(`Old files suddenly appeared. This isn't possible, if they are old, they should have appeared when they were created! This likely means that our getKeys() failed to actually read all of the files. This is bad and can result in us deleting seemingly broken files for missing a confirmation, when they in fact had a confirmation.`, { files: veryBadFiles.map(x => x.file) });
397
+ }
398
+ }
399
+ this.lastFilesRead = files;
387
400
 
388
401
  let transactions: (Transaction & {
389
402
  seqNum: number;
@@ -550,7 +563,7 @@ class TransactionLocker {
550
563
  let createCount = transaction.ops.filter(a => a.type === "create").length;
551
564
  let deleteCount = transaction.ops.filter(a => a.type === "delete").length;
552
565
  let lockedFiles = transaction.lockedFilesMustEqual?.length;
553
- diskLog(`Applying transaction with ${createCount} creates and ${deleteCount} deletes. ${lockedFiles !== undefined && `Lock state depends on ${lockedFiles} files` || ""}`, {
566
+ diskLog(`Applying transaction with ${createCount} file creates and ${deleteCount} file deletes. ${lockedFiles !== undefined && `Lock state depends on ${lockedFiles} files` || ""}`, {
554
567
  transactions: transaction.ops.map(x => JSON.stringify(x)),
555
568
  });
556
569
  logNodeStats(`archives|TΔ Apply`, formatNumber, 1);
@@ -576,7 +589,7 @@ class TransactionLocker {
576
589
  };
577
590
  await Promise.all(list(CONCURRENT_WRITE_COUNT).map(runThread));
578
591
 
579
- diskLog(`Applied transaction with ${createCount} creates and ${deleteCount} deletes. ${lockedFiles !== undefined && `Lock state depends on ${lockedFiles} files` || ""}`, {
592
+ diskLog(`Applied transaction with ${createCount} file creates and file ${deleteCount} deletes. ${lockedFiles !== undefined && `Lock state depends on ${lockedFiles} files` || ""}`, {
580
593
  transactions: transaction.ops.map(x => JSON.stringify(x)),
581
594
  });
582
595
  }
@@ -681,7 +694,7 @@ class TransactionLocker {
681
694
  // where backblaze returns no files? Which it might be doing, as multiple times this code has
682
695
  // triggered (without this check), and deletes all of our files...
683
696
  let unconfirmedOldFiles2 = veryOldFiles.filter(a => !doubleCheckLookup.has(a) && doubleCheckDataFiles.has(a.file));
684
- console.warn(red(`Deleted ${unconfirmedOldFiles2.length} very old unconfirmed files`));
697
+ console.warn(red(`Deleted ${unconfirmedOldFiles2.length} very old unconfirmed files`), { files: unconfirmedOldFiles2.map(x => x.file) });
685
698
  diskLog(`archives|TΔ Delete Old Rejected File`, formatNumber, unconfirmedOldFiles2.length);
686
699
  // At the point the file was very old when we started reading, not part of the active transaction.
687
700
  for (let file of unconfirmedOldFiles2) {
@@ -697,11 +710,20 @@ class TransactionLocker {
697
710
  let usedConfirmations = new Set(dataState.confirmedDataFiles.map(a => this.getConfirmKey(a.file)));
698
711
  let oldEnoughConfirms = dataState.rawDataFiles.filter(x => x.file.endsWith(".confirm") && x.createTime < oldThreshold);
699
712
  let deprecatedFiles = oldEnoughConfirms.filter(a => !usedConfirmations.has(a.file));
700
- if (deprecatedFiles.length > 0) {
701
- console.warn(red(`Deleted ${deprecatedFiles.length} / ${oldEnoughConfirms.length} confirmations, for not having corresponding data files`));
702
- logNodeStats(`archives|TΔ Delete Deprecated Confirm`, formatNumber, deprecatedFiles.length);
703
- for (let file of deprecatedFiles) {
704
- await this.storage.deleteKey(file.file);
713
+ if (deprecatedFiles.length > 0 && dataState.rawDataFiles.length > 0) {
714
+ let dataState = await this.readDataState();
715
+
716
+ let usedConfirmations = new Set(dataState.confirmedDataFiles.map(a => this.getConfirmKey(a.file)));
717
+ let oldEnoughConfirms = dataState.rawDataFiles.filter(x => x.file.endsWith(".confirm") && x.createTime < oldThreshold);
718
+ let doubleCheckLookup = new Set(oldEnoughConfirms.filter(a => !usedConfirmations.has(a.file)).map(a => a.file));
719
+
720
+ deprecatedFiles = deprecatedFiles.filter(a => doubleCheckLookup.has(a.file));
721
+ if (deprecatedFiles.length > 0 && dataState.rawDataFiles.length > 0) {
722
+ console.warn(red(`Deleted ${deprecatedFiles.length} / ${oldEnoughConfirms.length} confirmations, for not having corresponding data files`), { files: deprecatedFiles.map(x => x.file) });
723
+ logNodeStats(`archives|TΔ Delete Deprecated Confirm`, formatNumber, deprecatedFiles.length);
724
+ for (let file of deprecatedFiles) {
725
+ await this.storage.deleteKey(file.file);
726
+ }
705
727
  }
706
728
  }
707
729
  }
@@ -737,7 +759,10 @@ class TransactionLocker {
737
759
  let dels = transaction.ops.filter(a => a.type === "delete").length;
738
760
  let creates = transaction.ops.filter(a => a.type === "create").length;
739
761
  let createBytes = transaction.ops.map(a => a.type === "create" && a.value?.length || 0).reduce((a, b) => a + b, 0);
740
- diskLog(`Starting transaction with ${creates} creates and ${dels} deletes, ${formatNumber(createBytes)}B`);
762
+ diskLog(`Starting transaction with ${creates} file creates and ${dels} file deletes, ${formatNumber(createBytes)}B`, {
763
+ createFilesNames: transaction.ops.filter(a => a.type === "create").map(a => a.key),
764
+ deleteFilesNames: transaction.ops.filter(a => a.type === "delete").map(a => a.key),
765
+ });
741
766
  transaction = { ...transaction, ops: transaction.ops.slice() };
742
767
  function normalizePath(path: string) {
743
768
  // Replace duplicate slashes with a single slash
@@ -326,12 +326,19 @@ export class PathValueArchives {
326
326
 
327
327
  let dataValues: PathValue[] = [];
328
328
 
329
+ let decodedObj = pathValueArchives.decodeDataPath(path);
330
+
329
331
  try {
330
332
  dataValues = await pathValueSerializer.deserialize([data], {
331
333
  singleBuffer: true,
332
334
  skipStrings: config.skipStrings,
333
335
  skipValues: config.skipValues,
334
336
  });
337
+ if (dataValues.length !== decodedObj.valueCount) {
338
+ require("debugbreak")(2);
339
+ debugger;
340
+ console.error(`Bad archive data file at ${config.path}, Decoded count ${formatNumber(decodedObj.valueCount)} !== count in file name ${formatNumber(dataValues.length)} (${decodedObj.valueCount} !== ${dataValues.length})`);
341
+ }
335
342
  } catch (e: any) {
336
343
  console.log(red(`Bad archive data file at ${config.path}, error: ${e.stack}`));
337
344
  }
@@ -224,7 +224,7 @@ export type PathValue = {
224
224
  time: Time;
225
225
  /** @deprecated NOT deprecated, just remember when you set this ALWAYS set lockCount, otherwise
226
226
  * valid checking breaks!
227
- * IMPORTANT! DO NOT check for locks.length === 0. Locks are lost during serialization, so this
227
+ * IMPORTANT! DO NOT check for locks.length === 0. Locks are often dropped during serialization, so this
228
228
  * will be irrelevant. lockCount on the other hand, should be accurate.
229
229
  */
230
230
  locks: ReadLock[];
@@ -148,7 +148,7 @@ export class RemoteWatcher {
148
148
  }
149
149
 
150
150
  private watchUnwatchSerial = runInSerial((name: string, fnc: () => Promise<unknown>) =>
151
- measureBlock(fnc, name)
151
+ fnc()
152
152
  );
153
153
 
154
154
  /** NOTE: We dedupe duplicate watches in watchLatest. */