astreum 0.1.7 (py3-none-any.whl) → 0.1.9 (py3-none-any.whl)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of astreum might be problematic. Click here for more details.

@@ -56,191 +56,139 @@ def _serialize_expr(expr: Any, objects: Dict[bytes, bytes]) -> bytes:
56
56
  """
57
57
  if expr is None:
58
58
  # None type
59
+ is_leaf = True
59
60
  type_bytes = b'N' # N for None
60
- type_hash = hashlib.sha256(type_bytes).digest()
61
- objects[type_hash] = type_bytes
62
61
 
63
- # None values don't need a value leaf, just return the type hash
64
- return type_hash
62
+ # Create the object with leaf flag and body
63
+ object_bytes = struct.pack("?", is_leaf) + type_bytes
64
+ object_hash = hashlib.sha256(object_bytes).digest()
65
+ objects[object_hash] = object_bytes
66
+
67
+ return object_hash
65
68
 
66
69
  elif isinstance(expr, Expr.ListExpr):
67
- # Create type leaf
70
+ # Create type object
71
+ is_leaf = False
68
72
  type_bytes = b'L' # L for List
69
- type_hash = hashlib.sha256(type_bytes).digest()
70
- objects[type_hash] = type_bytes
71
73
 
72
74
  # Serialize each element and collect their hashes
73
75
  element_hashes = []
74
76
  for elem in expr.elements:
75
77
  elem_hash = _serialize_expr(elem, objects)
76
78
  element_hashes.append(elem_hash)
77
-
79
+
78
80
  # Create value leaf with all element hashes
79
81
  value_bytes = b''.join(element_hashes)
80
- value_hash = hashlib.sha256(value_bytes).digest()
81
- objects[value_hash] = value_bytes
82
82
 
83
- # Create the tree node with type and value
84
- tree_bytes = type_hash + value_hash
85
- tree_hash = hashlib.sha256(tree_bytes).digest()
86
- objects[tree_hash] = tree_bytes
83
+ # Create the object with leaf flag and body
84
+ object_bytes = struct.pack("?", is_leaf) + type_bytes + value_bytes
85
+ object_hash = hashlib.sha256(object_bytes).digest()
86
+ objects[object_hash] = object_bytes
87
87
 
88
- return tree_hash
88
+ return object_hash
89
89
 
90
90
  elif isinstance(expr, Expr.Symbol):
91
- # Create type leaf
91
+ # Create the object - symbols are leaf nodes
92
+ is_leaf = True
92
93
  type_bytes = b'S' # S for Symbol
93
- type_hash = hashlib.sha256(type_bytes).digest()
94
- objects[type_hash] = type_bytes
95
-
96
- # Create value leaf
97
94
  value_bytes = expr.value.encode('utf-8')
98
- value_hash = hashlib.sha256(value_bytes).digest()
99
- objects[value_hash] = value_bytes
100
95
 
101
- # Create the tree node with type and value
102
- tree_bytes = type_hash + value_hash
103
- tree_hash = hashlib.sha256(tree_bytes).digest()
104
- objects[tree_hash] = tree_bytes
96
+ # Create the object with leaf flag and body
97
+ object_bytes = struct.pack("?", is_leaf) + type_bytes + value_bytes
98
+ object_hash = hashlib.sha256(object_bytes).digest()
99
+ objects[object_hash] = object_bytes
105
100
 
106
- return tree_hash
101
+ return object_hash
107
102
 
108
103
  elif isinstance(expr, Expr.Integer):
109
- # Create type leaf
104
+ # Create the object - integers are leaf nodes
105
+ is_leaf = True
110
106
  type_bytes = b'I' # I for Integer
111
- type_hash = hashlib.sha256(type_bytes).digest()
112
- objects[type_hash] = type_bytes
113
-
114
- # Create value leaf - use 2's complement little endian for integers
115
107
  value_bytes = struct.pack("<q", expr.value) # 8-byte little endian
116
- value_hash = hashlib.sha256(value_bytes).digest()
117
- objects[value_hash] = value_bytes
118
108
 
119
- # Create the tree node with type and value
120
- tree_bytes = type_hash + value_hash
121
- tree_hash = hashlib.sha256(tree_bytes).digest()
122
- objects[tree_hash] = tree_bytes
109
+ # Create the object with leaf flag and body
110
+ object_bytes = struct.pack("?", is_leaf) + type_bytes + value_bytes
111
+ object_hash = hashlib.sha256(object_bytes).digest()
112
+ objects[object_hash] = object_bytes
123
113
 
124
- return tree_hash
114
+ return object_hash
125
115
 
126
116
  elif isinstance(expr, Expr.String):
127
- # Create type leaf
117
+ # Create the object - strings are leaf nodes
118
+ is_leaf = True
128
119
  type_bytes = b'T' # T for Text/String
129
- type_hash = hashlib.sha256(type_bytes).digest()
130
- objects[type_hash] = type_bytes
131
-
132
- # Create value leaf
133
120
  value_bytes = expr.value.encode('utf-8')
134
- value_hash = hashlib.sha256(value_bytes).digest()
135
- objects[value_hash] = value_bytes
136
121
 
137
- # Create the tree node with type and value
138
- tree_bytes = type_hash + value_hash
139
- tree_hash = hashlib.sha256(tree_bytes).digest()
140
- objects[tree_hash] = tree_bytes
122
+ # Create the object with leaf flag and body
123
+ object_bytes = struct.pack("?", is_leaf) + type_bytes + value_bytes
124
+ object_hash = hashlib.sha256(object_bytes).digest()
125
+ objects[object_hash] = object_bytes
141
126
 
142
- return tree_hash
127
+ return object_hash
143
128
 
144
129
  elif isinstance(expr, Expr.Boolean):
145
- # Create type leaf
130
+ # Create the object - booleans are leaf nodes
131
+ is_leaf = True
146
132
  type_bytes = b'B' # B for Boolean
147
- type_hash = hashlib.sha256(type_bytes).digest()
148
- objects[type_hash] = type_bytes
149
-
150
- # Create value leaf
151
133
  value_bytes = b'1' if expr.value else b'0'
152
- value_hash = hashlib.sha256(value_bytes).digest()
153
- objects[value_hash] = value_bytes
154
134
 
155
- # Create the tree node with type and value
156
- tree_bytes = type_hash + value_hash
157
- tree_hash = hashlib.sha256(tree_bytes).digest()
158
- objects[tree_hash] = tree_bytes
135
+ # Create the object with leaf flag and body
136
+ object_bytes = struct.pack("?", is_leaf) + type_bytes + value_bytes
137
+ object_hash = hashlib.sha256(object_bytes).digest()
138
+ objects[object_hash] = object_bytes
159
139
 
160
- return tree_hash
140
+ return object_hash
161
141
 
162
142
  elif isinstance(expr, Expr.Function):
163
- # Create type leaf
143
+ # Create the object - functions are not leaf nodes
144
+ is_leaf = False
164
145
  type_bytes = b'F' # F for Function
165
- type_hash = hashlib.sha256(type_bytes).digest()
166
- objects[type_hash] = type_bytes
167
146
 
168
147
  # Serialize params
169
148
  params_list = []
170
149
  for param in expr.params:
171
150
  params_list.append(param.encode('utf-8'))
172
151
  params_bytes = b','.join(params_list)
173
- params_hash = hashlib.sha256(params_bytes).digest()
174
- objects[params_hash] = params_bytes
175
152
 
176
153
  # Serialize body recursively
177
154
  body_hash = _serialize_expr(expr.body, objects)
178
155
 
179
- # Combine params and body hashes for the value
180
- value_bytes = params_hash + body_hash
181
- value_hash = hashlib.sha256(value_bytes).digest()
182
- objects[value_hash] = value_bytes
183
-
184
- # Create the tree node with type and value
185
- tree_bytes = type_hash + value_hash
186
- tree_hash = hashlib.sha256(tree_bytes).digest()
187
- objects[tree_hash] = tree_bytes
156
+ # Create the object with leaf flag and body
157
+ object_bytes = struct.pack("?", is_leaf) + type_bytes + params_bytes + body_hash
158
+ object_hash = hashlib.sha256(object_bytes).digest()
159
+ objects[object_hash] = object_bytes
188
160
 
189
- return tree_hash
161
+ return object_hash
190
162
 
191
163
  elif isinstance(expr, Expr.Error):
192
- # Create type leaf
164
+ # Create the object - errors are not leaf nodes
165
+ is_leaf = False
193
166
  type_bytes = b'E' # E for Error
194
- type_hash = hashlib.sha256(type_bytes).digest()
195
- objects[type_hash] = type_bytes
196
167
 
197
168
  # Serialize error components
198
169
  category_bytes = expr.category.encode('utf-8')
199
- category_hash = hashlib.sha256(category_bytes).digest()
200
- objects[category_hash] = category_bytes
201
-
202
170
  message_bytes = expr.message.encode('utf-8')
203
- message_hash = hashlib.sha256(message_bytes).digest()
204
- objects[message_hash] = message_bytes
205
-
206
- if expr.details:
207
- details_bytes = expr.details.encode('utf-8')
208
- details_hash = hashlib.sha256(details_bytes).digest()
209
- objects[details_hash] = details_bytes
210
-
211
- # Combine all three components
212
- value_bytes = category_hash + message_hash + details_hash
213
- else:
214
- # Just combine category and message
215
- value_bytes = category_hash + message_hash
216
-
217
- value_hash = hashlib.sha256(value_bytes).digest()
218
- objects[value_hash] = value_bytes
171
+ details_bytes = b'' if expr.details is None else expr.details.encode('utf-8')
219
172
 
220
- # Create the tree node with type and value
221
- tree_bytes = type_hash + value_hash
222
- tree_hash = hashlib.sha256(tree_bytes).digest()
223
- objects[tree_hash] = tree_bytes
224
-
225
- return tree_hash
173
+ # Create the object with leaf flag and body
174
+ object_bytes = struct.pack("?", is_leaf) + type_bytes + category_bytes + b'\0' + message_bytes + b'\0' + details_bytes
175
+ object_hash = hashlib.sha256(object_bytes).digest()
176
+ objects[object_hash] = object_bytes
226
177
 
178
+ return object_hash
179
+
227
180
  else:
228
- # Unknown type - serialize as string
181
+ # Default fallback for unknown types
182
+ is_leaf = True
229
183
  type_bytes = b'U' # U for Unknown
230
- type_hash = hashlib.sha256(type_bytes).digest()
231
- objects[type_hash] = type_bytes
232
-
233
- # Create value leaf with string representation
234
184
  value_bytes = str(expr).encode('utf-8')
235
- value_hash = hashlib.sha256(value_bytes).digest()
236
- objects[value_hash] = value_bytes
237
185
 
238
- # Create the tree node with type and value
239
- tree_bytes = type_hash + value_hash
240
- tree_hash = hashlib.sha256(tree_bytes).digest()
241
- objects[tree_hash] = tree_bytes
186
+ # Create the object with leaf flag and body
187
+ object_bytes = struct.pack("?", is_leaf) + type_bytes + value_bytes
188
+ object_hash = hashlib.sha256(object_bytes).digest()
189
+ objects[object_hash] = object_bytes
242
190
 
243
- return tree_hash
191
+ return object_hash
244
192
 
245
193
 
246
194
  def _deserialize_expr(obj_hash: bytes, objects: Dict[bytes, bytes]) -> Any:
@@ -257,93 +205,92 @@ def _deserialize_expr(obj_hash: bytes, objects: Dict[bytes, bytes]) -> Any:
257
205
  if obj_hash not in objects:
258
206
  return None
259
207
 
260
- obj_data = objects[obj_hash]
208
+ obj_bytes = objects[obj_hash]
209
+
210
+ # Extract leaf flag
211
+ is_leaf = struct.unpack("?", obj_bytes[0:1])[0]
261
212
 
262
- # Check if this is a type-only node (for None)
263
- if len(obj_data) == 1:
264
- if obj_data == b'N':
265
- return None
266
- return None # Unrecognized single-byte type
213
+ # Get type indicator
214
+ type_indicator = obj_bytes[1:2]
267
215
 
268
- # For regular nodes, expect 64 bytes (two 32-byte hashes)
269
- if len(obj_data) == 64:
270
- type_hash = obj_data[:32]
271
- value_hash = obj_data[32:]
216
+ # Deserialize based on type
217
+ if type_indicator == b'N': # None
218
+ return None
272
219
 
273
- if type_hash not in objects or value_hash not in objects:
274
- return None
275
-
276
- type_data = objects[type_hash]
277
- value_data = objects[value_hash]
278
-
279
- # Switch based on the type marker
280
- if type_data == b'L': # List
281
- elements = []
282
- # Each hash is 32 bytes
283
- hash_size = 32
284
- for i in range(0, len(value_data), hash_size):
285
- elem_hash = value_data[i:i+hash_size]
286
- if elem_hash:
287
- elem = _deserialize_expr(elem_hash, objects)
288
- elements.append(elem)
289
- return Expr.ListExpr(elements)
290
-
291
- elif type_data == b'S': # Symbol
292
- return Expr.Symbol(value_data.decode('utf-8'))
293
-
294
- elif type_data == b'I': # Integer
295
- int_value = struct.unpack("<q", value_data)[0]
296
- return Expr.Integer(int_value)
297
-
298
- elif type_data == b'T': # String (Text)
299
- return Expr.String(value_data.decode('utf-8'))
300
-
301
- elif type_data == b'B': # Boolean
302
- return Expr.Boolean(value_data == b'1')
303
-
304
- elif type_data == b'F': # Function
305
- # Value contains params_hash and body_hash
306
- params_hash = value_data[:32]
307
- body_hash = value_data[32:]
308
-
309
- if params_hash not in objects:
310
- return None
311
-
312
- params_data = objects[params_hash]
313
- params = [p.decode('utf-8') for p in params_data.split(b',') if p]
314
-
315
- body = _deserialize_expr(body_hash, objects)
316
- return Expr.Function(params, body)
317
-
318
- elif type_data == b'E': # Error
319
- # Check if we have details or just category and message
320
- if len(value_data) == 64: # No details
321
- category_hash = value_data[:32]
322
- message_hash = value_data[32:]
323
- details_hash = None
324
- elif len(value_data) == 96: # With details
325
- category_hash = value_data[:32]
326
- message_hash = value_data[32:64]
327
- details_hash = value_data[64:]
328
- else:
329
- return None
330
-
331
- if category_hash not in objects or message_hash not in objects:
332
- return None
333
-
334
- category = objects[category_hash].decode('utf-8')
335
- message = objects[message_hash].decode('utf-8')
336
-
337
- details = None
338
- if details_hash and details_hash in objects:
339
- details = objects[details_hash].decode('utf-8')
340
-
341
- return Expr.Error(category, message, details)
220
+ elif type_indicator == b'L': # List
221
+ if is_leaf:
222
+ # Empty list
223
+ return Expr.ListExpr([])
224
+
225
+ # Non-leaf list has child element hashes
226
+ elements_bytes = obj_bytes[2:]
227
+ element_hashes = [elements_bytes[i:i+32] for i in range(0, len(elements_bytes), 32)]
228
+
229
+ # Deserialize each element
230
+ elements = []
231
+ for elem_hash in element_hashes:
232
+ elem = _deserialize_expr(elem_hash, objects)
233
+ elements.append(elem)
342
234
 
343
- elif type_data == b'U': # Unknown
344
- return Expr.String(value_data.decode('utf-8'))
235
+ return Expr.ListExpr(elements)
236
+
237
+ elif type_indicator == b'S': # Symbol
238
+ value_bytes = obj_bytes[2:]
239
+ return Expr.Symbol(value_bytes.decode('utf-8'))
240
+
241
+ elif type_indicator == b'I': # Integer
242
+ value_bytes = obj_bytes[2:10] # 8 bytes for int64
243
+ value = struct.unpack("<q", value_bytes)[0]
244
+ return Expr.Integer(value)
245
+
246
+ elif type_indicator == b'T': # Text/String
247
+ value_bytes = obj_bytes[2:]
248
+ return Expr.String(value_bytes.decode('utf-8'))
249
+
250
+ elif type_indicator == b'B': # Boolean
251
+ value_bytes = obj_bytes[2:3]
252
+ return Expr.Boolean(value_bytes == b'1')
253
+
254
+ elif type_indicator == b'F': # Function
255
+ # Non-leaf function
256
+ remaining_bytes = obj_bytes[2:]
257
+
258
+ # Find the separator between params and body hash
259
+ params_end = remaining_bytes.find(b',', remaining_bytes.rfind(b','))
260
+ if params_end == -1:
261
+ params_end = 0 # No params
345
262
 
346
- return None # Unrecognized format
263
+ params_bytes = remaining_bytes[:params_end+1]
264
+ body_hash = remaining_bytes[params_end+1:]
265
+
266
+ # Parse params
267
+ params = []
268
+ if params_bytes:
269
+ for param_bytes in params_bytes.split(b','):
270
+ if param_bytes: # Skip empty strings
271
+ params.append(param_bytes.decode('utf-8'))
272
+
273
+ # Deserialize body
274
+ body = _deserialize_expr(body_hash, objects)
275
+
276
+ return Expr.Function(params, body)
277
+
278
+ elif type_indicator == b'E': # Error
279
+ remaining_bytes = obj_bytes[2:]
280
+
281
+ # Split by null bytes to get category, message, and details
282
+ parts = remaining_bytes.split(b'\0', 2)
283
+
284
+ category = parts[0].decode('utf-8')
285
+ message = parts[1].decode('utf-8') if len(parts) > 1 else ""
286
+ details = parts[2].decode('utf-8') if len(parts) > 2 else None
287
+
288
+ return Expr.Error(category, message, details)
289
+
290
+ else: # Unknown
291
+ value_bytes = obj_bytes[2:]
292
+ # Return as a string
293
+ return value_bytes.decode('utf-8')
347
294
 
348
295
 
349
296
  def store_expr(expr: Any, storage) -> bytes:
@@ -357,101 +304,107 @@ def store_expr(expr: Any, storage) -> bytes:
357
304
  Returns:
358
305
  The hash of the root object
359
306
  """
307
+ # Convert expression to objects
360
308
  root_hash, objects = expr_to_objects(expr)
361
309
 
362
- # Store all objects in the storage
310
+ # Store each object in the storage
363
311
  for obj_hash, obj_data in objects.items():
364
312
  storage.put(obj_hash, obj_data)
365
-
313
+
366
314
  return root_hash
367
315
 
368
316
 
369
- def get_expr_from_storage(root_hash: bytes, storage) -> Any:
317
+ def get_expr_from_storage(root_hash: bytes, storage, max_depth: int = 50) -> Any:
370
318
  """
371
- Load a Lispeum expression from storage.
319
+ Load a Lispeum expression from storage. Will recursively resolve
320
+ objects from the storage until a leaf node is reached.
372
321
 
373
322
  Args:
374
323
  root_hash: The hash of the root object
375
- storage: Storage interface with get(key) method
324
+ storage: Storage interface with get method and get_recursive method
325
+ max_depth: Maximum recursion depth for resolution
376
326
 
377
327
  Returns:
378
328
  The loaded Lispeum expression, or None if not found
379
329
  """
380
- if not root_hash:
330
+ # Check if storage has the get_recursive method (newer storage interface)
331
+ if hasattr(storage, 'get_recursive'):
332
+ # Use the storage's built-in recursive retrieval
333
+ objects = storage.get_recursive(root_hash, max_depth)
334
+ else:
335
+ # Fallback to manual recursive retrieval for older storage interfaces
336
+ objects = {}
337
+ _load_objects_recursive(root_hash, storage, objects, max_depth)
338
+
339
+ # If no objects were retrieved, return None
340
+ if not objects:
381
341
  return None
382
342
 
383
- # Build the objects dictionary from storage
384
- objects = {}
385
- queue = [root_hash]
386
- visited = set()
343
+ # Deserialize the expression
344
+ return objects_to_expr(root_hash, objects)
345
+
346
+
347
+ def _load_objects_recursive(obj_hash: bytes, storage, objects: Dict[bytes, bytes], max_depth: int, current_depth: int = 0) -> bool:
348
+ """
349
+ Recursively load objects from storage.
387
350
 
388
- while queue:
389
- current_hash = queue.pop(0)
390
- if current_hash in visited:
391
- continue
392
-
393
- visited.add(current_hash)
394
- obj_data = storage.get(current_hash)
395
-
396
- if not obj_data:
397
- # Can't find an object, return None
398
- return None
399
-
400
- objects[current_hash] = obj_data
351
+ Args:
352
+ obj_hash: The hash of the object to load
353
+ storage: Storage interface with get(key) method
354
+ objects: Dictionary to store loaded objects
355
+ max_depth: Maximum recursion depth
356
+ current_depth: Current recursion depth
401
357
 
402
- # For single-byte nodes (e.g., None), no further processing needed
403
- if len(obj_data) == 1:
404
- continue
358
+ Returns:
359
+ True if object was loaded, False otherwise
360
+ """
361
+ # Check if we've reached max recursion depth
362
+ if current_depth >= max_depth:
363
+ print(f"Warning: Max recursion depth {max_depth} reached while loading objects")
364
+ return False
365
+
366
+ # Check if we already have this object
367
+ if obj_hash in objects:
368
+ return True
369
+
370
+ # Load the object from storage
371
+ obj_data = storage.get(obj_hash)
372
+ if obj_data is None:
373
+ return False
374
+
375
+ # Store the object
376
+ objects[obj_hash] = obj_data
377
+
378
+ # Check if this is a leaf node
379
+ is_leaf = struct.unpack("?", obj_data[0:1])[0]
380
+ if is_leaf:
381
+ # Leaf node, no need to recurse
382
+ return True
383
+
384
+ # For non-leaf nodes, recursively load child objects
385
+ type_indicator = obj_data[1:2]
386
+
387
+ if type_indicator == b'L': # List
388
+ # Non-leaf list has child element hashes
389
+ elements_bytes = obj_data[2:]
390
+ element_hashes = [elements_bytes[i:i+32] for i in range(0, len(elements_bytes), 32)]
391
+
392
+ # Load each element
393
+ for elem_hash in element_hashes:
394
+ _load_objects_recursive(elem_hash, storage, objects, max_depth, current_depth + 1)
405
395
 
406
- # For regular tree nodes (type + value)
407
- if len(obj_data) == 64:
408
- # Add both hashes to the queue
409
- type_hash = obj_data[:32]
410
- value_hash = obj_data[32:]
396
+ elif type_indicator == b'F': # Function
397
+ # Non-leaf function has body hash
398
+ remaining_bytes = obj_data[2:]
399
+
400
+ # Find the separator between params and body hash
401
+ params_end = remaining_bytes.find(b',', remaining_bytes.rfind(b','))
402
+ if params_end == -1:
403
+ params_end = 0 # No params
411
404
 
412
- if type_hash not in visited:
413
- queue.append(type_hash)
414
-
415
- if value_hash not in visited:
416
- queue.append(value_hash)
417
-
418
- # For function and error types, we need to check the value fields
419
- # which might contain additional hashes
420
- if type_hash in objects:
421
- type_data = objects[type_hash]
422
-
423
- # For Function type, the value contains params_hash + body_hash
424
- if type_data == b'F' and value_hash in objects:
425
- value_data = objects[value_hash]
426
- if len(value_data) == 64:
427
- params_hash = value_data[:32]
428
- body_hash = value_data[32:]
429
-
430
- if params_hash not in visited:
431
- queue.append(params_hash)
432
-
433
- if body_hash not in visited:
434
- queue.append(body_hash)
435
-
436
- # For Error type, the value contains category_hash + message_hash + [details_hash]
437
- elif type_data == b'E' and value_hash in objects:
438
- value_data = objects[value_hash]
439
- hash_size = 32
440
-
441
- for i in range(0, len(value_data), hash_size):
442
- component_hash = value_data[i:i+hash_size]
443
- if component_hash and component_hash not in visited:
444
- queue.append(component_hash)
445
-
446
- # For List type, the value contains all element hashes
447
- elif type_data == b'L' and value_hash in objects:
448
- value_data = objects[value_hash]
449
- hash_size = 32
450
-
451
- for i in range(0, len(value_data), hash_size):
452
- elem_hash = value_data[i:i+hash_size]
453
- if elem_hash and elem_hash not in visited:
454
- queue.append(elem_hash)
405
+ body_hash = remaining_bytes[params_end+1:]
406
+
407
+ # Load body
408
+ _load_objects_recursive(body_hash, storage, objects, max_depth, current_depth + 1)
455
409
 
456
- # Reconstruct the expression from objects
457
- return objects_to_expr(root_hash, objects)
410
+ return True