orbynt-protocol 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- omp/__init__.py +44 -0
- omp/cli/__init__.py +2 -0
- omp/cli/main.py +210 -0
- omp/core/__init__.py +26 -0
- omp/core/checksum.py +12 -0
- omp/core/compressor.py +24 -0
- omp/core/framer.py +278 -0
- omp/core/reader.py +104 -0
- omp/core/writer.py +131 -0
- omp/exceptions.py +15 -0
- omp/inject/__init__.py +23 -0
- omp/inject/adapters/__init__.py +14 -0
- omp/inject/adapters/anthropic.py +19 -0
- omp/inject/adapters/base.py +43 -0
- omp/inject/adapters/gemini.py +17 -0
- omp/inject/adapters/generic.py +14 -0
- omp/inject/adapters/openai.py +18 -0
- omp/inject/prompt.py +83 -0
- omp/inject/templates.py +2 -0
- omp/inspect.py +39 -0
- omp/merge.py +82 -0
- omp/proto/__init__.py +2 -0
- omp/proto/adapter.proto +59 -0
- omp/proto/adapter_pb2.py +42 -0
- omp/proto/common.proto +45 -0
- omp/proto/common_pb2.py +42 -0
- omp/proto/embedding.proto +31 -0
- omp/proto/embedding_pb2.py +34 -0
- omp/proto/encryption.proto +25 -0
- omp/proto/encryption_pb2.py +32 -0
- omp/proto/graph.proto +42 -0
- omp/proto/graph_pb2.py +36 -0
- omp/proto/identity.proto +58 -0
- omp/proto/identity_pb2.py +41 -0
- omp/proto/memory.proto +53 -0
- omp/proto/memory_pb2.py +40 -0
- omp/proto/orb.proto +33 -0
- omp/proto/orb_pb2.py +35 -0
- omp/proto/passport.proto +105 -0
- omp/proto/passport_pb2.py +58 -0
- omp/proto/query.proto +23 -0
- omp/proto/query_pb2.py +32 -0
- omp/proto/registry.proto +31 -0
- omp/proto/registry_pb2.py +34 -0
- omp/proto/response.proto +22 -0
- omp/proto/response_pb2.py +33 -0
- omp/protos.py +81 -0
- omp/query/__init__.py +4 -0
- omp/query/engine.py +73 -0
- omp/schema/__init__.py +22 -0
- omp/schema/builder.py +339 -0
- omp/schema/convert.py +103 -0
- omp/schema/validator.py +21 -0
- omp/schema/version.py +10 -0
- orbynt_protocol-0.1.0.dist-info/METADATA +712 -0
- orbynt_protocol-0.1.0.dist-info/RECORD +60 -0
- orbynt_protocol-0.1.0.dist-info/WHEEL +5 -0
- orbynt_protocol-0.1.0.dist-info/entry_points.txt +2 -0
- orbynt_protocol-0.1.0.dist-info/licenses/LICENSE +184 -0
- orbynt_protocol-0.1.0.dist-info/top_level.txt +1 -0
omp/__init__.py
ADDED
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from omp.core.reader import read
|
|
4
|
+
from omp.core.writer import write
|
|
5
|
+
from omp.exceptions import InvalidOrbFile, OMPError, UnsupportedFeature, VersionMismatch
|
|
6
|
+
from omp.inject import inject
|
|
7
|
+
from omp.inspect import inspect
|
|
8
|
+
from omp.merge import merge
|
|
9
|
+
from omp.protos import (
|
|
10
|
+
export_proto_bundle,
|
|
11
|
+
get_ai_schema_context,
|
|
12
|
+
get_all_proto_sources,
|
|
13
|
+
get_proto_bundle,
|
|
14
|
+
get_proto_source,
|
|
15
|
+
list_proto_files,
|
|
16
|
+
)
|
|
17
|
+
from omp.query import MemoryMatch, query
|
|
18
|
+
from omp.schema.convert import export_passport as export
|
|
19
|
+
from omp.schema.version import PACKAGE_VERSION, PROTOCOL_VERSION
|
|
20
|
+
|
|
21
|
+
# Package version, re-exported under the conventional dunder name.
__version__ = PACKAGE_VERSION

# Names exported by ``from omp import *`` (kept in ASCII-sorted order).
__all__ = [
    "InvalidOrbFile",
    "MemoryMatch",
    "OMPError",
    "PROTOCOL_VERSION",
    "UnsupportedFeature",
    "VersionMismatch",
    "__version__",
    "export",
    "export_proto_bundle",
    "get_ai_schema_context",
    "get_all_proto_sources",
    "get_proto_bundle",
    "get_proto_source",
    "inject",
    "inspect",
    "list_proto_files",
    "merge",
    "query",
    "read",
    "write",
]
|
omp/cli/__init__.py
ADDED
omp/cli/main.py
ADDED
|
@@ -0,0 +1,210 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import argparse
|
|
4
|
+
import sys
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
|
|
7
|
+
import omp
|
|
8
|
+
from omp.exceptions import OMPError
|
|
9
|
+
from omp.schema.convert import to_json, to_markdown
|
|
10
|
+
from omp.schema.version import PACKAGE_VERSION, PROTOCOL_VERSION
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def main(argv: list[str] | None = None) -> int:
    """Run the ``omp`` command-line interface.

    Args:
        argv: Arguments to parse, without the program name. ``None`` makes
            ``argparse`` read them from ``sys.argv``.

    Returns:
        The status returned by the selected sub-command handler, or ``1``
        when the handler fails with an ``OMPError`` or an ``OSError``.
    """
    args = _build_parser().parse_args(argv)
    try:
        return args.func(args)
    except (OMPError, OSError) as exc:
        # Handlers read and write user-supplied paths without their own
        # error handling; a missing input or unwritable output is a user
        # error and is reported like a protocol error, not as a traceback.
        print(f"omp: {exc}", file=sys.stderr)
        return 1
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def _build_parser() -> argparse.ArgumentParser:
    """Build the ``omp`` argument parser with one sub-parser per command.

    Each sub-parser stores its handler function in the ``func`` default so
    the caller can dispatch with ``args.func(args)``.
    """
    root = argparse.ArgumentParser(prog="omp")
    commands = root.add_subparsers(required=True)

    # omp write
    write_cmd = commands.add_parser("write", help="create a .orb file")
    write_cmd.add_argument("--name", default="", help="identity display name")
    write_cmd.add_argument("--out", required=True, help="output .orb path")
    write_cmd.add_argument(
        "--memory",
        action="append",
        default=[],
        help="kind:fact|summary:...|tags:a,b",
    )
    write_cmd.add_argument("--pref", action="append", default=[], help="key=value")
    write_cmd.add_argument("--compress", action="store_true", help="zstd-compress chunks")
    write_cmd.set_defaults(func=_cmd_write)

    # omp read
    read_cmd = commands.add_parser("read", help="read a .orb file")
    read_cmd.add_argument("path")
    read_cmd.add_argument("--format", choices=["json", "text", "markdown"], default="text")
    read_cmd.add_argument("--out")
    read_cmd.set_defaults(func=_cmd_read)

    # omp inspect
    inspect_cmd = commands.add_parser("inspect", help="show a human-readable summary")
    inspect_cmd.add_argument("path")
    inspect_cmd.set_defaults(func=_cmd_inspect)

    # omp query
    query_cmd = commands.add_parser("query", help="query memories")
    query_cmd.add_argument("path")
    query_cmd.add_argument("--text", default="")
    query_cmd.add_argument("--kind")
    query_cmd.add_argument("--tag", action="append", default=[])
    query_cmd.add_argument("--limit", type=int, default=5)
    query_cmd.set_defaults(func=_cmd_query)

    # omp merge
    merge_cmd = commands.add_parser("merge", help="merge two passports")
    merge_cmd.add_argument("a")
    merge_cmd.add_argument("b")
    merge_cmd.add_argument("--out", required=True)
    merge_cmd.add_argument(
        "--strategy",
        choices=["latest", "union", "source_a", "source_b"],
        default="latest",
    )
    merge_cmd.set_defaults(func=_cmd_merge)

    # omp export
    export_cmd = commands.add_parser("export", help="export a .orb file")
    export_cmd.add_argument("path")
    export_cmd.add_argument("--format", choices=["json", "markdown"], required=True)
    export_cmd.add_argument("--out", required=True)
    export_cmd.set_defaults(func=_cmd_export)

    # omp proto {list,show,bundle}
    proto_cmd = commands.add_parser("proto", help="inspect bundled protobuf schemas")
    proto_commands = proto_cmd.add_subparsers(required=True)

    list_cmd = proto_commands.add_parser("list", help="list bundled .proto files")
    list_cmd.set_defaults(func=_cmd_proto_list)

    show_cmd = proto_commands.add_parser("show", help="print one bundled .proto file")
    show_cmd.add_argument("name")
    show_cmd.set_defaults(func=_cmd_proto_show)

    bundle_cmd = proto_commands.add_parser(
        "bundle", help="print or write all bundled .proto files"
    )
    bundle_cmd.add_argument("--out")
    bundle_cmd.set_defaults(func=_cmd_proto_bundle)

    # omp verify
    verify_cmd = commands.add_parser("verify", help="verify checksum and open-tier status")
    verify_cmd.add_argument("path")
    verify_cmd.set_defaults(func=_cmd_verify)

    # omp version
    version_cmd = commands.add_parser("version", help="show package and protocol versions")
    version_cmd.set_defaults(func=_cmd_version)

    return root
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
def _cmd_write(args: argparse.Namespace) -> int:
    """Handle ``omp write``: create a .orb file from the command-line flags.

    Returns:
        ``0`` after the file has been written.
    """
    parsed_memories = []
    for raw_memory in args.memory:
        parsed_memories.append(_parse_memory(raw_memory))

    # A repeated --pref key keeps the value given last.
    parsed_prefs = {}
    for raw_pref in args.pref:
        pref_key, pref_value = _parse_pref(raw_pref)
        parsed_prefs[pref_key] = pref_value

    omp.write(
        args.out,
        identity={"display_name": args.name},
        memories=parsed_memories,
        preferences=parsed_prefs,
        compress=args.compress,
    )
    print(f"wrote {args.out}")
    return 0
|
|
102
|
+
|
|
103
|
+
|
|
104
|
+
def _cmd_read(args: argparse.Namespace) -> int:
    """Handle ``omp read``: render a .orb file and print it or save it.

    ``--format json`` and ``--format markdown`` use the schema converters;
    any other format is rendered with ``omp.inject`` for the ``generic``
    target. The result goes to ``--out`` when given, otherwise to stdout.
    """
    passport = omp.read(args.path)
    if args.format == "markdown":
        rendered = to_markdown(passport)
    elif args.format == "json":
        rendered = to_json(passport)
    else:
        rendered = omp.inject(passport, target="generic")
    _write_or_print(rendered, args.out)
    return 0
|
|
114
|
+
|
|
115
|
+
|
|
116
|
+
def _cmd_inspect(args: argparse.Namespace) -> int:
    """Handle ``omp inspect``: delegate to ``omp.inspect`` for the given path."""
    target = args.path
    # The return value of omp.inspect is intentionally not used here.
    omp.inspect(target)
    return 0
|
|
119
|
+
|
|
120
|
+
|
|
121
|
+
def _cmd_query(args: argparse.Namespace) -> int:
    """Handle ``omp query``: print one ``score<TAB>summary`` line per match."""
    loaded = omp.read(args.path)
    found = omp.query(
        loaded,
        text=args.text,
        kind=args.kind,
        tags=args.tag,
        limit=args.limit,
    )
    for hit in found:
        # Score is shown with three decimals, separated from the summary by a tab.
        print(f"{hit.score:.3f}\t{hit.memory.summary}")
    return 0
|
|
133
|
+
|
|
134
|
+
|
|
135
|
+
def _cmd_merge(args: argparse.Namespace) -> int:
    """Handle ``omp merge``: merge two .orb files and write the result to ``--out``."""
    first = omp.read(args.a)
    second = omp.read(args.b)
    combined = omp.merge(first, second, strategy=args.strategy)
    omp.write(args.out, passport=combined)
    print(f"wrote {args.out}")
    return 0
|
|
140
|
+
|
|
141
|
+
|
|
142
|
+
def _cmd_export(args: argparse.Namespace) -> int:
    """Handle ``omp export``: read a .orb file and export it to ``--out``."""
    loaded = omp.read(args.path)
    omp.export(loaded, format=args.format, path=args.out)
    print(f"wrote {args.out}")
    return 0
|
|
147
|
+
|
|
148
|
+
|
|
149
|
+
def _cmd_proto_list(args: argparse.Namespace) -> int:
    """Handle ``omp proto list``: print each bundled .proto file name on its own line."""
    for proto_name in omp.list_proto_files():
        print(proto_name)
    return 0
|
|
153
|
+
|
|
154
|
+
|
|
155
|
+
def _cmd_proto_show(args: argparse.Namespace) -> int:
    """Handle ``omp proto show``: print one bundled .proto source verbatim."""
    source = omp.get_proto_source(args.name)
    # end="" prints the source exactly as returned, without an added newline.
    print(source, end="")
    return 0
|
|
158
|
+
|
|
159
|
+
|
|
160
|
+
def _cmd_proto_bundle(args: argparse.Namespace) -> int:
    """Handle ``omp proto bundle``: print the proto bundle or write it to ``--out``."""
    _write_or_print(omp.get_proto_bundle(), args.out)
    return 0
|
|
164
|
+
|
|
165
|
+
|
|
166
|
+
def _cmd_verify(args: argparse.Namespace) -> int:
    """Handle ``omp verify``: read the file, then print its verification status.

    The status lines are only reached when ``omp.read`` returns without
    raising.
    """
    loaded = omp.read(args.path)
    # NOTE(review): "checksum valid" relies on omp.read raising for a bad
    # checksum -- confirm against omp.core.reader.
    print("checksum valid")
    print("signature none (open tier)")
    print(f"verified {loaded.verified}")
    return 0
|
|
172
|
+
|
|
173
|
+
|
|
174
|
+
def _cmd_version(args: argparse.Namespace) -> int:
    """Handle ``omp version``: print the package version, then the protocol version."""
    for line in (
        f"omp package {PACKAGE_VERSION}",
        f"protocol {PROTOCOL_VERSION}",
    ):
        print(line)
    return 0
|
|
178
|
+
|
|
179
|
+
|
|
180
|
+
def _parse_memory(value: str) -> dict:
|
|
181
|
+
result: dict[str, object] = {}
|
|
182
|
+
for part in value.split("|"):
|
|
183
|
+
if not part:
|
|
184
|
+
continue
|
|
185
|
+
key, sep, raw = part.partition(":")
|
|
186
|
+
if not sep:
|
|
187
|
+
raise OMPError(f"invalid --memory segment: {part}")
|
|
188
|
+
if key == "tags":
|
|
189
|
+
result[key] = [tag.strip() for tag in raw.split(",") if tag.strip()]
|
|
190
|
+
else:
|
|
191
|
+
result[key] = raw
|
|
192
|
+
return result
|
|
193
|
+
|
|
194
|
+
|
|
195
|
+
def _parse_pref(value: str) -> tuple[str, str]:
|
|
196
|
+
key, sep, raw = value.partition("=")
|
|
197
|
+
if not sep:
|
|
198
|
+
raise OMPError(f"invalid --pref value: {value}")
|
|
199
|
+
return key, raw
|
|
200
|
+
|
|
201
|
+
|
|
202
|
+
def _write_or_print(output: str, path: str | None) -> None:
|
|
203
|
+
if path:
|
|
204
|
+
Path(path).write_text(output, encoding="utf-8")
|
|
205
|
+
else:
|
|
206
|
+
print(output)
|
|
207
|
+
|
|
208
|
+
|
|
209
|
+
if __name__ == "__main__":
    # Use the CLI's return value as the process exit status.
    sys.exit(main())
|
omp/core/__init__.py
ADDED
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
from omp.core.framer import (
|
|
2
|
+
Chunk,
|
|
3
|
+
ChunkEntry,
|
|
4
|
+
ChunkFlag,
|
|
5
|
+
ChunkType,
|
|
6
|
+
FileFlag,
|
|
7
|
+
FramedOrb,
|
|
8
|
+
frame_chunks,
|
|
9
|
+
parse_framed,
|
|
10
|
+
read_framed_file,
|
|
11
|
+
write_framed_file,
|
|
12
|
+
)
|
|
13
|
+
|
|
14
|
+
__all__ = [
|
|
15
|
+
"Chunk",
|
|
16
|
+
"ChunkEntry",
|
|
17
|
+
"ChunkFlag",
|
|
18
|
+
"ChunkType",
|
|
19
|
+
"FileFlag",
|
|
20
|
+
"FramedOrb",
|
|
21
|
+
"frame_chunks",
|
|
22
|
+
"parse_framed",
|
|
23
|
+
"read_framed_file",
|
|
24
|
+
"write_framed_file",
|
|
25
|
+
]
|
|
26
|
+
|
omp/core/checksum.py
ADDED
omp/core/compressor.py
ADDED
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from omp.exceptions import UnsupportedFeature
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
def _zstd():
    """Import and return the ``zstandard`` module on demand.

    The import happens inside the function, so the dependency is only
    needed when compression is actually used.

    Raises:
        UnsupportedFeature: If ``zstandard`` cannot be imported.
    """
    try:
        import zstandard
    except ImportError as exc:
        message = "zstandard is required for compressed .orb chunks. Install omp with zstandard."
        raise UnsupportedFeature(message) from exc
    return zstandard
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def compress(data: bytes) -> bytes:
    """Return *data* compressed by a default-configured ``ZstdCompressor``.

    Raises:
        UnsupportedFeature: If ``zstandard`` is not installed.
    """
    compressor = _zstd().ZstdCompressor()
    return compressor.compress(data)
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def decompress(data: bytes) -> bytes:
    """Return *data* decompressed by a default-configured ``ZstdDecompressor``.

    Raises:
        UnsupportedFeature: If ``zstandard`` is not installed.
    """
    decompressor = _zstd().ZstdDecompressor()
    return decompressor.decompress(data)
|
|
24
|
+
|
omp/core/framer.py
ADDED
|
@@ -0,0 +1,278 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import struct
|
|
4
|
+
from dataclasses import dataclass
|
|
5
|
+
from enum import IntEnum, IntFlag
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
from typing import Iterable
|
|
8
|
+
|
|
9
|
+
from omp.core.checksum import sha256
|
|
10
|
+
from omp.exceptions import InvalidOrbFile, VersionMismatch
|
|
11
|
+
from omp.schema.version import PROTOCOL_MAJOR
|
|
12
|
+
|
|
13
|
+
# Magic bytes that open the header and close the footer.
MAGIC = b"ORB1"
FOOTER_MAGIC = b"ORBF"

# All layouts are little-endian without padding ("<").

# Fixed header: magic, version major/minor/patch, file flags,
# header_length, chunk_count.
HEADER_STRUCT = struct.Struct("<4sHHHHII")
HEADER_FIXED_LENGTH = HEADER_STRUCT.size

# Chunk table entry: chunk type, chunk flags, chunk offset, chunk length.
CHUNK_ENTRY_STRUCT = struct.Struct("<IIQQ")
CHUNK_ENTRY_LENGTH = CHUNK_ENTRY_STRUCT.size

# Preamble written directly before each chunk payload: chunk type, payload length.
CHUNK_PREAMBLE_STRUCT = struct.Struct("<IQ")
CHUNK_PREAMBLE_LENGTH = CHUNK_PREAMBLE_STRUCT.size

# Footer: total file length, 32-byte digest of everything before the footer,
# footer magic.
FOOTER_STRUCT = struct.Struct("<Q32s4s")
FOOTER_LENGTH = FOOTER_STRUCT.size
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
class FileFlag(IntFlag):
    """Bit flags stored in the file header's ``flags`` field."""

    HAS_COMPRESSION = 0x01
    HAS_ENCRYPTION = 0x02
    HAS_SIGNATURE = 0x04
    HAS_GRAPH = 0x08
    HAS_EMBEDDINGS = 0x10
    HAS_ADAPTERS = 0x20
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
class ChunkFlag(IntFlag):
    """Bit flags stored per chunk in the chunk table."""

    COMPRESSED = 0x01
    ENCRYPTED = 0x02
    # Readers skip unknown chunk types that carry this flag instead of failing.
    OPTIONAL = 0x04
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
class ChunkType(IntEnum):
    """Numeric identifiers of the chunk types known to this implementation."""

    # Core chunk types
    PROTOCOL_METADATA = 1
    IDENTITY = 2
    MEMORY_BUNDLE = 3
    MEMORY_GRAPH = 4
    EMBEDDING_INDEX = 5
    ADAPTER_METADATA = 6
    REGISTRY_SNAPSHOT = 7
    EXPORT_POLICY = 8

    # Passport sections
    PROFILE = 9
    PREFERENCES = 10
    GOALS = 11
    VALUES = 12
    SKILLS = 13
    INTERESTS = 14
    RELATIONSHIPS = 15
    PROVENANCE = 16
    INTEGRITY_METADATA = 17

    # Extension chunks sit well above the contiguous range.
    CUSTOM_EXTENSION = 1000
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
# Plain-int values of every chunk type declared in ChunkType.
KNOWN_CHUNK_TYPES = set(map(int, ChunkType))

# Flags that open-tier files must not carry.
OPEN_FORBIDDEN_FILE_FLAGS = FileFlag.HAS_SIGNATURE | FileFlag.HAS_ENCRYPTION
OPEN_FORBIDDEN_CHUNK_FLAGS = ChunkFlag.ENCRYPTED

# Complement of all defined bits: a non-zero AND with one of these masks
# means an undefined (reserved) bit is set.
RESERVED_FILE_FLAG_MASK = ~sum(map(int, FileFlag))
RESERVED_CHUNK_FLAG_MASK = ~sum(map(int, ChunkFlag))
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
@dataclass(frozen=True)
class Chunk:
    """One chunk of a framed file: a type id, its payload and its flag bits."""

    # Numeric chunk type id.
    chunk_type: int
    # Raw payload bytes, stored as given.
    data: bytes
    # Chunk flag bits; no flags by default.
    flags: int = 0
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
@dataclass(frozen=True)
class ChunkEntry:
    """One row of the chunk table located after the fixed header."""

    # Numeric chunk type id.
    chunk_type: int
    # Chunk flag bits.
    chunk_flags: int
    # Offset of the chunk record (its preamble) from the start of the file.
    chunk_offset: int
    # Payload length in bytes, not counting the preamble.
    chunk_length: int
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
@dataclass(frozen=True)
class FramedOrb:
    """Result of parsing a framed file."""

    # (major, minor, patch) read from the header.
    version: tuple[int, int, int]
    # File-level flag bits read from the header.
    flags: int
    # Chunks that were kept by the parser, in table order.
    chunks: list[Chunk]
    # Every chunk table entry, including entries whose chunk was skipped.
    entries: list[ChunkEntry]
    # Digest stored in the footer.
    checksum: bytes
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
def frame_chunks(
    chunks: Iterable[Chunk],
    *,
    version: tuple[int, int, int] = (1, 0, 0),
) -> bytes:
    """Serialize *chunks* into the bytes of a complete framed file.

    The output is laid out as: fixed header, chunk table (one entry per
    chunk), chunk records (preamble followed by payload), footer. The footer
    stores the total file length and the digest of everything before it.

    Args:
        chunks: Chunks to write, in output order.
        version: ``(major, minor, patch)`` recorded in the header.

    Returns:
        The framed file contents.

    Raises:
        InvalidOrbFile: If a chunk or the derived file flags fail validation.
    """
    pending = list(chunks)
    file_flags = _derive_file_flags(pending)
    # The chunk records start right after the fixed header and the table.
    table_end = HEADER_FIXED_LENGTH + len(pending) * CHUNK_ENTRY_LENGTH

    rows: list[ChunkEntry] = []
    records: list[bytes] = []
    cursor = table_end
    for item in pending:
        _validate_chunk_for_write(item)
        payload_size = len(item.data)
        record = CHUNK_PREAMBLE_STRUCT.pack(item.chunk_type, payload_size) + item.data
        rows.append(
            ChunkEntry(
                chunk_type=item.chunk_type,
                chunk_flags=item.flags,
                chunk_offset=cursor,
                chunk_length=payload_size,
            )
        )
        records.append(record)
        cursor += len(record)

    major, minor, patch = version
    header = HEADER_STRUCT.pack(
        MAGIC, major, minor, patch, file_flags, table_end, len(pending)
    )
    packed_rows = [
        CHUNK_ENTRY_STRUCT.pack(
            row.chunk_type, row.chunk_flags, row.chunk_offset, row.chunk_length
        )
        for row in rows
    ]
    body = header + b"".join(packed_rows) + b"".join(records)
    # total_file_length covers the footer itself.
    footer = FOOTER_STRUCT.pack(len(body) + FOOTER_LENGTH, sha256(body), FOOTER_MAGIC)
    return body + footer
|
|
142
|
+
|
|
143
|
+
|
|
144
|
+
def parse_framed(data: bytes) -> FramedOrb:
    """Parse and validate the bytes of a framed file.

    Checks run in this order: minimum size, header magic, major version,
    file flags, header length, footer magic, footer length, footer digest,
    then every chunk table entry against its chunk record.

    Args:
        data: The complete file contents.

    Returns:
        The parsed container. Chunks of unknown type flagged ``OPTIONAL``
        are left out of ``chunks`` but remain listed in ``entries``.

    Raises:
        InvalidOrbFile: If the container is structurally invalid.
        VersionMismatch: If the major version is newer than supported, or an
            unknown chunk type is not flagged ``OPTIONAL``.
    """
    size = len(data)
    if size < HEADER_FIXED_LENGTH + FOOTER_LENGTH:
        raise InvalidOrbFile("file is too small to be a valid .orb container")

    header_fields = HEADER_STRUCT.unpack_from(data, 0)
    magic, major, minor, patch, flags, header_length, chunk_count = header_fields
    if magic != MAGIC:
        raise InvalidOrbFile("invalid .orb magic bytes")
    if major > PROTOCOL_MAJOR:
        raise VersionMismatch(f"unsupported OMP major version {major}")
    _validate_file_flags(flags)

    # The header must hold the whole chunk table and end before the footer.
    body_end = size - FOOTER_LENGTH
    table_end = HEADER_FIXED_LENGTH + chunk_count * CHUNK_ENTRY_LENGTH
    if header_length < table_end:
        raise InvalidOrbFile("header_length is shorter than the chunk table")
    if header_length > body_end:
        raise InvalidOrbFile("header_length points beyond the file body")

    total_length, digest, footer_magic = FOOTER_STRUCT.unpack_from(data, body_end)
    if footer_magic != FOOTER_MAGIC:
        raise InvalidOrbFile("invalid .orb footer magic")
    if total_length != size:
        raise InvalidOrbFile("footer total_file_length does not match actual file size")
    if sha256(data[:body_end]) != digest:
        raise InvalidOrbFile("footer SHA-256 checksum mismatch")

    entries = _parse_entries(data, chunk_count)
    kept: list[Chunk] = []
    for entry in entries:
        _validate_entry(entry, body_end, header_length)

        if entry.chunk_type not in KNOWN_CHUNK_TYPES:
            if not entry.chunk_flags & ChunkFlag.OPTIONAL:
                raise VersionMismatch(f"unknown required chunk type {entry.chunk_type}")
            # Unknown but optional: skip the chunk.
            continue

        # The preamble in front of the payload must agree with the table entry.
        preamble = CHUNK_PREAMBLE_STRUCT.unpack_from(data, entry.chunk_offset)
        recorded_type, recorded_length = preamble
        if recorded_type != entry.chunk_type:
            raise InvalidOrbFile("chunk table type does not match chunk payload type")
        if recorded_length != entry.chunk_length:
            raise InvalidOrbFile("chunk table length does not match chunk payload length")

        start = entry.chunk_offset + CHUNK_PREAMBLE_LENGTH
        payload = data[start : start + entry.chunk_length]
        kept.append(
            Chunk(chunk_type=entry.chunk_type, flags=entry.chunk_flags, data=payload)
        )

    return FramedOrb(
        version=(major, minor, patch),
        flags=flags,
        chunks=kept,
        entries=entries,
        checksum=digest,
    )
|
|
204
|
+
|
|
205
|
+
|
|
206
|
+
def write_framed_file(
    path: str | Path,
    chunks: Iterable[Chunk],
    *,
    version: tuple[int, int, int] = (1, 0, 0),
) -> bytes:
    """Frame *chunks* and write the result to *path*.

    Framing happens first, so nothing is written when it fails.

    Returns:
        The bytes that were written.
    """
    framed = frame_chunks(chunks, version=version)
    destination = Path(path)
    destination.write_bytes(framed)
    return framed
|
|
215
|
+
|
|
216
|
+
|
|
217
|
+
def read_framed_file(path: str | Path) -> FramedOrb:
    """Read the whole file at *path* and parse it with ``parse_framed``."""
    raw = Path(path).read_bytes()
    return parse_framed(raw)
|
|
219
|
+
|
|
220
|
+
|
|
221
|
+
def _derive_file_flags(chunks: list[Chunk]) -> int:
    """Compute the header file flags implied by *chunks*.

    Compression and encryption bits come from the chunk flags; the graph,
    embeddings and adapters bits come from the chunk types that are present.

    Returns:
        The combined flags as a plain ``int``.

    Raises:
        InvalidOrbFile: If the combined flags fail ``_validate_file_flags``.
    """
    by_chunk_flag = (
        (ChunkFlag.COMPRESSED, FileFlag.HAS_COMPRESSION),
        (ChunkFlag.ENCRYPTED, FileFlag.HAS_ENCRYPTION),
    )
    by_chunk_type = (
        (ChunkType.MEMORY_GRAPH, FileFlag.HAS_GRAPH),
        (ChunkType.EMBEDDING_INDEX, FileFlag.HAS_EMBEDDINGS),
        (ChunkType.ADAPTER_METADATA, FileFlag.HAS_ADAPTERS),
    )

    combined = FileFlag(0)
    for chunk in chunks:
        for chunk_bit, file_bit in by_chunk_flag:
            if chunk.flags & chunk_bit:
                combined |= file_bit
        for chunk_type, file_bit in by_chunk_type:
            if chunk.chunk_type == chunk_type:
                combined |= file_bit

    result = int(combined)
    _validate_file_flags(result)
    return result
|
|
236
|
+
|
|
237
|
+
|
|
238
|
+
def _validate_chunk_for_write(chunk: Chunk) -> None:
    """Reject a chunk that must not be written to an open-tier file.

    Raises:
        InvalidOrbFile: If reserved flag bits are set, the chunk is flagged
            encrypted, or a ``CUSTOM_EXTENSION`` chunk lacks ``OPTIONAL``.
    """
    flag_bits = chunk.flags
    if flag_bits & RESERVED_CHUNK_FLAG_MASK:
        raise InvalidOrbFile(f"reserved chunk flags are set: {flag_bits}")
    if flag_bits & OPEN_FORBIDDEN_CHUNK_FLAGS:
        raise InvalidOrbFile("open-tier .orb files cannot contain encrypted chunks")

    is_custom = chunk.chunk_type == ChunkType.CUSTOM_EXTENSION
    is_optional = flag_bits & ChunkFlag.OPTIONAL
    if is_custom and not is_optional:
        raise InvalidOrbFile("CUSTOM_EXTENSION chunks must be flagged OPTIONAL")
|
|
247
|
+
|
|
248
|
+
|
|
249
|
+
def _validate_file_flags(flags: int) -> None:
    """Reject file flags that are reserved or forbidden for open-tier files.

    Raises:
        InvalidOrbFile: If a reserved bit is set, or an encryption or
            signature bit is set.
    """
    reserved_bits = flags & RESERVED_FILE_FLAG_MASK
    if reserved_bits:
        raise InvalidOrbFile(f"reserved file flags are set: {flags}")
    forbidden_bits = flags & OPEN_FORBIDDEN_FILE_FLAGS
    if forbidden_bits:
        raise InvalidOrbFile("open-tier .orb files cannot set encryption/signature flags")
|
|
254
|
+
|
|
255
|
+
|
|
256
|
+
def _parse_entries(data: bytes, chunk_count: int) -> list[ChunkEntry]:
    """Decode *chunk_count* chunk table entries located after the fixed header.

    Raises:
        InvalidOrbFile: If an entry has reserved flag bits set or is flagged
            encrypted.
    """
    parsed: list[ChunkEntry] = []
    for index in range(chunk_count):
        # Entries are packed back to back, starting right after the header.
        position = HEADER_FIXED_LENGTH + index * CHUNK_ENTRY_LENGTH
        fields = CHUNK_ENTRY_STRUCT.unpack_from(data, position)
        entry_flags = fields[1]
        if entry_flags & RESERVED_CHUNK_FLAG_MASK:
            raise InvalidOrbFile(f"reserved chunk flags are set: {entry_flags}")
        if entry_flags & OPEN_FORBIDDEN_CHUNK_FLAGS:
            raise InvalidOrbFile("open-tier .orb files cannot contain encrypted chunks")
        parsed.append(ChunkEntry(*fields))
    return parsed
|
|
270
|
+
|
|
271
|
+
|
|
272
|
+
def _validate_entry(entry: ChunkEntry, footer_start: int, header_length: int) -> None:
    """Check that a chunk record lies between the header and the footer.

    Args:
        entry: The chunk table entry to check.
        footer_start: Offset of the first footer byte.
        header_length: Header length declared in the file header.

    Raises:
        InvalidOrbFile: If the record starts inside the header, or its
            preamble or payload runs past ``footer_start``.
    """
    if entry.chunk_offset < header_length:
        raise InvalidOrbFile("chunk offset points inside the header")
    payload_start = entry.chunk_offset + CHUNK_PREAMBLE_LENGTH
    payload_end = payload_start + entry.chunk_length
    # Neither the end of the preamble nor the end of the payload may pass the footer.
    if max(payload_start, payload_end) > footer_start:
        raise InvalidOrbFile("chunk extends beyond the file body")
|