code-puppy-core 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- code_puppy_core-0.1.0/Cargo.toml +13 -0
- code_puppy_core-0.1.0/PKG-INFO +4 -0
- code_puppy_core-0.1.0/code_puppy_core/.gitignore +7 -0
- code_puppy_core-0.1.0/code_puppy_core/Cargo.lock +245 -0
- code_puppy_core-0.1.0/code_puppy_core/Cargo.toml +16 -0
- code_puppy_core-0.1.0/code_puppy_core/src/hashline.rs +199 -0
- code_puppy_core-0.1.0/code_puppy_core/src/lib.rs +182 -0
- code_puppy_core-0.1.0/code_puppy_core/src/message_hashing.rs +121 -0
- code_puppy_core-0.1.0/code_puppy_core/src/pruning.rs +187 -0
- code_puppy_core-0.1.0/code_puppy_core/src/serialization.rs +91 -0
- code_puppy_core-0.1.0/code_puppy_core/src/token_estimation.rs +316 -0
- code_puppy_core-0.1.0/code_puppy_core/src/types.rs +125 -0
- code_puppy_core-0.1.0/code_puppy_core/uv.lock +8 -0
- code_puppy_core-0.1.0/pyproject.toml +12 -0
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
[workspace]
|
|
2
|
+
members = ["code_puppy_core"]
|
|
3
|
+
resolver = "2"
|
|
4
|
+
|
|
5
|
+
[workspace.dependencies]
|
|
6
|
+
pyo3 = { version = "0.28", features = ["extension-module", "abi3-py311"] }
|
|
7
|
+
serde = { version = "1", features = ["derive"] }
|
|
8
|
+
serde_json = "1"
|
|
9
|
+
|
|
10
|
+
[profile.release]
|
|
11
|
+
opt-level = 3
|
|
12
|
+
lto = true
|
|
13
|
+
codegen-units = 1
|
|
@@ -0,0 +1,245 @@
|
|
|
1
|
+
# This file is automatically @generated by Cargo.
|
|
2
|
+
# It is not intended for manual editing.
|
|
3
|
+
version = 4
|
|
4
|
+
|
|
5
|
+
[[package]]
|
|
6
|
+
name = "autocfg"
|
|
7
|
+
version = "1.5.0"
|
|
8
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
9
|
+
checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8"
|
|
10
|
+
|
|
11
|
+
[[package]]
|
|
12
|
+
name = "code_puppy_core"
|
|
13
|
+
version = "0.1.0"
|
|
14
|
+
dependencies = [
|
|
15
|
+
"pyo3",
|
|
16
|
+
"rmp-serde",
|
|
17
|
+
"rustc-hash",
|
|
18
|
+
"serde",
|
|
19
|
+
"serde_json",
|
|
20
|
+
"xxhash-rust",
|
|
21
|
+
]
|
|
22
|
+
|
|
23
|
+
[[package]]
|
|
24
|
+
name = "heck"
|
|
25
|
+
version = "0.5.0"
|
|
26
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
27
|
+
checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea"
|
|
28
|
+
|
|
29
|
+
[[package]]
|
|
30
|
+
name = "itoa"
|
|
31
|
+
version = "1.0.18"
|
|
32
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
33
|
+
checksum = "8f42a60cbdf9a97f5d2305f08a87dc4e09308d1276d28c869c684d7777685682"
|
|
34
|
+
|
|
35
|
+
[[package]]
|
|
36
|
+
name = "libc"
|
|
37
|
+
version = "0.2.183"
|
|
38
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
39
|
+
checksum = "b5b646652bf6661599e1da8901b3b9522896f01e736bad5f723fe7a3a27f899d"
|
|
40
|
+
|
|
41
|
+
[[package]]
|
|
42
|
+
name = "memchr"
|
|
43
|
+
version = "2.8.0"
|
|
44
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
45
|
+
checksum = "f8ca58f447f06ed17d5fc4043ce1b10dd205e060fb3ce5b979b8ed8e59ff3f79"
|
|
46
|
+
|
|
47
|
+
[[package]]
|
|
48
|
+
name = "num-traits"
|
|
49
|
+
version = "0.2.19"
|
|
50
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
51
|
+
checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841"
|
|
52
|
+
dependencies = [
|
|
53
|
+
"autocfg",
|
|
54
|
+
]
|
|
55
|
+
|
|
56
|
+
[[package]]
|
|
57
|
+
name = "once_cell"
|
|
58
|
+
version = "1.21.4"
|
|
59
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
60
|
+
checksum = "9f7c3e4beb33f85d45ae3e3a1792185706c8e16d043238c593331cc7cd313b50"
|
|
61
|
+
|
|
62
|
+
[[package]]
|
|
63
|
+
name = "portable-atomic"
|
|
64
|
+
version = "1.13.1"
|
|
65
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
66
|
+
checksum = "c33a9471896f1c69cecef8d20cbe2f7accd12527ce60845ff44c153bb2a21b49"
|
|
67
|
+
|
|
68
|
+
[[package]]
|
|
69
|
+
name = "proc-macro2"
|
|
70
|
+
version = "1.0.106"
|
|
71
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
72
|
+
checksum = "8fd00f0bb2e90d81d1044c2b32617f68fcb9fa3bb7640c23e9c748e53fb30934"
|
|
73
|
+
dependencies = [
|
|
74
|
+
"unicode-ident",
|
|
75
|
+
]
|
|
76
|
+
|
|
77
|
+
[[package]]
|
|
78
|
+
name = "pyo3"
|
|
79
|
+
version = "0.28.2"
|
|
80
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
81
|
+
checksum = "cf85e27e86080aafd5a22eae58a162e133a589551542b3e5cee4beb27e54f8e1"
|
|
82
|
+
dependencies = [
|
|
83
|
+
"libc",
|
|
84
|
+
"once_cell",
|
|
85
|
+
"portable-atomic",
|
|
86
|
+
"pyo3-build-config",
|
|
87
|
+
"pyo3-ffi",
|
|
88
|
+
"pyo3-macros",
|
|
89
|
+
]
|
|
90
|
+
|
|
91
|
+
[[package]]
|
|
92
|
+
name = "pyo3-build-config"
|
|
93
|
+
version = "0.28.2"
|
|
94
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
95
|
+
checksum = "8bf94ee265674bf76c09fa430b0e99c26e319c945d96ca0d5a8215f31bf81cf7"
|
|
96
|
+
dependencies = [
|
|
97
|
+
"target-lexicon",
|
|
98
|
+
]
|
|
99
|
+
|
|
100
|
+
[[package]]
|
|
101
|
+
name = "pyo3-ffi"
|
|
102
|
+
version = "0.28.2"
|
|
103
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
104
|
+
checksum = "491aa5fc66d8059dd44a75f4580a2962c1862a1c2945359db36f6c2818b748dc"
|
|
105
|
+
dependencies = [
|
|
106
|
+
"libc",
|
|
107
|
+
"pyo3-build-config",
|
|
108
|
+
]
|
|
109
|
+
|
|
110
|
+
[[package]]
|
|
111
|
+
name = "pyo3-macros"
|
|
112
|
+
version = "0.28.2"
|
|
113
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
114
|
+
checksum = "f5d671734e9d7a43449f8480f8b38115df67bef8d21f76837fa75ee7aaa5e52e"
|
|
115
|
+
dependencies = [
|
|
116
|
+
"proc-macro2",
|
|
117
|
+
"pyo3-macros-backend",
|
|
118
|
+
"quote",
|
|
119
|
+
"syn",
|
|
120
|
+
]
|
|
121
|
+
|
|
122
|
+
[[package]]
|
|
123
|
+
name = "pyo3-macros-backend"
|
|
124
|
+
version = "0.28.2"
|
|
125
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
126
|
+
checksum = "22faaa1ce6c430a1f71658760497291065e6450d7b5dc2bcf254d49f66ee700a"
|
|
127
|
+
dependencies = [
|
|
128
|
+
"heck",
|
|
129
|
+
"proc-macro2",
|
|
130
|
+
"pyo3-build-config",
|
|
131
|
+
"quote",
|
|
132
|
+
"syn",
|
|
133
|
+
]
|
|
134
|
+
|
|
135
|
+
[[package]]
|
|
136
|
+
name = "quote"
|
|
137
|
+
version = "1.0.45"
|
|
138
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
139
|
+
checksum = "41f2619966050689382d2b44f664f4bc593e129785a36d6ee376ddf37259b924"
|
|
140
|
+
dependencies = [
|
|
141
|
+
"proc-macro2",
|
|
142
|
+
]
|
|
143
|
+
|
|
144
|
+
[[package]]
|
|
145
|
+
name = "rmp"
|
|
146
|
+
version = "0.8.15"
|
|
147
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
148
|
+
checksum = "4ba8be72d372b2c9b35542551678538b562e7cf86c3315773cae48dfbfe7790c"
|
|
149
|
+
dependencies = [
|
|
150
|
+
"num-traits",
|
|
151
|
+
]
|
|
152
|
+
|
|
153
|
+
[[package]]
|
|
154
|
+
name = "rmp-serde"
|
|
155
|
+
version = "1.3.1"
|
|
156
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
157
|
+
checksum = "72f81bee8c8ef9b577d1681a70ebbc962c232461e397b22c208c43c04b67a155"
|
|
158
|
+
dependencies = [
|
|
159
|
+
"rmp",
|
|
160
|
+
"serde",
|
|
161
|
+
]
|
|
162
|
+
|
|
163
|
+
[[package]]
|
|
164
|
+
name = "rustc-hash"
|
|
165
|
+
version = "2.1.2"
|
|
166
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
167
|
+
checksum = "94300abf3f1ae2e2b8ffb7b58043de3d399c73fa6f4b73826402a5c457614dbe"
|
|
168
|
+
|
|
169
|
+
[[package]]
|
|
170
|
+
name = "serde"
|
|
171
|
+
version = "1.0.228"
|
|
172
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
173
|
+
checksum = "9a8e94ea7f378bd32cbbd37198a4a91436180c5bb472411e48b5ec2e2124ae9e"
|
|
174
|
+
dependencies = [
|
|
175
|
+
"serde_core",
|
|
176
|
+
"serde_derive",
|
|
177
|
+
]
|
|
178
|
+
|
|
179
|
+
[[package]]
|
|
180
|
+
name = "serde_core"
|
|
181
|
+
version = "1.0.228"
|
|
182
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
183
|
+
checksum = "41d385c7d4ca58e59fc732af25c3983b67ac852c1a25000afe1175de458b67ad"
|
|
184
|
+
dependencies = [
|
|
185
|
+
"serde_derive",
|
|
186
|
+
]
|
|
187
|
+
|
|
188
|
+
[[package]]
|
|
189
|
+
name = "serde_derive"
|
|
190
|
+
version = "1.0.228"
|
|
191
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
192
|
+
checksum = "d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79"
|
|
193
|
+
dependencies = [
|
|
194
|
+
"proc-macro2",
|
|
195
|
+
"quote",
|
|
196
|
+
"syn",
|
|
197
|
+
]
|
|
198
|
+
|
|
199
|
+
[[package]]
|
|
200
|
+
name = "serde_json"
|
|
201
|
+
version = "1.0.149"
|
|
202
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
203
|
+
checksum = "83fc039473c5595ace860d8c4fafa220ff474b3fc6bfdb4293327f1a37e94d86"
|
|
204
|
+
dependencies = [
|
|
205
|
+
"itoa",
|
|
206
|
+
"memchr",
|
|
207
|
+
"serde",
|
|
208
|
+
"serde_core",
|
|
209
|
+
"zmij",
|
|
210
|
+
]
|
|
211
|
+
|
|
212
|
+
[[package]]
|
|
213
|
+
name = "syn"
|
|
214
|
+
version = "2.0.117"
|
|
215
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
216
|
+
checksum = "e665b8803e7b1d2a727f4023456bbbbe74da67099c585258af0ad9c5013b9b99"
|
|
217
|
+
dependencies = [
|
|
218
|
+
"proc-macro2",
|
|
219
|
+
"quote",
|
|
220
|
+
"unicode-ident",
|
|
221
|
+
]
|
|
222
|
+
|
|
223
|
+
[[package]]
|
|
224
|
+
name = "target-lexicon"
|
|
225
|
+
version = "0.13.5"
|
|
226
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
227
|
+
checksum = "adb6935a6f5c20170eeceb1a3835a49e12e19d792f6dd344ccc76a985ca5a6ca"
|
|
228
|
+
|
|
229
|
+
[[package]]
|
|
230
|
+
name = "unicode-ident"
|
|
231
|
+
version = "1.0.24"
|
|
232
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
233
|
+
checksum = "e6e4313cd5fcd3dad5cafa179702e2b244f760991f45397d14d4ebf38247da75"
|
|
234
|
+
|
|
235
|
+
[[package]]
|
|
236
|
+
name = "xxhash-rust"
|
|
237
|
+
version = "0.8.15"
|
|
238
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
239
|
+
checksum = "fdd20c5420375476fbd4394763288da7eb0cc0b8c11deed431a91562af7335d3"
|
|
240
|
+
|
|
241
|
+
[[package]]
|
|
242
|
+
name = "zmij"
|
|
243
|
+
version = "1.0.21"
|
|
244
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
245
|
+
checksum = "b8848ee67ecc8aedbaf3e4122217aff892639231befc6a1b58d29fff4c2cabaa"
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
[package]
|
|
2
|
+
name = "code_puppy_core"
|
|
3
|
+
version = "0.1.0"
|
|
4
|
+
edition = "2021"
|
|
5
|
+
|
|
6
|
+
[lib]
|
|
7
|
+
name = "_code_puppy_core"
|
|
8
|
+
crate-type = ["cdylib"]
|
|
9
|
+
|
|
10
|
+
[dependencies]
|
|
11
|
+
pyo3 = { workspace = true }
|
|
12
|
+
serde = { workspace = true }
|
|
13
|
+
serde_json = { workspace = true }
|
|
14
|
+
rustc-hash = "2"
|
|
15
|
+
rmp-serde = "1"
|
|
16
|
+
xxhash-rust = { version = "0.8", features = ["xxh32"] }
|
|
@@ -0,0 +1,199 @@
|
|
|
1
|
+
//! Hashline: file-edit anchoring via per-line content hashes.
//!
//! Each line gets a 2-char anchor encoded with NIBBLE_STR so the LLM can
//! reference lines precisely. Compatible with oh-my-pi's hashline format.
|
|
5
|
+
|
|
6
|
+
// Custom nibble encoding (matches omp's NIBBLE_STR)
|
|
7
|
+
const NIBBLE_STR: &[u8; 16] = b"ZPMQVRWSNKTXJBYH";
|
|
8
|
+
|
|
9
|
+
/// Compute a 2-character hash anchor for a single line.
|
|
10
|
+
///
|
|
11
|
+
/// Algorithm:
|
|
12
|
+
/// 1. Strip trailing whitespace / `\r`
|
|
13
|
+
/// 2. If line has no alphanumeric chars → seed = idx, else seed = 0
|
|
14
|
+
/// 3. xxHash32 of cleaned line bytes with that seed
|
|
15
|
+
/// 4. Take lowest byte of hash
|
|
16
|
+
/// 5. Encode via NIBBLE_STR: high nibble char + low nibble char
|
|
17
|
+
pub fn compute_line_hash(idx: u32, line: &str) -> String {
|
|
18
|
+
// Strip trailing whitespace and \r
|
|
19
|
+
let cleaned = line.trim_end_matches(|c: char| c == '\r' || c.is_whitespace());
|
|
20
|
+
|
|
21
|
+
// Check if line has any alphanumeric character (Unicode-aware)
|
|
22
|
+
let has_alnum = cleaned.chars().any(|c| c.is_alphanumeric());
|
|
23
|
+
|
|
24
|
+
let seed = if has_alnum { 0u32 } else { idx };
|
|
25
|
+
|
|
26
|
+
let hash = xxhash_rust::xxh32::xxh32(cleaned.as_bytes(), seed);
|
|
27
|
+
|
|
28
|
+
let byte = (hash & 0xFF) as usize;
|
|
29
|
+
let hi = NIBBLE_STR[(byte >> 4) & 0xF] as char;
|
|
30
|
+
let lo = NIBBLE_STR[byte & 0xF] as char;
|
|
31
|
+
|
|
32
|
+
format!("{}{}", hi, lo)
|
|
33
|
+
}
|
|
34
|
+
|
|
35
|
+
/// Format text with hashline prefixes.
|
|
36
|
+
///
|
|
37
|
+
/// Each line becomes `{line_number}#{hash}:{original_line}`.
|
|
38
|
+
/// `start_line` is 1-based by convention.
|
|
39
|
+
pub fn format_hashlines(text: &str, start_line: u32) -> String {
|
|
40
|
+
text.split('\n')
|
|
41
|
+
.enumerate()
|
|
42
|
+
.map(|(i, line)| {
|
|
43
|
+
let line_num = start_line + i as u32;
|
|
44
|
+
let hash = compute_line_hash(line_num, line);
|
|
45
|
+
format!("{}#{}:{}", line_num, hash, line)
|
|
46
|
+
})
|
|
47
|
+
.collect::<Vec<_>>()
|
|
48
|
+
.join("\n")
|
|
49
|
+
}
|
|
50
|
+
|
|
51
|
+
/// Strip hashline prefixes from text, returning plain content.
///
/// The text is split on `\n`. Each line matching `^\d+#[A-Z]{2}:` has that
/// prefix removed; every other line passes through unchanged. Lines are
/// re-joined with `\n`, so the number of lines (and any trailing newline) is
/// preserved.
pub fn strip_hashline_prefixes(text: &str) -> String {
    // The output is never longer than the input, so one allocation suffices.
    let mut out = String::with_capacity(text.len());

    for (i, line) in text.split('\n').enumerate() {
        if i > 0 {
            out.push('\n');
        }
        out.push_str(strip_one_hashline_prefix(line));
    }

    out
}

/// Remove a single leading `{digits}#{XX}:` prefix from `line`, if present.
///
/// Returns the remainder after the `:` when the prefix is well-formed
/// (one or more ASCII digits, `#`, two ASCII uppercase letters, `:`);
/// otherwise returns `line` untouched. Only one prefix is removed.
fn strip_one_hashline_prefix(line: &str) -> &str {
    // Split at the first '#': the left side must be the line number.
    let Some((digits, rest)) = line.split_once('#') else {
        return line;
    };

    if digits.is_empty() || !digits.bytes().all(|b| b.is_ascii_digit()) {
        return line;
    }

    // Two uppercase ASCII letters followed by ':'. The three matched bytes
    // are ASCII, so byte offset 3 is always a valid char boundary.
    match rest.as_bytes() {
        [first, second, b':', ..] if first.is_ascii_uppercase() && second.is_ascii_uppercase() => {
            &rest[3..]
        }
        _ => line,
    }
}
|
|
87
|
+
|
|
88
|
+
/// Validate that a stored hash anchor still matches the current line content.
|
|
89
|
+
pub fn validate_hashline_anchor(idx: u32, line: &str, expected_hash: &str) -> bool {
|
|
90
|
+
compute_line_hash(idx, line) == expected_hash
|
|
91
|
+
}
|
|
92
|
+
|
|
93
|
+
#[cfg(test)]
mod tests {
    use super::*;
    use std::collections::HashSet;

    /// Every byte value must encode to two uppercase ASCII letters, because
    /// `strip_hashline_prefixes` only recognises `[A-Z]{2}` anchors.
    #[test]
    fn test_nibble_encoding_range() {
        for byte in 0u8..=255 {
            let hi = NIBBLE_STR[(byte >> 4) as usize] as char;
            let lo = NIBBLE_STR[(byte & 0xF) as usize] as char;
            assert!(hi.is_ascii_uppercase(), "hi nibble not uppercase for byte {byte}");
            assert!(lo.is_ascii_uppercase(), "lo nibble not uppercase for byte {byte}");
        }
    }

    #[test]
    fn test_compute_line_hash_returns_two_chars() {
        let h = compute_line_hash(1, "hello world");
        assert_eq!(h.len(), 2);
        assert!(h.chars().all(|c| c.is_ascii_uppercase()));
    }

    #[test]
    fn test_compute_line_hash_whitespace_only_uses_idx_as_seed() {
        // Any single pair of indices may collide (only 256 anchors exist), so
        // sample many indices instead: if the seed were ignored, every anchor
        // for the same whitespace-only line would be identical.
        let anchors: HashSet<String> = (1..=64)
            .map(|idx| compute_line_hash(idx, "   "))
            .collect();
        assert!(
            anchors.len() > 1,
            "whitespace-only lines should hash differently across indices"
        );
        assert!(anchors.iter().all(|a| a.len() == 2));
    }

    #[test]
    fn test_compute_line_hash_alnum_ignores_idx() {
        // Lines with alphanumeric content always hash with seed 0.
        assert_eq!(compute_line_hash(1, "hello"), compute_line_hash(999, "hello"));
    }

    #[test]
    fn test_compute_line_hash_strips_trailing_whitespace() {
        let h1 = compute_line_hash(1, "hello");
        let h2 = compute_line_hash(1, "hello   ");
        let h3 = compute_line_hash(1, "hello\r");
        assert_eq!(h1, h2, "trailing whitespace should be stripped before hashing");
        assert_eq!(h1, h3, "trailing \\r should be stripped before hashing");
    }

    #[test]
    fn test_format_hashlines_basic() {
        let result = format_hashlines("foo\nbar", 1);
        let lines: Vec<&str> = result.split('\n').collect();
        assert_eq!(lines.len(), 2);
        assert!(lines[0].starts_with("1#"));
        assert!(lines[0].contains(":foo"));
        assert!(lines[1].starts_with("2#"));
        assert!(lines[1].contains(":bar"));
    }

    #[test]
    fn test_format_hashlines_start_line() {
        let result = format_hashlines("hello", 10);
        assert!(result.starts_with("10#"));
        assert!(result.ends_with(":hello"));
    }

    #[test]
    fn test_strip_hashline_prefixes_roundtrip() {
        let original = "line one\nline two\n";
        let formatted = format_hashlines(original, 1);
        let stripped = strip_hashline_prefixes(&formatted);
        assert_eq!(stripped, original);
    }

    #[test]
    fn test_strip_hashline_prefixes_passthrough() {
        // Lines without hashline prefix pass through unchanged
        let text = "no prefix here\njust plain text";
        let stripped = strip_hashline_prefixes(text);
        assert_eq!(stripped, text);
    }

    #[test]
    fn test_strip_hashline_prefixes_malformed() {
        // Near-misses of the `^\d+#[A-Z]{2}:` shape must not be altered.
        for text in ["#AB:x", "1#ab:x", "1#AB", "1#A:x", "x1#AB:y"] {
            assert_eq!(strip_hashline_prefixes(text), text);
        }
    }

    #[test]
    fn test_strip_mixed_lines() {
        let formatted = format_hashlines("hello", 1);
        let mixed = format!("{}\nplain line", formatted);
        let stripped = strip_hashline_prefixes(&mixed);
        assert_eq!(stripped, "hello\nplain line");
    }

    #[test]
    fn test_validate_hashline_anchor_valid() {
        let h = compute_line_hash(5, "some code");
        assert!(validate_hashline_anchor(5, "some code", &h));
    }

    #[test]
    fn test_validate_hashline_anchor_invalid() {
        let h = compute_line_hash(5, "some code");
        assert!(!validate_hashline_anchor(5, "different code", &h));
    }

    #[test]
    fn test_validate_hashline_anchor_wrong_idx_for_blank() {
        // Blank line hashes depend on idx — validate round-trip consistency
        let h1 = compute_line_hash(1, "");
        let h2 = compute_line_hash(100, "");
        // Each hash should validate correctly for its own idx
        assert!(validate_hashline_anchor(1, "", &h1));
        assert!(validate_hashline_anchor(100, "", &h2));
        // And the hash for idx=1 must NOT validate as idx=1 with wrong content
        assert!(!validate_hashline_anchor(1, "x", &h1));
        // A blank-line anchor must be rejected at some other index. One index
        // alone could collide, so it is enough that any of many indices rejects it.
        assert!(
            (2..=64).any(|idx| !validate_hashline_anchor(idx, "", &h1)),
            "blank-line anchor should not validate at every index"
        );
    }
}
|
|
@@ -0,0 +1,182 @@
|
|
|
1
|
+
use pyo3::prelude::*;
|
|
2
|
+
use pyo3::types::PyList;
|
|
3
|
+
|
|
4
|
+
mod hashline;
|
|
5
|
+
mod message_hashing;
|
|
6
|
+
mod pruning;
|
|
7
|
+
mod serialization;
|
|
8
|
+
mod token_estimation;
|
|
9
|
+
mod types;
|
|
10
|
+
|
|
11
|
+
use hashline::{
|
|
12
|
+
compute_line_hash as compute_line_hash_impl,
|
|
13
|
+
format_hashlines as format_hashlines_impl,
|
|
14
|
+
strip_hashline_prefixes as strip_hashline_prefixes_impl,
|
|
15
|
+
validate_hashline_anchor as validate_hashline_anchor_impl,
|
|
16
|
+
};
|
|
17
|
+
use pruning::{prune_and_filter_impl, split_for_summarization_impl, truncation_indices_impl};
|
|
18
|
+
use serialization::{
|
|
19
|
+
deserialize_session_impl, serialize_session_impl, serialize_session_incremental_impl,
|
|
20
|
+
};
|
|
21
|
+
use token_estimation::process_messages_batch_impl;
|
|
22
|
+
|
|
23
|
+
// ── Result types exposed to Python ──────────────────────────────────────────
|
|
24
|
+
|
|
25
|
+
#[pyclass(frozen)]
|
|
26
|
+
#[derive(Debug)]
|
|
27
|
+
pub struct ProcessResult {
|
|
28
|
+
#[pyo3(get)]
|
|
29
|
+
pub per_message_tokens: Vec<i64>,
|
|
30
|
+
#[pyo3(get)]
|
|
31
|
+
pub total_message_tokens: i64,
|
|
32
|
+
#[pyo3(get)]
|
|
33
|
+
pub context_overhead_tokens: i64,
|
|
34
|
+
#[pyo3(get)]
|
|
35
|
+
pub message_hashes: Vec<i64>,
|
|
36
|
+
}
|
|
37
|
+
|
|
38
|
+
#[pyclass(frozen)]
|
|
39
|
+
#[derive(Debug)]
|
|
40
|
+
pub struct PruneResult {
|
|
41
|
+
#[pyo3(get)]
|
|
42
|
+
pub surviving_indices: Vec<usize>,
|
|
43
|
+
#[pyo3(get)]
|
|
44
|
+
pub dropped_count: usize,
|
|
45
|
+
#[pyo3(get)]
|
|
46
|
+
pub had_pending_tool_calls: bool,
|
|
47
|
+
#[pyo3(get)]
|
|
48
|
+
pub pending_tool_call_count: usize,
|
|
49
|
+
}
|
|
50
|
+
|
|
51
|
+
#[pyclass(frozen)]
|
|
52
|
+
#[derive(Debug)]
|
|
53
|
+
pub struct SplitResult {
|
|
54
|
+
#[pyo3(get)]
|
|
55
|
+
pub summarize_indices: Vec<usize>,
|
|
56
|
+
#[pyo3(get)]
|
|
57
|
+
pub protected_indices: Vec<usize>,
|
|
58
|
+
#[pyo3(get)]
|
|
59
|
+
pub protected_token_count: i64,
|
|
60
|
+
}
|
|
61
|
+
|
|
62
|
+
// ── Helper: parse list[dict] → Vec<Message> ─────────────────────────────────
|
|
63
|
+
|
|
64
|
+
// ── Python-facing functions ─────────────────────────────────────────────────
|
|
65
|
+
|
|
66
|
+
#[pyfunction]
|
|
67
|
+
#[pyo3(signature = (messages, tool_definitions, mcp_tool_definitions, system_prompt))]
|
|
68
|
+
fn process_messages_batch<'py>(
|
|
69
|
+
messages: &Bound<'py, PyList>,
|
|
70
|
+
tool_definitions: &Bound<'py, PyList>,
|
|
71
|
+
mcp_tool_definitions: &Bound<'py, PyList>,
|
|
72
|
+
system_prompt: &str,
|
|
73
|
+
) -> PyResult<ProcessResult> {
|
|
74
|
+
process_messages_batch_impl(
|
|
75
|
+
messages,
|
|
76
|
+
tool_definitions,
|
|
77
|
+
mcp_tool_definitions,
|
|
78
|
+
system_prompt,
|
|
79
|
+
)
|
|
80
|
+
}
|
|
81
|
+
|
|
82
|
+
#[pyfunction]
|
|
83
|
+
#[pyo3(signature = (messages, max_tokens_per_message=50000))]
|
|
84
|
+
fn prune_and_filter(
|
|
85
|
+
messages: &Bound<'_, PyList>,
|
|
86
|
+
max_tokens_per_message: i64,
|
|
87
|
+
) -> PyResult<PruneResult> {
|
|
88
|
+
prune_and_filter_impl(messages, max_tokens_per_message)
|
|
89
|
+
}
|
|
90
|
+
|
|
91
|
+
#[pyfunction]
|
|
92
|
+
#[pyo3(signature = (per_message_tokens, protected_tokens, second_has_thinking))]
|
|
93
|
+
fn truncation_indices(
|
|
94
|
+
per_message_tokens: Vec<i64>,
|
|
95
|
+
protected_tokens: i64,
|
|
96
|
+
second_has_thinking: bool,
|
|
97
|
+
) -> PyResult<Vec<usize>> {
|
|
98
|
+
Ok(truncation_indices_impl(
|
|
99
|
+
&per_message_tokens,
|
|
100
|
+
protected_tokens,
|
|
101
|
+
second_has_thinking,
|
|
102
|
+
))
|
|
103
|
+
}
|
|
104
|
+
|
|
105
|
+
#[pyfunction]
|
|
106
|
+
#[pyo3(signature = (per_message_tokens, tool_call_ids_per_message, protected_tokens_limit))]
|
|
107
|
+
fn split_for_summarization(
|
|
108
|
+
per_message_tokens: Vec<i64>,
|
|
109
|
+
tool_call_ids_per_message: Vec<Vec<(String, String)>>,
|
|
110
|
+
protected_tokens_limit: i64,
|
|
111
|
+
) -> PyResult<SplitResult> {
|
|
112
|
+
Ok(split_for_summarization_impl(
|
|
113
|
+
&per_message_tokens,
|
|
114
|
+
&tool_call_ids_per_message,
|
|
115
|
+
protected_tokens_limit,
|
|
116
|
+
))
|
|
117
|
+
}
|
|
118
|
+
|
|
119
|
+
#[pyfunction]
|
|
120
|
+
#[pyo3(signature = (messages,))]
|
|
121
|
+
fn serialize_session(messages: &Bound<'_, PyList>) -> PyResult<Vec<u8>> {
|
|
122
|
+
serialize_session_impl(messages)
|
|
123
|
+
}
|
|
124
|
+
|
|
125
|
+
#[pyfunction]
|
|
126
|
+
#[pyo3(signature = (data,))]
|
|
127
|
+
fn deserialize_session<'py>(data: &[u8], py: Python<'py>) -> PyResult<Py<PyList>> {
|
|
128
|
+
deserialize_session_impl(py, data)
|
|
129
|
+
}
|
|
130
|
+
|
|
131
|
+
#[pyfunction]
|
|
132
|
+
#[pyo3(signature = (new_messages, existing_data=None))]
|
|
133
|
+
fn serialize_session_incremental(
|
|
134
|
+
new_messages: &Bound<'_, PyList>,
|
|
135
|
+
existing_data: Option<&[u8]>,
|
|
136
|
+
) -> PyResult<Vec<u8>> {
|
|
137
|
+
serialize_session_incremental_impl(new_messages, existing_data)
|
|
138
|
+
}
|
|
139
|
+
|
|
140
|
+
// ── Hashline functions ──────────────────────────────────────────────────────
|
|
141
|
+
|
|
142
|
+
#[pyfunction]
|
|
143
|
+
fn compute_line_hash(idx: u32, line: &str) -> String {
|
|
144
|
+
compute_line_hash_impl(idx, line)
|
|
145
|
+
}
|
|
146
|
+
|
|
147
|
+
#[pyfunction]
|
|
148
|
+
#[pyo3(signature = (text, start_line=1))]
|
|
149
|
+
fn format_hashlines(text: &str, start_line: u32) -> String {
|
|
150
|
+
format_hashlines_impl(text, start_line)
|
|
151
|
+
}
|
|
152
|
+
|
|
153
|
+
#[pyfunction]
|
|
154
|
+
fn strip_hashline_prefixes(text: &str) -> String {
|
|
155
|
+
strip_hashline_prefixes_impl(text)
|
|
156
|
+
}
|
|
157
|
+
|
|
158
|
+
#[pyfunction]
|
|
159
|
+
fn validate_hashline_anchor(idx: u32, line: &str, expected_hash: &str) -> bool {
|
|
160
|
+
validate_hashline_anchor_impl(idx, line, expected_hash)
|
|
161
|
+
}
|
|
162
|
+
|
|
163
|
+
// ── Module registration ─────────────────────────────────────────────────────
|
|
164
|
+
|
|
165
|
+
#[pymodule]
|
|
166
|
+
fn _code_puppy_core(m: &Bound<'_, PyModule>) -> PyResult<()> {
|
|
167
|
+
m.add_class::<ProcessResult>()?;
|
|
168
|
+
m.add_class::<PruneResult>()?;
|
|
169
|
+
m.add_class::<SplitResult>()?;
|
|
170
|
+
m.add_function(wrap_pyfunction!(process_messages_batch, m)?)?;
|
|
171
|
+
m.add_function(wrap_pyfunction!(prune_and_filter, m)?)?;
|
|
172
|
+
m.add_function(wrap_pyfunction!(truncation_indices, m)?)?;
|
|
173
|
+
m.add_function(wrap_pyfunction!(split_for_summarization, m)?)?;
|
|
174
|
+
m.add_function(wrap_pyfunction!(serialize_session, m)?)?;
|
|
175
|
+
m.add_function(wrap_pyfunction!(deserialize_session, m)?)?;
|
|
176
|
+
m.add_function(wrap_pyfunction!(serialize_session_incremental, m)?)?;
|
|
177
|
+
m.add_function(wrap_pyfunction!(compute_line_hash, m)?)?;
|
|
178
|
+
m.add_function(wrap_pyfunction!(format_hashlines, m)?)?;
|
|
179
|
+
m.add_function(wrap_pyfunction!(strip_hashline_prefixes, m)?)?;
|
|
180
|
+
m.add_function(wrap_pyfunction!(validate_hashline_anchor, m)?)?;
|
|
181
|
+
Ok(())
|
|
182
|
+
}
|