cridecoder 0.3.2__tar.gz → 0.3.3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47)
  1. {cridecoder-0.3.2 → cridecoder-0.3.3}/Cargo.lock +1 -1
  2. {cridecoder-0.3.2 → cridecoder-0.3.3}/Cargo.toml +1 -1
  3. {cridecoder-0.3.2 → cridecoder-0.3.3}/PKG-INFO +1 -1
  4. {cridecoder-0.3.2 → cridecoder-0.3.3}/cridecoder.pyi +12 -0
  5. cridecoder-0.3.3/examples/profile_acb.rs +55 -0
  6. {cridecoder-0.3.2 → cridecoder-0.3.3}/pyproject.toml +1 -1
  7. {cridecoder-0.3.2 → cridecoder-0.3.3}/src/acb/decode.rs +9 -4
  8. {cridecoder-0.3.2 → cridecoder-0.3.3}/src/acb/extractor.rs +106 -42
  9. {cridecoder-0.3.2 → cridecoder-0.3.3}/src/acb/utf.rs +9 -0
  10. {cridecoder-0.3.2 → cridecoder-0.3.3}/src/acb.rs +2 -1
  11. {cridecoder-0.3.2 → cridecoder-0.3.3}/src/lib.rs +2 -1
  12. {cridecoder-0.3.2 → cridecoder-0.3.3}/src/python.rs +45 -6
  13. {cridecoder-0.3.2 → cridecoder-0.3.3}/tests/integration_tests.rs +30 -0
  14. {cridecoder-0.3.2 → cridecoder-0.3.3}/.github/copilot-instructions.md +0 -0
  15. {cridecoder-0.3.2 → cridecoder-0.3.3}/.github/dependabot.yml +0 -0
  16. {cridecoder-0.3.2 → cridecoder-0.3.3}/.github/workflows/ci.yml +0 -0
  17. {cridecoder-0.3.2 → cridecoder-0.3.3}/.github/workflows/release-crate.yml +0 -0
  18. {cridecoder-0.3.2 → cridecoder-0.3.3}/.github/workflows/release-python.yml +0 -0
  19. {cridecoder-0.3.2 → cridecoder-0.3.3}/.gitignore +0 -0
  20. {cridecoder-0.3.2 → cridecoder-0.3.3}/AGENTS.md +0 -0
  21. {cridecoder-0.3.2 → cridecoder-0.3.3}/CLAUDE.md +0 -0
  22. {cridecoder-0.3.2 → cridecoder-0.3.3}/KNOWN_GAPS.md +0 -0
  23. {cridecoder-0.3.2 → cridecoder-0.3.3}/LICENSE +0 -0
  24. {cridecoder-0.3.2 → cridecoder-0.3.3}/README.md +0 -0
  25. {cridecoder-0.3.2 → cridecoder-0.3.3}/examples/debug_acb.rs +0 -0
  26. {cridecoder-0.3.2 → cridecoder-0.3.3}/examples/profile_hca.rs +0 -0
  27. {cridecoder-0.3.2 → cridecoder-0.3.3}/examples/test_acb.rs +0 -0
  28. {cridecoder-0.3.2 → cridecoder-0.3.3}/examples/test_hca.rs +0 -0
  29. {cridecoder-0.3.2 → cridecoder-0.3.3}/examples/test_usm.rs +0 -0
  30. {cridecoder-0.3.2 → cridecoder-0.3.3}/src/acb/afs.rs +0 -0
  31. {cridecoder-0.3.2 → cridecoder-0.3.3}/src/acb/builder.rs +0 -0
  32. {cridecoder-0.3.2 → cridecoder-0.3.3}/src/acb/consts.rs +0 -0
  33. {cridecoder-0.3.2 → cridecoder-0.3.3}/src/acb/track.rs +0 -0
  34. {cridecoder-0.3.2 → cridecoder-0.3.3}/src/hca/ath.rs +0 -0
  35. {cridecoder-0.3.2 → cridecoder-0.3.3}/src/hca/bitreader.rs +0 -0
  36. {cridecoder-0.3.2 → cridecoder-0.3.3}/src/hca/cipher.rs +0 -0
  37. {cridecoder-0.3.2 → cridecoder-0.3.3}/src/hca/decoder.rs +0 -0
  38. {cridecoder-0.3.2 → cridecoder-0.3.3}/src/hca/encoder.rs +0 -0
  39. {cridecoder-0.3.2 → cridecoder-0.3.3}/src/hca/hca_file.rs +0 -0
  40. {cridecoder-0.3.2 → cridecoder-0.3.3}/src/hca/imdct.rs +0 -0
  41. {cridecoder-0.3.2 → cridecoder-0.3.3}/src/hca/tables.rs +0 -0
  42. {cridecoder-0.3.2 → cridecoder-0.3.3}/src/hca.rs +0 -0
  43. {cridecoder-0.3.2 → cridecoder-0.3.3}/src/reader.rs +0 -0
  44. {cridecoder-0.3.2 → cridecoder-0.3.3}/src/usm/builder.rs +0 -0
  45. {cridecoder-0.3.2 → cridecoder-0.3.3}/src/usm/extractor.rs +0 -0
  46. {cridecoder-0.3.2 → cridecoder-0.3.3}/src/usm/metadata.rs +0 -0
  47. {cridecoder-0.3.2 → cridecoder-0.3.3}/src/usm.rs +0 -0
@@ -28,7 +28,7 @@ checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801"
28
28
 
29
29
  [[package]]
30
30
  name = "cridecoder"
31
- version = "0.3.2"
31
+ version = "0.3.3"
32
32
  dependencies = [
33
33
  "byteorder",
34
34
  "encoding_rs",
@@ -1,6 +1,6 @@
1
1
  [package]
2
2
  name = "cridecoder"
3
- version = "0.3.2"
3
+ version = "0.3.3"
4
4
  edition = "2021"
5
5
  description = "CRI codec library for ACB/AWB, HCA audio, and USM video extraction"
6
6
  license = "MIT"
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: cridecoder
3
- Version: 0.3.2
3
+ Version: 0.3.3
4
4
  Classifier: Development Status :: 4 - Beta
5
5
  Classifier: Intended Audience :: Developers
6
6
  Classifier: License :: OSI Approved :: MIT License
@@ -11,6 +11,7 @@ __all__ = [
11
11
  "extract_acb",
12
12
  "extract_acb_tracks",
13
13
  "extract_acb_bytes",
14
+ "extract_acb_unique_bytes",
14
15
  "decode_acb_to_wav",
15
16
  "decode_acb_to_wav_bytes",
16
17
  "build_acb",
@@ -58,6 +59,17 @@ def extract_acb_bytes(acb_data: bytes) -> list[dict[str, object]]:
58
59
  """
59
60
  ...
60
61
 
62
+ def extract_acb_unique_bytes(acb_data: bytes) -> list[dict[str, object]]:
63
+ """Extract each distinct waveform from in-memory ACB bytes exactly once.
64
+
65
+ ACBs often point several cues at one physical waveform; unlike
66
+ :func:`extract_acb_bytes` (which copies it once per cue), this copies each
67
+ waveform a single time. Returns a list of dicts
68
+ ``{"extension", "subkey", "data", "cues"}`` where ``data`` is ``bytes`` and
69
+ ``cues`` is a list of ``{"name", "cue_id"}`` (at least one entry).
70
+ """
71
+ ...
72
+
61
73
  def decode_acb_to_wav(
62
74
  acb_path: str, output_dir: str, key: Optional[int] = ...
63
75
  ) -> list[str]:
@@ -0,0 +1,55 @@
1
+ //! Micro-profile of ACB extraction stages on a real ACB.
2
+ //! Usage: cargo run --release --example profile_acb -- <path-to.acb>
3
+
4
+ use std::io::Cursor;
5
+ use std::time::Instant;
6
+
7
+ use cridecoder::acb::{AfsArchive, TrackList, UtfTable};
8
+
9
+ fn time<T>(label: &str, rounds: u32, mut f: impl FnMut() -> T) -> T {
10
+ for _ in 0..3 {
11
+ std::hint::black_box(f());
12
+ }
13
+ let mut best = f64::INFINITY;
14
+ let mut last = None;
15
+ for _ in 0..rounds {
16
+ let t0 = Instant::now();
17
+ let r = std::hint::black_box(f());
18
+ best = best.min(t0.elapsed().as_secs_f64());
19
+ last = Some(r);
20
+ }
21
+ println!(" {label:40} best={:8.4} ms", best * 1000.0);
22
+ last.unwrap()
23
+ }
24
+
25
+ fn main() {
26
+ let path = std::env::args().nth(1).expect("usage: profile_acb <acb>");
27
+ let data = std::fs::read(&path).unwrap();
28
+ println!("ACB: {} ({} bytes)\n", path, data.len());
29
+ let rounds = 300;
30
+
31
+ time("UtfTable::new (outer)", rounds, || {
32
+ UtfTable::new(Cursor::new(&data)).unwrap()
33
+ });
34
+
35
+ time("UtfTable::new + TrackList::new", rounds, || {
36
+ let utf = UtfTable::new(Cursor::new(&data)).unwrap();
37
+ TrackList::new(&utf).unwrap()
38
+ });
39
+
40
+ time("+ build embedded AFS2", rounds, || {
41
+ let utf = UtfTable::new(Cursor::new(&data)).unwrap();
42
+ let _tl = TrackList::new(&utf).unwrap();
43
+ let awb = utf.rows[0]
44
+ .get("AwbFile")
45
+ .unwrap()
46
+ .as_bytes()
47
+ .unwrap()
48
+ .to_vec();
49
+ AfsArchive::new(Cursor::new(awb)).unwrap()
50
+ });
51
+
52
+ time("extract_acb_to_memory (full)", rounds, || {
53
+ cridecoder::extract_acb_to_memory(Cursor::new(&data), None).unwrap()
54
+ });
55
+ }
@@ -4,7 +4,7 @@ build-backend = "maturin"
4
4
 
5
5
  [project]
6
6
  name = "cridecoder"
7
- version = "0.3.2"
7
+ version = "0.3.3"
8
8
  description = "CRI codec library for ACB/AWB, HCA audio, and USM video extraction"
9
9
  readme = "README.md"
10
10
  license = { text = "MIT" }
@@ -4,13 +4,13 @@
4
4
  //! an ACB to decoded audio without managing the intermediate HCA bytes
5
5
  //! themselves. The per-AWB AFS2 subkey is applied automatically.
6
6
 
7
- use std::fs::{self, File};
7
+ use std::fs;
8
8
  use std::io::{Cursor, Read, Seek};
9
9
  use std::path::Path;
10
10
 
11
11
  use thiserror::Error;
12
12
 
13
- use crate::acb::extractor::{extract_acb_to_memory, ExtractError};
13
+ use crate::acb::extractor::{extract_acb_to_memory, read_validated_acb, ExtractError};
14
14
  use crate::hca::{HcaDecoder, HcaDecoderError};
15
15
 
16
16
  /// A decoded ACB track held in memory.
@@ -102,6 +102,11 @@ pub fn decode_acb_to_wav_from_file(
102
102
  target_dir: &Path,
103
103
  key: Option<u64>,
104
104
  ) -> Result<Vec<String>, DecodeAcbError> {
105
- let file = File::open(acb_path)?;
106
- decode_acb_to_wav(file, target_dir, Some(acb_path), key)
105
+ // Slurp once so the parser reads from memory instead of issuing many small
106
+ // syscalls against the file handle.
107
+ let data = match read_validated_acb(acb_path)? {
108
+ Some(d) => d,
109
+ None => return Ok(Vec::new()),
110
+ };
111
+ decode_acb_to_wav(Cursor::new(data), target_dir, Some(acb_path), key)
107
112
  }
@@ -3,8 +3,9 @@
3
3
  use crate::acb::afs::AfsArchive;
4
4
  use crate::acb::consts::wave_type_extension;
5
5
  use crate::acb::track::{Track, TrackList};
6
- use crate::acb::utf::{get_bytes_field, get_string_field, UtfTable};
7
- use std::fs::{self, File};
6
+ use crate::acb::utf::{get_bytes_field, get_string_field, take_bytes_field, UtfTable};
7
+ use std::collections::HashMap;
8
+ use std::fs;
8
9
  use std::io::{Cursor, Read, Seek};
9
10
  use std::path::Path;
10
11
  use thiserror::Error;
@@ -42,11 +43,11 @@ pub fn extract_acb<R: Read + Seek>(
42
43
  target_dir: &Path,
43
44
  acb_file_path: Option<&Path>,
44
45
  ) -> Result<Vec<String>, ExtractError> {
45
- let utf = UtfTable::new(acb_file)?;
46
+ let mut utf = UtfTable::new(acb_file)?;
46
47
 
47
48
  let track_list = TrackList::new(&utf)?;
48
49
 
49
- let mut embedded_awb = load_embedded_awb(&utf.rows[0]);
50
+ let mut embedded_awb = load_embedded_awb(&mut utf.rows[0]);
50
51
  let mut external_awbs = load_external_awbs(&utf.rows[0], acb_file_path);
51
52
 
52
53
  extract_all_tracks(
@@ -62,9 +63,9 @@ pub fn extract_acb_to_memory<R: Read + Seek>(
62
63
  acb_file: R,
63
64
  acb_file_path: Option<&Path>,
64
65
  ) -> Result<Vec<ExtractedAcbTrack>, ExtractError> {
65
- let utf = UtfTable::new(acb_file)?;
66
+ let mut utf = UtfTable::new(acb_file)?;
66
67
  let track_list = TrackList::new(&utf)?;
67
- let mut embedded_awb = load_embedded_awb(&utf.rows[0]);
68
+ let mut embedded_awb = load_embedded_awb(&mut utf.rows[0]);
68
69
  let mut external_awbs = load_external_awbs(&utf.rows[0], acb_file_path);
69
70
 
70
71
  let mut outputs = Vec::new();
@@ -73,16 +74,10 @@ pub fn extract_acb_to_memory<R: Read + Seek>(
73
74
  Some(data) => data,
74
75
  None => continue,
75
76
  };
76
- let extension = wave_type_extension(track.enc_type);
77
- let extension = if extension.is_empty() {
78
- track.enc_type.to_string()
79
- } else {
80
- extension.trim_start_matches('.').to_string()
81
- };
82
77
  outputs.push(ExtractedAcbTrack {
83
78
  name: track.name.clone(),
84
79
  cue_id: track.cue_id,
85
- extension,
80
+ extension: track_extension(track),
86
81
  data,
87
82
  subkey,
88
83
  });
@@ -91,12 +86,86 @@ pub fn extract_acb_to_memory<R: Read + Seek>(
91
86
  Ok(outputs)
92
87
  }
93
88
 
94
- fn load_embedded_awb(row: &crate::acb::utf::ValueMap) -> Option<AfsArchive<Cursor<Vec<u8>>>> {
95
- let awb_data = get_bytes_field(row, "AwbFile")?;
89
+ /// A cue that references a waveform.
90
+ #[derive(Debug, Clone, PartialEq, Eq)]
91
+ pub struct AcbCueRef {
92
+ pub name: String,
93
+ pub cue_id: i32,
94
+ }
95
+
96
+ /// A distinct (de-duplicated) waveform from an ACB, with every cue that maps to
97
+ /// it. ACBs frequently point multiple cues at one physical waveform.
98
+ #[derive(Debug, Clone, PartialEq, Eq)]
99
+ pub struct UniqueWaveform {
100
+ pub extension: String,
101
+ /// AFS2 subkey of the originating AWB (0 if unencrypted).
102
+ pub subkey: u16,
103
+ pub data: Vec<u8>,
104
+ /// All cues that reference this waveform (always at least one).
105
+ pub cues: Vec<AcbCueRef>,
106
+ }
107
+
108
+ /// Extract each distinct waveform from an ACB exactly once, together with the
109
+ /// cues that reference it. The per-cue extractors read and copy a shared
110
+ /// waveform once per cue; this reads and copies each waveform a single time.
111
+ pub fn extract_acb_unique_to_memory<R: Read + Seek>(
112
+ acb_file: R,
113
+ acb_file_path: Option<&Path>,
114
+ ) -> Result<Vec<UniqueWaveform>, ExtractError> {
115
+ let mut utf = UtfTable::new(acb_file)?;
116
+ let track_list = TrackList::new(&utf)?;
117
+ let mut embedded_awb = load_embedded_awb(&mut utf.rows[0]);
118
+ let mut external_awbs = load_external_awbs(&utf.rows[0], acb_file_path);
119
+
120
+ // A physical waveform is identified by which AWB it lives in plus its id.
121
+ let mut seen: HashMap<(bool, i32, i32), usize> = HashMap::new();
122
+ let mut out: Vec<UniqueWaveform> = Vec::new();
123
+
124
+ for track in &track_list.tracks {
125
+ let key = (track.is_stream, track.stream_awb_id, track.wav_id);
126
+ let cue = AcbCueRef {
127
+ name: track.name.clone(),
128
+ cue_id: track.cue_id,
129
+ };
130
+ if let Some(&idx) = seen.get(&key) {
131
+ out[idx].cues.push(cue);
132
+ continue;
133
+ }
134
+ let (data, subkey) = match get_track_data(track, &mut embedded_awb, &mut external_awbs)? {
135
+ Some(d) => d,
136
+ None => continue,
137
+ };
138
+ seen.insert(key, out.len());
139
+ out.push(UniqueWaveform {
140
+ extension: track_extension(track),
141
+ subkey,
142
+ data,
143
+ cues: vec![cue],
144
+ });
145
+ }
146
+
147
+ Ok(out)
148
+ }
149
+
150
+ /// Output extension for a track's waveform (no leading dot; falls back to the
151
+ /// numeric encode type for unknown formats).
152
+ fn track_extension(track: &Track) -> String {
153
+ let ext = wave_type_extension(track.enc_type);
154
+ if ext.is_empty() {
155
+ track.enc_type.to_string()
156
+ } else {
157
+ ext.trim_start_matches('.').to_string()
158
+ }
159
+ }
160
+
161
+ fn load_embedded_awb(row: &mut crate::acb::utf::ValueMap) -> Option<AfsArchive<Cursor<Vec<u8>>>> {
162
+ // Move the embedded AWB bytes out of the UTF cell rather than cloning them
163
+ // (the embedded AWB is often several MB).
164
+ let awb_data = take_bytes_field(row, "AwbFile")?;
96
165
  if awb_data.is_empty() {
97
166
  return None;
98
167
  }
99
- AfsArchive::new(Cursor::new(awb_data.to_vec())).ok()
168
+ AfsArchive::new(Cursor::new(awb_data)).ok()
100
169
  }
101
170
 
102
171
  fn load_external_awbs(
@@ -233,9 +302,9 @@ pub fn extract_acb_tracks<R: Read + Seek>(
233
302
  target_dir: &Path,
234
303
  acb_file_path: Option<&Path>,
235
304
  ) -> Result<Vec<ExtractedTrackFile>, ExtractError> {
236
- let utf = UtfTable::new(acb_file)?;
305
+ let mut utf = UtfTable::new(acb_file)?;
237
306
  let track_list = TrackList::new(&utf)?;
238
- let mut embedded_awb = load_embedded_awb(&utf.rows[0]);
307
+ let mut embedded_awb = load_embedded_awb(&mut utf.rows[0]);
239
308
  let mut external_awbs = load_external_awbs(&utf.rows[0], acb_file_path);
240
309
 
241
310
  fs::create_dir_all(target_dir)?;
@@ -281,30 +350,25 @@ fn extract_single_track_file(
281
350
  }))
282
351
  }
283
352
 
284
- /// Open and validate an ACB file, returning the seekable handle positioned at
285
- /// the start, or `None` if the path is missing or is not a valid ACB.
286
- fn open_validated_acb(acb_path: &Path) -> Result<Option<File>, ExtractError> {
287
- let info = match fs::metadata(acb_path) {
288
- Ok(i) => i,
353
+ /// Read and validate an ACB file into memory, returning its bytes or `None` if
354
+ /// the path is missing or is not a valid ACB.
355
+ ///
356
+ /// Slurping the whole file once (instead of parsing straight from the `File`)
357
+ /// turns the parser's many small `seek`/`read` calls into in-memory pointer
358
+ /// math rather than syscalls — a large win on the file-based entry points.
359
+ pub fn read_validated_acb(acb_path: &Path) -> Result<Option<Vec<u8>>, ExtractError> {
360
+ let data = match fs::read(acb_path) {
361
+ Ok(d) => d,
289
362
  Err(_) => return Ok(None),
290
363
  };
291
364
 
292
- // A valid ACB file must have at least @UTF magic (4 bytes) + header (28 bytes) = 32 bytes
293
- if info.len() < 32 {
365
+ // A valid ACB file must have at least @UTF magic (4 bytes) + header (28 bytes) = 32 bytes,
366
+ // and start with the @UTF magic (0x40 0x55 0x54 0x46).
367
+ if data.len() < 32 || data[0..4] != [0x40, 0x55, 0x54, 0x46] {
294
368
  return Ok(None);
295
369
  }
296
370
 
297
- let mut file = File::open(acb_path)?;
298
-
299
- // Read and validate the @UTF magic (0x40 0x55 0x54 0x46)
300
- let mut header = [0u8; 4];
301
- file.read_exact(&mut header)?;
302
- if header != [0x40, 0x55, 0x54, 0x46] {
303
- return Ok(None); // Not a valid ACB file
304
- }
305
-
306
- file.seek(std::io::SeekFrom::Start(0))?;
307
- Ok(Some(file))
371
+ Ok(Some(data))
308
372
  }
309
373
 
310
374
  /// Convenience function to extract from a file path
@@ -312,11 +376,11 @@ pub fn extract_acb_from_file(
312
376
  acb_path: &Path,
313
377
  target_dir: &Path,
314
378
  ) -> Result<Option<Vec<String>>, ExtractError> {
315
- let file = match open_validated_acb(acb_path)? {
316
- Some(f) => f,
379
+ let data = match read_validated_acb(acb_path)? {
380
+ Some(d) => d,
317
381
  None => return Ok(None),
318
382
  };
319
- let outputs = extract_acb(file, target_dir, Some(acb_path))?;
383
+ let outputs = extract_acb(Cursor::new(data), target_dir, Some(acb_path))?;
320
384
  Ok(Some(outputs))
321
385
  }
322
386
 
@@ -326,10 +390,10 @@ pub fn extract_acb_tracks_from_file(
326
390
  acb_path: &Path,
327
391
  target_dir: &Path,
328
392
  ) -> Result<Option<Vec<ExtractedTrackFile>>, ExtractError> {
329
- let file = match open_validated_acb(acb_path)? {
330
- Some(f) => f,
393
+ let data = match read_validated_acb(acb_path)? {
394
+ Some(d) => d,
331
395
  None => return Ok(None),
332
396
  };
333
- let outputs = extract_acb_tracks(file, target_dir, Some(acb_path))?;
397
+ let outputs = extract_acb_tracks(Cursor::new(data), target_dir, Some(acb_path))?;
334
398
  Ok(Some(outputs))
335
399
  }
@@ -408,6 +408,15 @@ pub fn get_bytes_field<'a>(row: &'a ValueMap, key: &str) -> Option<&'a [u8]> {
408
408
  row.get(key).and_then(|v| v.as_bytes())
409
409
  }
410
410
 
411
+ /// Move (take ownership of) a bytes field out of a row, leaving an empty
412
+ /// placeholder. Avoids cloning large embedded blobs such as the embedded AWB.
413
+ pub fn take_bytes_field(row: &mut ValueMap, key: &str) -> Option<Vec<u8>> {
414
+ match row.get_mut(key) {
415
+ Some(Value::Data(d)) => Some(std::mem::take(d)),
416
+ _ => None,
417
+ }
418
+ }
419
+
411
420
  /// Helper to get string field from a row
412
421
  pub fn get_string_field<'a>(row: &'a ValueMap, key: &str) -> Option<&'a str> {
413
422
  row.get(key).and_then(|v| v.as_string())
@@ -19,7 +19,8 @@ pub use decode::{
19
19
  };
20
20
  pub use extractor::{
21
21
  extract_acb, extract_acb_from_file, extract_acb_to_memory, extract_acb_tracks,
22
- extract_acb_tracks_from_file, ExtractedAcbTrack, ExtractedTrackFile,
22
+ extract_acb_tracks_from_file, extract_acb_unique_to_memory, AcbCueRef, ExtractedAcbTrack,
23
+ ExtractedTrackFile, UniqueWaveform,
23
24
  };
24
25
  pub use track::{Track, TrackList};
25
26
  pub use utf::{UtfHeader, UtfTable, Value};
@@ -20,7 +20,8 @@ pub use acb::{
20
20
  };
21
21
  pub use acb::{
22
22
  extract_acb, extract_acb_from_file, extract_acb_to_memory, extract_acb_tracks,
23
- extract_acb_tracks_from_file, ExtractedAcbTrack, ExtractedTrackFile,
23
+ extract_acb_tracks_from_file, extract_acb_unique_to_memory, AcbCueRef, ExtractedAcbTrack,
24
+ ExtractedTrackFile, UniqueWaveform,
24
25
  };
25
26
  pub use acb::{AcbBuilder, AfsArchiveBuilder, BuilderError, TrackInput, UtfTableBuilder};
26
27
 
@@ -122,7 +122,7 @@ fn extract_acb_bytes<'py>(
122
122
  py: Python<'py>,
123
123
  acb_data: &[u8],
124
124
  ) -> PyResult<Vec<Bound<'py, pyo3::types::PyDict>>> {
125
- let tracks = acb::extract_acb_to_memory(Cursor::new(acb_data.to_vec()), None)
125
+ let tracks = acb::extract_acb_to_memory(Cursor::new(acb_data), None)
126
126
  .map_err(|e| PyRuntimeError::new_err(format!("ACB extraction failed: {}", e)))?;
127
127
 
128
128
  let mut out = Vec::with_capacity(tracks.len());
@@ -138,6 +138,45 @@ fn extract_acb_bytes<'py>(
138
138
  Ok(out)
139
139
  }
140
140
 
141
+ /// Extract each distinct waveform from in-memory ACB bytes exactly once.
142
+ ///
143
+ /// ACBs often point several cues at the same physical waveform; unlike
144
+ /// :func:`extract_acb_bytes` (which copies it once per cue), this reads and
145
+ /// copies each waveform a single time and lists the cues that reference it.
146
+ ///
147
+ /// Args:
148
+ /// acb_data: Raw ACB file bytes
149
+ ///
150
+ /// Returns:
151
+ /// List of dicts ``{"extension", "subkey", "data", "cues"}`` where ``cues``
152
+ /// is a list of ``{"name", "cue_id"}`` (at least one).
153
+ #[pyfunction]
154
+ fn extract_acb_unique_bytes<'py>(
155
+ py: Python<'py>,
156
+ acb_data: &[u8],
157
+ ) -> PyResult<Vec<Bound<'py, pyo3::types::PyDict>>> {
158
+ let waveforms = acb::extract_acb_unique_to_memory(Cursor::new(acb_data), None)
159
+ .map_err(|e| PyRuntimeError::new_err(format!("ACB extraction failed: {}", e)))?;
160
+
161
+ let mut out = Vec::with_capacity(waveforms.len());
162
+ for wf in waveforms {
163
+ let dict = pyo3::types::PyDict::new(py);
164
+ dict.set_item("extension", wf.extension)?;
165
+ dict.set_item("subkey", wf.subkey)?;
166
+ dict.set_item("data", pyo3::types::PyBytes::new(py, &wf.data))?;
167
+ let mut cues = Vec::with_capacity(wf.cues.len());
168
+ for cue in wf.cues {
169
+ let c = pyo3::types::PyDict::new(py);
170
+ c.set_item("name", cue.name)?;
171
+ c.set_item("cue_id", cue.cue_id)?;
172
+ cues.push(c);
173
+ }
174
+ dict.set_item("cues", cues)?;
175
+ out.push(dict);
176
+ }
177
+ Ok(out)
178
+ }
179
+
141
180
  /// Decode an in-memory ACB straight to WAV bytes (no disk I/O).
142
181
  ///
143
182
  /// The in-memory counterpart of :func:`decode_acb_to_wav`: each AWB's subkey is
@@ -158,7 +197,7 @@ fn decode_acb_to_wav_bytes<'py>(
158
197
  acb_data: &[u8],
159
198
  key: Option<u64>,
160
199
  ) -> PyResult<Vec<Bound<'py, pyo3::types::PyDict>>> {
161
- let tracks = acb::decode_acb_to_wav_to_memory(Cursor::new(acb_data.to_vec()), None, key)
200
+ let tracks = acb::decode_acb_to_wav_to_memory(Cursor::new(acb_data), None, key)
162
201
  .map_err(|e| PyRuntimeError::new_err(format!("ACB decode failed: {}", e)))?;
163
202
 
164
203
  let mut out = Vec::with_capacity(tracks.len());
@@ -349,7 +388,7 @@ fn decode_hca<'py>(
349
388
  #[pyfunction]
350
389
  #[pyo3(signature = (hca_data, key=None, subkey=None))]
351
390
  fn decode_hca_bytes(hca_data: &[u8], key: Option<u64>, subkey: Option<u64>) -> PyResult<Vec<u8>> {
352
- let mut decoder = HcaDecoder::from_reader(Cursor::new(hca_data.to_vec()))
391
+ let mut decoder = HcaDecoder::from_reader(Cursor::new(hca_data))
353
392
  .map_err(|e| PyRuntimeError::new_err(format!("Failed to parse HCA: {}", e)))?;
354
393
  if let Some(k) = key {
355
394
  decoder.set_encryption_key(k, subkey.unwrap_or(0));
@@ -587,9 +626,8 @@ fn extract_usm_bytes<'py>(
587
626
  key: Option<u64>,
588
627
  export_audio: bool,
589
628
  ) -> PyResult<Vec<Bound<'py, pyo3::types::PyDict>>> {
590
- let streams =
591
- usm::extract_usm_to_memory(Cursor::new(usm_data.to_vec()), b"", key, export_audio)
592
- .map_err(|e| PyRuntimeError::new_err(format!("USM extraction failed: {}", e)))?;
629
+ let streams = usm::extract_usm_to_memory(Cursor::new(usm_data), b"", key, export_audio)
630
+ .map_err(|e| PyRuntimeError::new_err(format!("USM extraction failed: {}", e)))?;
593
631
 
594
632
  let mut out = Vec::with_capacity(streams.len());
595
633
  for stream in streams {
@@ -689,6 +727,7 @@ pub fn register(m: &Bound<'_, PyModule>) -> PyResult<()> {
689
727
  m.add_function(wrap_pyfunction!(extract_acb, m)?)?;
690
728
  m.add_function(wrap_pyfunction!(extract_acb_tracks, m)?)?;
691
729
  m.add_function(wrap_pyfunction!(extract_acb_bytes, m)?)?;
730
+ m.add_function(wrap_pyfunction!(extract_acb_unique_bytes, m)?)?;
692
731
  m.add_function(wrap_pyfunction!(decode_acb_to_wav, m)?)?;
693
732
  m.add_function(wrap_pyfunction!(decode_acb_to_wav_bytes, m)?)?;
694
733
  m.add_function(wrap_pyfunction!(build_acb, m)?)?;
@@ -626,6 +626,36 @@ fn test_acb_extract_to_memory() {
626
626
  }
627
627
  }
628
628
 
629
+ /// Test extract_acb_unique_to_memory de-duplicates waveforms and lists cues
630
+ #[test]
631
+ fn test_acb_extract_unique() {
632
+ use cridecoder::{extract_acb_unique_to_memory, AcbBuilder, TrackInput};
633
+ use std::io::Cursor;
634
+
635
+ let mut builder = AcbBuilder::new();
636
+ builder.add_track(TrackInput::new("uniq_a", 0, create_minimal_hca_header()));
637
+ builder.add_track(TrackInput::new("uniq_b", 1, create_minimal_hca_header()));
638
+
639
+ let mut output = Vec::new();
640
+ builder
641
+ .build(&mut Cursor::new(&mut output), None)
642
+ .expect("ACB build should succeed");
643
+
644
+ let waves = extract_acb_unique_to_memory(Cursor::new(output), None)
645
+ .expect("unique extract should work");
646
+
647
+ // The builder lays down two distinct waveforms, one cue each.
648
+ assert_eq!(waves.len(), 2);
649
+ for wf in &waves {
650
+ assert_eq!(wf.extension, "hca");
651
+ assert_eq!(wf.subkey, 0);
652
+ assert_eq!(wf.cues.len(), 1);
653
+ assert_eq!(&wf.data[0..4], b"HCA\x00");
654
+ }
655
+ assert_eq!(waves[0].cues[0].name, "uniq_a");
656
+ assert_eq!(waves[1].cues[0].name, "uniq_b");
657
+ }
658
+
629
659
  /// Test ACB builder keeps Waveform AWB ids aligned with non-zero cue ids
630
660
  #[test]
631
661
  fn test_acb_builder_nonzero_cue_id() {
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes