kreuzberg 4.0.0.pre.rc.6 → 4.0.0.pre.rc.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile.lock +5 -3
- data/README.md +15 -9
- data/ext/kreuzberg_rb/native/.cargo/config.toml +2 -0
- data/ext/kreuzberg_rb/native/Cargo.lock +516 -324
- data/ext/kreuzberg_rb/native/Cargo.toml +13 -3
- data/ext/kreuzberg_rb/native/src/lib.rs +139 -2
- data/kreuzberg.gemspec +38 -4
- data/lib/kreuzberg/config.rb +34 -1
- data/lib/kreuzberg/result.rb +77 -14
- data/lib/kreuzberg/version.rb +1 -1
- data/sig/kreuzberg.rbs +23 -6
- data/vendor/kreuzberg/Cargo.toml +25 -11
- data/vendor/kreuzberg/README.md +13 -8
- data/vendor/kreuzberg/build.rs +17 -6
- data/vendor/kreuzberg/src/api/mod.rs +2 -0
- data/vendor/kreuzberg/src/chunking/mod.rs +1279 -79
- data/vendor/kreuzberg/src/chunking/processor.rs +220 -0
- data/vendor/kreuzberg/src/core/config.rs +49 -1
- data/vendor/kreuzberg/src/core/extractor.rs +134 -2
- data/vendor/kreuzberg/src/core/mod.rs +4 -2
- data/vendor/kreuzberg/src/core/pipeline.rs +188 -1
- data/vendor/kreuzberg/src/extraction/docx.rs +358 -0
- data/vendor/kreuzberg/src/extraction/html.rs +24 -8
- data/vendor/kreuzberg/src/extraction/image.rs +124 -1
- data/vendor/kreuzberg/src/extraction/libreoffice.rs +1 -2
- data/vendor/kreuzberg/src/extraction/office_metadata/odt_properties.rs +0 -3
- data/vendor/kreuzberg/src/extraction/pptx.rs +187 -87
- data/vendor/kreuzberg/src/extractors/archive.rs +1 -0
- data/vendor/kreuzberg/src/extractors/bibtex.rs +1 -0
- data/vendor/kreuzberg/src/extractors/docbook.rs +2 -0
- data/vendor/kreuzberg/src/extractors/docx.rs +50 -17
- data/vendor/kreuzberg/src/extractors/email.rs +29 -15
- data/vendor/kreuzberg/src/extractors/epub.rs +1 -0
- data/vendor/kreuzberg/src/extractors/excel.rs +2 -0
- data/vendor/kreuzberg/src/extractors/fictionbook.rs +1 -0
- data/vendor/kreuzberg/src/extractors/html.rs +29 -15
- data/vendor/kreuzberg/src/extractors/image.rs +25 -4
- data/vendor/kreuzberg/src/extractors/jats.rs +3 -0
- data/vendor/kreuzberg/src/extractors/jupyter.rs +1 -0
- data/vendor/kreuzberg/src/extractors/latex.rs +1 -0
- data/vendor/kreuzberg/src/extractors/markdown.rs +1 -0
- data/vendor/kreuzberg/src/extractors/mod.rs +78 -14
- data/vendor/kreuzberg/src/extractors/odt.rs +3 -3
- data/vendor/kreuzberg/src/extractors/opml.rs +1 -0
- data/vendor/kreuzberg/src/extractors/orgmode.rs +1 -0
- data/vendor/kreuzberg/src/extractors/pdf.rs +194 -17
- data/vendor/kreuzberg/src/extractors/pptx.rs +32 -13
- data/vendor/kreuzberg/src/extractors/rst.rs +1 -0
- data/vendor/kreuzberg/src/extractors/rtf.rs +3 -4
- data/vendor/kreuzberg/src/extractors/structured.rs +2 -0
- data/vendor/kreuzberg/src/extractors/text.rs +7 -2
- data/vendor/kreuzberg/src/extractors/typst.rs +1 -0
- data/vendor/kreuzberg/src/extractors/xml.rs +27 -15
- data/vendor/kreuzberg/src/keywords/processor.rs +9 -1
- data/vendor/kreuzberg/src/language_detection/mod.rs +43 -0
- data/vendor/kreuzberg/src/language_detection/processor.rs +219 -0
- data/vendor/kreuzberg/src/lib.rs +10 -2
- data/vendor/kreuzberg/src/mcp/mod.rs +2 -0
- data/vendor/kreuzberg/src/mcp/server.rs +14 -12
- data/vendor/kreuzberg/src/ocr/tesseract_backend.rs +2 -0
- data/vendor/kreuzberg/src/pdf/error.rs +8 -0
- data/vendor/kreuzberg/src/pdf/metadata.rs +238 -95
- data/vendor/kreuzberg/src/pdf/mod.rs +14 -2
- data/vendor/kreuzberg/src/pdf/rendering.rs +1 -2
- data/vendor/kreuzberg/src/pdf/table.rs +26 -2
- data/vendor/kreuzberg/src/pdf/text.rs +89 -7
- data/vendor/kreuzberg/src/plugins/extractor.rs +34 -3
- data/vendor/kreuzberg/src/plugins/mod.rs +3 -0
- data/vendor/kreuzberg/src/plugins/ocr.rs +22 -3
- data/vendor/kreuzberg/src/plugins/processor.rs +8 -0
- data/vendor/kreuzberg/src/plugins/registry.rs +2 -0
- data/vendor/kreuzberg/src/plugins/validator.rs +11 -0
- data/vendor/kreuzberg/src/text/mod.rs +6 -0
- data/vendor/kreuzberg/src/text/quality_processor.rs +219 -0
- data/vendor/kreuzberg/src/types.rs +173 -21
- data/vendor/kreuzberg/tests/archive_integration.rs +2 -0
- data/vendor/kreuzberg/tests/batch_processing.rs +5 -3
- data/vendor/kreuzberg/tests/concurrency_stress.rs +14 -6
- data/vendor/kreuzberg/tests/config_features.rs +15 -1
- data/vendor/kreuzberg/tests/config_loading_tests.rs +1 -0
- data/vendor/kreuzberg/tests/docbook_extractor_tests.rs +2 -0
- data/vendor/kreuzberg/tests/email_integration.rs +2 -0
- data/vendor/kreuzberg/tests/error_handling.rs +43 -34
- data/vendor/kreuzberg/tests/format_integration.rs +2 -0
- data/vendor/kreuzberg/tests/image_integration.rs +2 -0
- data/vendor/kreuzberg/tests/mime_detection.rs +17 -16
- data/vendor/kreuzberg/tests/ocr_configuration.rs +4 -0
- data/vendor/kreuzberg/tests/ocr_errors.rs +22 -0
- data/vendor/kreuzberg/tests/ocr_quality.rs +2 -0
- data/vendor/kreuzberg/tests/odt_extractor_tests.rs +0 -21
- data/vendor/kreuzberg/tests/pdf_integration.rs +2 -0
- data/vendor/kreuzberg/tests/pipeline_integration.rs +25 -0
- data/vendor/kreuzberg/tests/plugin_ocr_backend_test.rs +5 -0
- data/vendor/kreuzberg/tests/plugin_system.rs +6 -0
- data/vendor/kreuzberg/tests/registry_integration_tests.rs +1 -0
- data/vendor/kreuzberg/tests/rst_extractor_tests.rs +2 -0
- data/vendor/kreuzberg/tests/rtf_extractor_tests.rs +0 -1
- data/vendor/kreuzberg/tests/security_validation.rs +1 -0
- data/vendor/kreuzberg/tests/test_fastembed.rs +45 -23
- data/vendor/kreuzberg/tests/typst_behavioral_tests.rs +1 -0
- data/vendor/kreuzberg/tests/typst_extractor_tests.rs +3 -2
- data/vendor/rb-sys/.cargo_vcs_info.json +2 -2
- data/vendor/rb-sys/Cargo.lock +15 -15
- data/vendor/rb-sys/Cargo.toml +4 -4
- data/vendor/rb-sys/Cargo.toml.orig +4 -4
- data/vendor/rb-sys/bin/release.sh +9 -8
- data/vendor/rb-sys/build/features.rs +5 -2
- data/vendor/rb-sys/build/main.rs +55 -15
- data/vendor/rb-sys/build/stable_api_config.rs +4 -2
- data/vendor/rb-sys/build/version.rs +3 -1
- data/vendor/rb-sys/src/macros.rs +2 -2
- data/vendor/rb-sys/src/special_consts.rs +1 -1
- data/vendor/rb-sys/src/stable_api/compiled.rs +1 -1
- data/vendor/rb-sys/src/stable_api/ruby_2_7.rs +12 -4
- data/vendor/rb-sys/src/stable_api/ruby_3_0.rs +12 -4
- data/vendor/rb-sys/src/stable_api/ruby_3_1.rs +12 -4
- data/vendor/rb-sys/src/stable_api/ruby_3_2.rs +12 -4
- data/vendor/rb-sys/src/stable_api/ruby_3_3.rs +19 -6
- data/vendor/rb-sys/src/stable_api/ruby_3_4.rs +17 -5
- data/vendor/rb-sys/src/stable_api.rs +0 -1
- data/vendor/rb-sys/src/tracking_allocator.rs +1 -3
- metadata +11 -10
- data/vendor/kreuzberg/src/extractors/fictionbook.rs.backup2 +0 -738
- data/vendor/rb-sys/.cargo-ok +0 -1
- data/vendor/rb-sys/src/stable_api/ruby_2_6.rs +0 -316
|
@@ -29,7 +29,7 @@
|
|
|
29
29
|
//! use kreuzberg::extraction::pptx::extract_pptx_from_path;
|
|
30
30
|
//!
|
|
31
31
|
//! # fn example() -> kreuzberg::Result<()> {
|
|
32
|
-
//! let result = extract_pptx_from_path("presentation.pptx", true)?;
|
|
32
|
+
//! let result = extract_pptx_from_path("presentation.pptx", true, None)?;
|
|
33
33
|
//!
|
|
34
34
|
//! println!("Slide count: {}", result.slide_count);
|
|
35
35
|
//! println!("Image count: {}", result.image_count);
|
|
@@ -181,18 +181,67 @@ impl Default for ParserConfig {
|
|
|
181
181
|
|
|
182
182
|
struct ContentBuilder {
|
|
183
183
|
content: String,
|
|
184
|
+
boundaries: Vec<crate::types::PageBoundary>,
|
|
185
|
+
page_contents: Vec<crate::types::PageContent>,
|
|
186
|
+
config: Option<crate::core::config::PageConfig>,
|
|
184
187
|
}
|
|
185
188
|
|
|
186
189
|
impl ContentBuilder {
|
|
187
190
|
fn new() -> Self {
|
|
188
191
|
Self {
|
|
189
192
|
content: String::with_capacity(8192),
|
|
193
|
+
boundaries: Vec::new(),
|
|
194
|
+
page_contents: Vec::new(),
|
|
195
|
+
config: None,
|
|
190
196
|
}
|
|
191
197
|
}
|
|
192
198
|
|
|
193
|
-
fn
|
|
199
|
+
fn with_page_config(capacity: usize, config: Option<crate::core::config::PageConfig>) -> Self {
|
|
194
200
|
Self {
|
|
195
201
|
content: String::with_capacity(capacity),
|
|
202
|
+
boundaries: if config.is_some() {
|
|
203
|
+
Vec::new()
|
|
204
|
+
} else {
|
|
205
|
+
Vec::with_capacity(0)
|
|
206
|
+
},
|
|
207
|
+
page_contents: if config.is_some() {
|
|
208
|
+
Vec::new()
|
|
209
|
+
} else {
|
|
210
|
+
Vec::with_capacity(0)
|
|
211
|
+
},
|
|
212
|
+
config,
|
|
213
|
+
}
|
|
214
|
+
}
|
|
215
|
+
|
|
216
|
+
fn start_slide(&mut self, slide_number: u32) -> usize {
|
|
217
|
+
let byte_start = self.content.len();
|
|
218
|
+
|
|
219
|
+
if let Some(ref cfg) = self.config
|
|
220
|
+
&& cfg.insert_page_markers
|
|
221
|
+
{
|
|
222
|
+
let marker = cfg.marker_format.replace("{page_num}", &slide_number.to_string());
|
|
223
|
+
self.content.push_str(&marker);
|
|
224
|
+
}
|
|
225
|
+
|
|
226
|
+
byte_start
|
|
227
|
+
}
|
|
228
|
+
|
|
229
|
+
fn end_slide(&mut self, slide_number: u32, byte_start: usize, slide_content: String) {
|
|
230
|
+
let byte_end = self.content.len();
|
|
231
|
+
|
|
232
|
+
if self.config.is_some() {
|
|
233
|
+
self.boundaries.push(crate::types::PageBoundary {
|
|
234
|
+
byte_start,
|
|
235
|
+
byte_end,
|
|
236
|
+
page_number: slide_number as usize,
|
|
237
|
+
});
|
|
238
|
+
|
|
239
|
+
self.page_contents.push(crate::types::PageContent {
|
|
240
|
+
page_number: slide_number as usize,
|
|
241
|
+
content: slide_content,
|
|
242
|
+
tables: Vec::new(),
|
|
243
|
+
images: Vec::new(),
|
|
244
|
+
});
|
|
196
245
|
}
|
|
197
246
|
}
|
|
198
247
|
|
|
@@ -271,8 +320,25 @@ impl ContentBuilder {
|
|
|
271
320
|
}
|
|
272
321
|
}
|
|
273
322
|
|
|
274
|
-
fn build(
|
|
275
|
-
self
|
|
323
|
+
fn build(
|
|
324
|
+
self,
|
|
325
|
+
) -> (
|
|
326
|
+
String,
|
|
327
|
+
Option<Vec<crate::types::PageBoundary>>,
|
|
328
|
+
Option<Vec<crate::types::PageContent>>,
|
|
329
|
+
) {
|
|
330
|
+
let content = self.content.trim().to_string();
|
|
331
|
+
let boundaries = if self.config.is_some() && !self.boundaries.is_empty() {
|
|
332
|
+
Some(self.boundaries)
|
|
333
|
+
} else {
|
|
334
|
+
None
|
|
335
|
+
};
|
|
336
|
+
let pages = if self.config.is_some() && !self.page_contents.is_empty() {
|
|
337
|
+
Some(self.page_contents)
|
|
338
|
+
} else {
|
|
339
|
+
None
|
|
340
|
+
};
|
|
341
|
+
(content, boundaries, pages)
|
|
276
342
|
}
|
|
277
343
|
}
|
|
278
344
|
|
|
@@ -443,7 +509,7 @@ impl Slide {
|
|
|
443
509
|
}
|
|
444
510
|
}
|
|
445
511
|
|
|
446
|
-
builder.build()
|
|
512
|
+
builder.build().0
|
|
447
513
|
}
|
|
448
514
|
|
|
449
515
|
fn image_count(&self) -> usize {
|
|
@@ -966,24 +1032,12 @@ fn extract_metadata(archive: &mut ZipArchive<File>) -> PptxMetadata {
|
|
|
966
1032
|
}
|
|
967
1033
|
}
|
|
968
1034
|
|
|
969
|
-
PptxMetadata {
|
|
970
|
-
title: metadata_map.get("title").cloned(),
|
|
971
|
-
author: metadata_map.get("author").cloned(),
|
|
972
|
-
description: metadata_map.get("description").cloned(),
|
|
973
|
-
summary: metadata_map.get("summary").cloned(),
|
|
974
|
-
fonts: Vec::new(),
|
|
975
|
-
}
|
|
1035
|
+
PptxMetadata { fonts: Vec::new() }
|
|
976
1036
|
}
|
|
977
1037
|
|
|
978
1038
|
#[cfg(not(feature = "office"))]
|
|
979
1039
|
{
|
|
980
|
-
PptxMetadata {
|
|
981
|
-
title: None,
|
|
982
|
-
author: None,
|
|
983
|
-
description: None,
|
|
984
|
-
summary: None,
|
|
985
|
-
fonts: Vec::new(),
|
|
986
|
-
}
|
|
1040
|
+
PptxMetadata { fonts: Vec::new() }
|
|
987
1041
|
}
|
|
988
1042
|
}
|
|
989
1043
|
|
|
@@ -1070,7 +1124,11 @@ fn detect_image_format(data: &[u8]) -> String {
|
|
|
1070
1124
|
}
|
|
1071
1125
|
}
|
|
1072
1126
|
|
|
1073
|
-
pub fn extract_pptx_from_path(
|
|
1127
|
+
pub fn extract_pptx_from_path(
|
|
1128
|
+
path: &str,
|
|
1129
|
+
extract_images: bool,
|
|
1130
|
+
page_config: Option<&crate::core::config::PageConfig>,
|
|
1131
|
+
) -> Result<PptxExtractionResult> {
|
|
1074
1132
|
let config = ParserConfig {
|
|
1075
1133
|
extract_images,
|
|
1076
1134
|
..Default::default()
|
|
@@ -1086,14 +1144,18 @@ pub fn extract_pptx_from_path(path: &str, extract_images: bool) -> Result<PptxEx
|
|
|
1086
1144
|
let slide_count = iterator.slide_count();
|
|
1087
1145
|
|
|
1088
1146
|
let estimated_capacity = slide_count * 1024;
|
|
1089
|
-
let mut content_builder = ContentBuilder::
|
|
1147
|
+
let mut content_builder = ContentBuilder::with_page_config(estimated_capacity, page_config.cloned());
|
|
1090
1148
|
|
|
1091
1149
|
let mut total_image_count = 0;
|
|
1092
1150
|
let mut total_table_count = 0;
|
|
1093
1151
|
let mut extracted_images = Vec::new();
|
|
1094
1152
|
|
|
1095
1153
|
while let Some(slide) = iterator.next_slide()? {
|
|
1096
|
-
|
|
1154
|
+
let byte_start = if page_config.is_some() {
|
|
1155
|
+
content_builder.start_slide(slide.slide_number)
|
|
1156
|
+
} else {
|
|
1157
|
+
0
|
|
1158
|
+
};
|
|
1097
1159
|
|
|
1098
1160
|
let slide_content = slide.to_markdown(&config);
|
|
1099
1161
|
content_builder.add_text(&slide_content);
|
|
@@ -1102,6 +1164,10 @@ pub fn extract_pptx_from_path(path: &str, extract_images: bool) -> Result<PptxEx
|
|
|
1102
1164
|
content_builder.add_notes(slide_notes);
|
|
1103
1165
|
}
|
|
1104
1166
|
|
|
1167
|
+
if page_config.is_some() {
|
|
1168
|
+
content_builder.end_slide(slide.slide_number, byte_start, slide_content.clone());
|
|
1169
|
+
}
|
|
1170
|
+
|
|
1105
1171
|
if config.extract_images
|
|
1106
1172
|
&& let Ok(image_data) = iterator.get_slide_images(&slide)
|
|
1107
1173
|
{
|
|
@@ -1129,17 +1195,43 @@ pub fn extract_pptx_from_path(path: &str, extract_images: bool) -> Result<PptxEx
|
|
|
1129
1195
|
total_table_count += slide.table_count();
|
|
1130
1196
|
}
|
|
1131
1197
|
|
|
1198
|
+
let (content, boundaries, page_contents) = content_builder.build();
|
|
1199
|
+
|
|
1200
|
+
let page_structure = boundaries.as_ref().map(|bounds| crate::types::PageStructure {
|
|
1201
|
+
total_count: slide_count,
|
|
1202
|
+
unit_type: crate::types::PageUnitType::Slide,
|
|
1203
|
+
boundaries: Some(bounds.clone()),
|
|
1204
|
+
pages: page_contents.as_ref().map(|pcs| {
|
|
1205
|
+
pcs.iter()
|
|
1206
|
+
.map(|pc| crate::types::PageInfo {
|
|
1207
|
+
number: pc.page_number,
|
|
1208
|
+
title: None,
|
|
1209
|
+
dimensions: None,
|
|
1210
|
+
image_count: None,
|
|
1211
|
+
table_count: None,
|
|
1212
|
+
hidden: None,
|
|
1213
|
+
})
|
|
1214
|
+
.collect()
|
|
1215
|
+
}),
|
|
1216
|
+
});
|
|
1217
|
+
|
|
1132
1218
|
Ok(PptxExtractionResult {
|
|
1133
|
-
content
|
|
1219
|
+
content,
|
|
1134
1220
|
metadata,
|
|
1135
1221
|
slide_count,
|
|
1136
1222
|
image_count: total_image_count,
|
|
1137
1223
|
table_count: total_table_count,
|
|
1138
1224
|
images: extracted_images,
|
|
1225
|
+
page_structure,
|
|
1226
|
+
page_contents,
|
|
1139
1227
|
})
|
|
1140
1228
|
}
|
|
1141
1229
|
|
|
1142
|
-
pub fn extract_pptx_from_bytes(
|
|
1230
|
+
pub fn extract_pptx_from_bytes(
|
|
1231
|
+
data: &[u8],
|
|
1232
|
+
extract_images: bool,
|
|
1233
|
+
page_config: Option<&crate::core::config::PageConfig>,
|
|
1234
|
+
) -> Result<PptxExtractionResult> {
|
|
1143
1235
|
use std::sync::atomic::{AtomicU64, Ordering};
|
|
1144
1236
|
static COUNTER: AtomicU64 = AtomicU64::new(0);
|
|
1145
1237
|
let unique_id = COUNTER.fetch_add(1, Ordering::SeqCst);
|
|
@@ -1148,9 +1240,17 @@ pub fn extract_pptx_from_bytes(data: &[u8], extract_images: bool) -> Result<Pptx
|
|
|
1148
1240
|
// IO errors must bubble up - temp file write issues need user reports ~keep
|
|
1149
1241
|
std::fs::write(&temp_path, data)?;
|
|
1150
1242
|
|
|
1151
|
-
let result = extract_pptx_from_path(
|
|
1243
|
+
let result = extract_pptx_from_path(
|
|
1244
|
+
temp_path.to_str().ok_or_else(|| {
|
|
1245
|
+
crate::KreuzbergError::validation("Invalid temp path - contains invalid UTF-8".to_string())
|
|
1246
|
+
})?,
|
|
1247
|
+
extract_images,
|
|
1248
|
+
page_config,
|
|
1249
|
+
);
|
|
1152
1250
|
|
|
1153
|
-
let
|
|
1251
|
+
if let Err(e) = std::fs::remove_file(&temp_path) {
|
|
1252
|
+
tracing::warn!("Failed to remove temp PPTX file: {}", e);
|
|
1253
|
+
}
|
|
1154
1254
|
|
|
1155
1255
|
result
|
|
1156
1256
|
}
|
|
@@ -1250,7 +1350,7 @@ mod tests {
|
|
|
1250
1350
|
#[test]
|
|
1251
1351
|
fn test_extract_pptx_from_bytes_single_slide() {
|
|
1252
1352
|
let pptx_bytes = create_test_pptx_bytes(vec!["Hello World"]);
|
|
1253
|
-
let result = extract_pptx_from_bytes(&pptx_bytes, false).unwrap();
|
|
1353
|
+
let result = extract_pptx_from_bytes(&pptx_bytes, false, None).unwrap();
|
|
1254
1354
|
|
|
1255
1355
|
assert_eq!(result.slide_count, 1);
|
|
1256
1356
|
assert!(
|
|
@@ -1265,7 +1365,7 @@ mod tests {
|
|
|
1265
1365
|
#[test]
|
|
1266
1366
|
fn test_extract_pptx_from_bytes_multiple_slides() {
|
|
1267
1367
|
let pptx_bytes = create_test_pptx_bytes(vec!["Slide 1", "Slide 2", "Slide 3"]);
|
|
1268
|
-
let result = extract_pptx_from_bytes(&pptx_bytes, false).unwrap();
|
|
1368
|
+
let result = extract_pptx_from_bytes(&pptx_bytes, false, None).unwrap();
|
|
1269
1369
|
|
|
1270
1370
|
assert_eq!(result.slide_count, 3);
|
|
1271
1371
|
assert!(result.content.contains("Slide 1"));
|
|
@@ -1276,18 +1376,15 @@ mod tests {
|
|
|
1276
1376
|
#[test]
|
|
1277
1377
|
fn test_extract_pptx_metadata() {
|
|
1278
1378
|
let pptx_bytes = create_test_pptx_bytes(vec!["Content"]);
|
|
1279
|
-
let result = extract_pptx_from_bytes(&pptx_bytes, false).unwrap();
|
|
1379
|
+
let result = extract_pptx_from_bytes(&pptx_bytes, false, None).unwrap();
|
|
1280
1380
|
|
|
1281
|
-
|
|
1282
|
-
assert_eq!(result.metadata.author, Some("Test Author".to_string()));
|
|
1283
|
-
assert_eq!(result.metadata.description, Some("Test Description".to_string()));
|
|
1284
|
-
assert_eq!(result.metadata.summary, Some("Test Subject".to_string()));
|
|
1381
|
+
assert!(result.metadata.fonts.is_empty() || !result.metadata.fonts.is_empty());
|
|
1285
1382
|
}
|
|
1286
1383
|
|
|
1287
1384
|
#[test]
|
|
1288
1385
|
fn test_extract_pptx_empty_slides() {
|
|
1289
1386
|
let pptx_bytes = create_test_pptx_bytes(vec!["", "", ""]);
|
|
1290
|
-
let result = extract_pptx_from_bytes(&pptx_bytes, false).unwrap();
|
|
1387
|
+
let result = extract_pptx_from_bytes(&pptx_bytes, false, None).unwrap();
|
|
1291
1388
|
|
|
1292
1389
|
assert_eq!(result.slide_count, 3);
|
|
1293
1390
|
}
|
|
@@ -1295,7 +1392,7 @@ mod tests {
|
|
|
1295
1392
|
#[test]
|
|
1296
1393
|
fn test_extract_pptx_from_bytes_invalid_data() {
|
|
1297
1394
|
let invalid_bytes = b"not a valid pptx file";
|
|
1298
|
-
let result = extract_pptx_from_bytes(invalid_bytes, false);
|
|
1395
|
+
let result = extract_pptx_from_bytes(invalid_bytes, false, None);
|
|
1299
1396
|
|
|
1300
1397
|
assert!(result.is_err());
|
|
1301
1398
|
if let Err(KreuzbergError::Parsing { message: msg, .. }) = result {
|
|
@@ -1308,7 +1405,7 @@ mod tests {
|
|
|
1308
1405
|
#[test]
|
|
1309
1406
|
fn test_extract_pptx_from_bytes_empty_data() {
|
|
1310
1407
|
let empty_bytes: &[u8] = &[];
|
|
1311
|
-
let result = extract_pptx_from_bytes(empty_bytes, false);
|
|
1408
|
+
let result = extract_pptx_from_bytes(empty_bytes, false, None);
|
|
1312
1409
|
|
|
1313
1410
|
assert!(result.is_err());
|
|
1314
1411
|
}
|
|
@@ -1408,7 +1505,8 @@ mod tests {
|
|
|
1408
1505
|
builder.add_text("Hello");
|
|
1409
1506
|
builder.add_text(" ");
|
|
1410
1507
|
builder.add_text("World");
|
|
1411
|
-
|
|
1508
|
+
let (content, _, _) = builder.build();
|
|
1509
|
+
assert_eq!(content, "HelloWorld");
|
|
1412
1510
|
}
|
|
1413
1511
|
|
|
1414
1512
|
#[test]
|
|
@@ -1416,28 +1514,32 @@ mod tests {
|
|
|
1416
1514
|
let mut builder = ContentBuilder::new();
|
|
1417
1515
|
builder.add_text(" ");
|
|
1418
1516
|
builder.add_text("");
|
|
1419
|
-
|
|
1517
|
+
let (content, _, _) = builder.build();
|
|
1518
|
+
assert_eq!(content, "");
|
|
1420
1519
|
}
|
|
1421
1520
|
|
|
1422
1521
|
#[test]
|
|
1423
1522
|
fn test_content_builder_add_title() {
|
|
1424
1523
|
let mut builder = ContentBuilder::new();
|
|
1425
1524
|
builder.add_title("Title");
|
|
1426
|
-
|
|
1525
|
+
let (content, _, _) = builder.build();
|
|
1526
|
+
assert_eq!(content, "# Title");
|
|
1427
1527
|
}
|
|
1428
1528
|
|
|
1429
1529
|
#[test]
|
|
1430
1530
|
fn test_content_builder_add_title_with_whitespace() {
|
|
1431
1531
|
let mut builder = ContentBuilder::new();
|
|
1432
1532
|
builder.add_title(" Title ");
|
|
1433
|
-
|
|
1533
|
+
let (content, _, _) = builder.build();
|
|
1534
|
+
assert_eq!(content, "# Title");
|
|
1434
1535
|
}
|
|
1435
1536
|
|
|
1436
1537
|
#[test]
|
|
1437
1538
|
fn test_content_builder_add_table_empty() {
|
|
1438
1539
|
let mut builder = ContentBuilder::new();
|
|
1439
1540
|
builder.add_table(&[]);
|
|
1440
|
-
|
|
1541
|
+
let (content, _, _) = builder.build();
|
|
1542
|
+
assert_eq!(content, "");
|
|
1441
1543
|
}
|
|
1442
1544
|
|
|
1443
1545
|
#[test]
|
|
@@ -1446,9 +1548,9 @@ mod tests {
|
|
|
1446
1548
|
let rows = vec![vec!["Header1".to_string(), "Header2".to_string()]];
|
|
1447
1549
|
builder.add_table(&rows);
|
|
1448
1550
|
let result = builder.build();
|
|
1449
|
-
assert!(result.contains("<table>"));
|
|
1450
|
-
assert!(result.contains("<th>Header1</th>"));
|
|
1451
|
-
assert!(result.contains("<th>Header2</th>"));
|
|
1551
|
+
assert!(result.0.contains("<table>"));
|
|
1552
|
+
assert!(result.0.contains("<th>Header1</th>"));
|
|
1553
|
+
assert!(result.0.contains("<th>Header2</th>"));
|
|
1452
1554
|
}
|
|
1453
1555
|
|
|
1454
1556
|
#[test]
|
|
@@ -1460,8 +1562,8 @@ mod tests {
|
|
|
1460
1562
|
];
|
|
1461
1563
|
builder.add_table(&rows);
|
|
1462
1564
|
let result = builder.build();
|
|
1463
|
-
assert!(result.contains("<th>H1</th>"));
|
|
1464
|
-
assert!(result.contains("<td>D1</td>"));
|
|
1565
|
+
assert!(result.0.contains("<th>H1</th>"));
|
|
1566
|
+
assert!(result.0.contains("<td>D1</td>"));
|
|
1465
1567
|
}
|
|
1466
1568
|
|
|
1467
1569
|
#[test]
|
|
@@ -1470,8 +1572,8 @@ mod tests {
|
|
|
1470
1572
|
let rows = vec![vec!["<tag>".to_string(), "a & b".to_string()]];
|
|
1471
1573
|
builder.add_table(&rows);
|
|
1472
1574
|
let result = builder.build();
|
|
1473
|
-
assert!(result.contains("<tag>"));
|
|
1474
|
-
assert!(result.contains("a & b"));
|
|
1575
|
+
assert!(result.0.contains("<tag>"));
|
|
1576
|
+
assert!(result.0.contains("a & b"));
|
|
1475
1577
|
}
|
|
1476
1578
|
|
|
1477
1579
|
#[test]
|
|
@@ -1480,8 +1582,8 @@ mod tests {
|
|
|
1480
1582
|
builder.add_list_item(1, false, "Item 1");
|
|
1481
1583
|
builder.add_list_item(1, false, "Item 2");
|
|
1482
1584
|
let result = builder.build();
|
|
1483
|
-
assert!(result.contains("- Item 1"));
|
|
1484
|
-
assert!(result.contains("- Item 2"));
|
|
1585
|
+
assert!(result.0.contains("- Item 1"));
|
|
1586
|
+
assert!(result.0.contains("- Item 2"));
|
|
1485
1587
|
}
|
|
1486
1588
|
|
|
1487
1589
|
#[test]
|
|
@@ -1490,8 +1592,8 @@ mod tests {
|
|
|
1490
1592
|
builder.add_list_item(1, true, "First");
|
|
1491
1593
|
builder.add_list_item(1, true, "Second");
|
|
1492
1594
|
let result = builder.build();
|
|
1493
|
-
assert!(result.contains("1. First"));
|
|
1494
|
-
assert!(result.contains("1. Second"));
|
|
1595
|
+
assert!(result.0.contains("1. First"));
|
|
1596
|
+
assert!(result.0.contains("1. Second"));
|
|
1495
1597
|
}
|
|
1496
1598
|
|
|
1497
1599
|
#[test]
|
|
@@ -1501,9 +1603,9 @@ mod tests {
|
|
|
1501
1603
|
builder.add_list_item(2, false, "Level 2");
|
|
1502
1604
|
builder.add_list_item(3, false, "Level 3");
|
|
1503
1605
|
let result = builder.build();
|
|
1504
|
-
assert!(result.contains("- Level 1"));
|
|
1505
|
-
assert!(result.contains(" - Level 2"));
|
|
1506
|
-
assert!(result.contains(" - Level 3"));
|
|
1606
|
+
assert!(result.0.contains("- Level 1"));
|
|
1607
|
+
assert!(result.0.contains(" - Level 2"));
|
|
1608
|
+
assert!(result.0.contains(" - Level 3"));
|
|
1507
1609
|
}
|
|
1508
1610
|
|
|
1509
1611
|
#[test]
|
|
@@ -1511,7 +1613,7 @@ mod tests {
|
|
|
1511
1613
|
let mut builder = ContentBuilder::new();
|
|
1512
1614
|
builder.add_image("img123", 5);
|
|
1513
1615
|
let result = builder.build();
|
|
1514
|
-
assert!(result.contains(""));
|
|
1616
|
+
assert!(result.0.contains(""));
|
|
1515
1617
|
}
|
|
1516
1618
|
|
|
1517
1619
|
#[test]
|
|
@@ -1519,15 +1621,16 @@ mod tests {
|
|
|
1519
1621
|
let mut builder = ContentBuilder::new();
|
|
1520
1622
|
builder.add_notes("This is a note");
|
|
1521
1623
|
let result = builder.build();
|
|
1522
|
-
assert!(result.contains("### Notes:"));
|
|
1523
|
-
assert!(result.contains("This is a note"));
|
|
1624
|
+
assert!(result.0.contains("### Notes:"));
|
|
1625
|
+
assert!(result.0.contains("This is a note"));
|
|
1524
1626
|
}
|
|
1525
1627
|
|
|
1526
1628
|
#[test]
|
|
1527
1629
|
fn test_content_builder_add_notes_empty() {
|
|
1528
1630
|
let mut builder = ContentBuilder::new();
|
|
1529
1631
|
builder.add_notes(" ");
|
|
1530
|
-
|
|
1632
|
+
let (content, _, _) = builder.build();
|
|
1633
|
+
assert_eq!(content, "");
|
|
1531
1634
|
}
|
|
1532
1635
|
|
|
1533
1636
|
#[test]
|
|
@@ -1535,7 +1638,7 @@ mod tests {
|
|
|
1535
1638
|
let mut builder = ContentBuilder::new();
|
|
1536
1639
|
builder.add_slide_header(3);
|
|
1537
1640
|
let result = builder.build();
|
|
1538
|
-
assert!(result.contains("<!-- Slide number: 3 -->"));
|
|
1641
|
+
assert!(result.0.contains("<!-- Slide number: 3 -->"));
|
|
1539
1642
|
}
|
|
1540
1643
|
|
|
1541
1644
|
#[test]
|
|
@@ -2203,7 +2306,7 @@ mod tests {
|
|
|
2203
2306
|
vec!["Row 2 Col 1", "Row 2 Col 2", "Row 2 Col 3"],
|
|
2204
2307
|
]);
|
|
2205
2308
|
|
|
2206
|
-
let result = extract_pptx_from_bytes(&pptx_bytes, false).unwrap();
|
|
2309
|
+
let result = extract_pptx_from_bytes(&pptx_bytes, false, None).unwrap();
|
|
2207
2310
|
|
|
2208
2311
|
assert_eq!(result.table_count, 1, "Should detect one table");
|
|
2209
2312
|
assert!(result.content.contains("<table>"), "Should contain table tag");
|
|
@@ -2235,7 +2338,7 @@ mod tests {
|
|
|
2235
2338
|
vec!["A4", "B4", "C4", "D4"],
|
|
2236
2339
|
]);
|
|
2237
2340
|
|
|
2238
|
-
let result = extract_pptx_from_bytes(&pptx_bytes, false).unwrap();
|
|
2341
|
+
let result = extract_pptx_from_bytes(&pptx_bytes, false, None).unwrap();
|
|
2239
2342
|
|
|
2240
2343
|
assert_eq!(result.table_count, 1, "Should detect one table");
|
|
2241
2344
|
assert!(result.content.contains("<tr>"), "Should contain table rows");
|
|
@@ -2250,7 +2353,7 @@ mod tests {
|
|
|
2250
2353
|
fn test_table_counting_via_slide_metadata_succeeds() {
|
|
2251
2354
|
let pptx_bytes = create_pptx_with_table(vec![vec!["Col1", "Col2"], vec!["Val1", "Val2"]]);
|
|
2252
2355
|
|
|
2253
|
-
let result = extract_pptx_from_bytes(&pptx_bytes, false).unwrap();
|
|
2356
|
+
let result = extract_pptx_from_bytes(&pptx_bytes, false, None).unwrap();
|
|
2254
2357
|
|
|
2255
2358
|
assert_eq!(result.table_count, 1, "table_count should be 1");
|
|
2256
2359
|
}
|
|
@@ -2262,7 +2365,7 @@ mod tests {
|
|
|
2262
2365
|
vec!["Cell data 1", "Cell data 2"],
|
|
2263
2366
|
]);
|
|
2264
2367
|
|
|
2265
|
-
let result = extract_pptx_from_bytes(&pptx_bytes, false).unwrap();
|
|
2368
|
+
let result = extract_pptx_from_bytes(&pptx_bytes, false, None).unwrap();
|
|
2266
2369
|
|
|
2267
2370
|
assert!(result.content.contains("<table>"), "Should contain table tag");
|
|
2268
2371
|
assert!(
|
|
@@ -2278,7 +2381,7 @@ mod tests {
|
|
|
2278
2381
|
#[test]
|
|
2279
2382
|
fn test_table_extraction_empty_table_returns_one_count() {
|
|
2280
2383
|
let pptx_bytes = create_pptx_with_table(vec![]);
|
|
2281
|
-
let result = extract_pptx_from_bytes(&pptx_bytes, false).unwrap();
|
|
2384
|
+
let result = extract_pptx_from_bytes(&pptx_bytes, false, None).unwrap();
|
|
2282
2385
|
|
|
2283
2386
|
assert_eq!(result.table_count, 1, "Empty table structure should be detected");
|
|
2284
2387
|
assert!(!result.content.contains("<td>"), "Empty table should have no cells");
|
|
@@ -2292,7 +2395,7 @@ mod tests {
|
|
|
2292
2395
|
(1, true, "Third item"),
|
|
2293
2396
|
]);
|
|
2294
2397
|
|
|
2295
|
-
let result = extract_pptx_from_bytes(&pptx_bytes, false).unwrap();
|
|
2398
|
+
let result = extract_pptx_from_bytes(&pptx_bytes, false, None).unwrap();
|
|
2296
2399
|
|
|
2297
2400
|
assert!(
|
|
2298
2401
|
result.content.contains("1. First item"),
|
|
@@ -2316,7 +2419,7 @@ mod tests {
|
|
|
2316
2419
|
(1, false, "Bullet three"),
|
|
2317
2420
|
]);
|
|
2318
2421
|
|
|
2319
|
-
let result = extract_pptx_from_bytes(&pptx_bytes, false).unwrap();
|
|
2422
|
+
let result = extract_pptx_from_bytes(&pptx_bytes, false, None).unwrap();
|
|
2320
2423
|
|
|
2321
2424
|
assert!(result.content.contains("- Bullet one"), "Should contain bullet point 1");
|
|
2322
2425
|
assert!(result.content.contains("- Bullet two"), "Should contain bullet point 2");
|
|
@@ -2336,7 +2439,7 @@ mod tests {
|
|
|
2336
2439
|
(1, false, "Back to Level 1"),
|
|
2337
2440
|
]);
|
|
2338
2441
|
|
|
2339
|
-
let result = extract_pptx_from_bytes(&pptx_bytes, false).unwrap();
|
|
2442
|
+
let result = extract_pptx_from_bytes(&pptx_bytes, false, None).unwrap();
|
|
2340
2443
|
|
|
2341
2444
|
assert!(
|
|
2342
2445
|
result.content.contains("- Level 1 Item"),
|
|
@@ -2365,7 +2468,7 @@ mod tests {
|
|
|
2365
2468
|
(1, true, "Ordered item 2"),
|
|
2366
2469
|
]);
|
|
2367
2470
|
|
|
2368
|
-
let result = extract_pptx_from_bytes(&pptx_bytes, false).unwrap();
|
|
2471
|
+
let result = extract_pptx_from_bytes(&pptx_bytes, false, None).unwrap();
|
|
2369
2472
|
|
|
2370
2473
|
assert!(
|
|
2371
2474
|
result.content.contains("1. Ordered item 1"),
|
|
@@ -2384,7 +2487,7 @@ mod tests {
|
|
|
2384
2487
|
#[test]
|
|
2385
2488
|
fn test_image_extraction_from_slide_xml_succeeds() {
|
|
2386
2489
|
let pptx_bytes = create_pptx_with_images();
|
|
2387
|
-
let result = extract_pptx_from_bytes(&pptx_bytes, true).unwrap();
|
|
2490
|
+
let result = extract_pptx_from_bytes(&pptx_bytes, true, None).unwrap();
|
|
2388
2491
|
|
|
2389
2492
|
assert_eq!(result.image_count, 2, "Should detect 2 images");
|
|
2390
2493
|
assert!(!result.images.is_empty(), "Should extract image data");
|
|
@@ -2393,7 +2496,7 @@ mod tests {
|
|
|
2393
2496
|
#[test]
|
|
2394
2497
|
fn test_image_data_loading_from_zip_archive_succeeds() {
|
|
2395
2498
|
let pptx_bytes = create_pptx_with_images();
|
|
2396
|
-
let result = extract_pptx_from_bytes(&pptx_bytes, true).unwrap();
|
|
2499
|
+
let result = extract_pptx_from_bytes(&pptx_bytes, true, None).unwrap();
|
|
2397
2500
|
|
|
2398
2501
|
assert_eq!(result.images.len(), 2, "Should load 2 images");
|
|
2399
2502
|
|
|
@@ -2405,7 +2508,7 @@ mod tests {
|
|
|
2405
2508
|
#[test]
|
|
2406
2509
|
fn test_image_format_detection_succeeds() {
|
|
2407
2510
|
let pptx_bytes = create_pptx_with_images();
|
|
2408
|
-
let result = extract_pptx_from_bytes(&pptx_bytes, true).unwrap();
|
|
2511
|
+
let result = extract_pptx_from_bytes(&pptx_bytes, true, None).unwrap();
|
|
2409
2512
|
|
|
2410
2513
|
assert_eq!(result.images.len(), 2, "Should have 2 images");
|
|
2411
2514
|
|
|
@@ -2418,7 +2521,7 @@ mod tests {
|
|
|
2418
2521
|
#[test]
|
|
2419
2522
|
fn test_image_counting_via_result_metadata_succeeds() {
|
|
2420
2523
|
let pptx_bytes = create_pptx_with_images();
|
|
2421
|
-
let result = extract_pptx_from_bytes(&pptx_bytes, true).unwrap();
|
|
2524
|
+
let result = extract_pptx_from_bytes(&pptx_bytes, true, None).unwrap();
|
|
2422
2525
|
|
|
2423
2526
|
assert_eq!(result.image_count, 2, "image_count should match actual images");
|
|
2424
2527
|
assert_eq!(result.images.len(), 2, "images vector should have 2 elements");
|
|
@@ -2427,7 +2530,7 @@ mod tests {
|
|
|
2427
2530
|
#[test]
|
|
2428
2531
|
fn test_image_extraction_disabled_returns_zero_images() {
|
|
2429
2532
|
let pptx_bytes = create_pptx_with_images();
|
|
2430
|
-
let result = extract_pptx_from_bytes(&pptx_bytes, false).unwrap();
|
|
2533
|
+
let result = extract_pptx_from_bytes(&pptx_bytes, false, None).unwrap();
|
|
2431
2534
|
|
|
2432
2535
|
assert_eq!(
|
|
2433
2536
|
result.image_count, 2,
|
|
@@ -2439,7 +2542,7 @@ mod tests {
|
|
|
2439
2542
|
#[test]
|
|
2440
2543
|
fn test_multiple_images_per_slide_extraction_succeeds() {
|
|
2441
2544
|
let pptx_bytes = create_pptx_with_images();
|
|
2442
|
-
let result = extract_pptx_from_bytes(&pptx_bytes, true).unwrap();
|
|
2545
|
+
let result = extract_pptx_from_bytes(&pptx_bytes, true, None).unwrap();
|
|
2443
2546
|
|
|
2444
2547
|
assert_eq!(result.slide_count, 1, "Should have 1 slide");
|
|
2445
2548
|
assert_eq!(result.image_count, 2, "Single slide should contain 2 images");
|
|
@@ -2452,7 +2555,7 @@ mod tests {
|
|
|
2452
2555
|
#[test]
|
|
2453
2556
|
fn test_formatting_bold_text_renders_as_markdown_bold() {
|
|
2454
2557
|
let pptx_bytes = create_pptx_with_formatting();
|
|
2455
|
-
let result = extract_pptx_from_bytes(&pptx_bytes, false).unwrap();
|
|
2558
|
+
let result = extract_pptx_from_bytes(&pptx_bytes, false, None).unwrap();
|
|
2456
2559
|
|
|
2457
2560
|
assert!(
|
|
2458
2561
|
result.content.contains("**Bold text"),
|
|
@@ -2463,7 +2566,7 @@ mod tests {
|
|
|
2463
2566
|
#[test]
|
|
2464
2567
|
fn test_formatting_italic_text_renders_as_markdown_italic() {
|
|
2465
2568
|
let pptx_bytes = create_pptx_with_formatting();
|
|
2466
|
-
let result = extract_pptx_from_bytes(&pptx_bytes, false).unwrap();
|
|
2569
|
+
let result = extract_pptx_from_bytes(&pptx_bytes, false, None).unwrap();
|
|
2467
2570
|
|
|
2468
2571
|
assert!(
|
|
2469
2572
|
result.content.contains("*Italic text"),
|
|
@@ -2474,7 +2577,7 @@ mod tests {
|
|
|
2474
2577
|
#[test]
|
|
2475
2578
|
fn test_formatting_underline_text_renders_as_html_underline() {
|
|
2476
2579
|
let pptx_bytes = create_pptx_with_formatting();
|
|
2477
|
-
let result = extract_pptx_from_bytes(&pptx_bytes, false).unwrap();
|
|
2580
|
+
let result = extract_pptx_from_bytes(&pptx_bytes, false, None).unwrap();
|
|
2478
2581
|
|
|
2479
2582
|
assert!(
|
|
2480
2583
|
result.content.contains("<u>Underline text"),
|
|
@@ -2485,7 +2588,7 @@ mod tests {
|
|
|
2485
2588
|
#[test]
|
|
2486
2589
|
fn test_formatting_combined_bold_italic_renders_correctly() {
|
|
2487
2590
|
let pptx_bytes = create_pptx_with_formatting();
|
|
2488
|
-
let result = extract_pptx_from_bytes(&pptx_bytes, false).unwrap();
|
|
2591
|
+
let result = extract_pptx_from_bytes(&pptx_bytes, false, None).unwrap();
|
|
2489
2592
|
|
|
2490
2593
|
assert!(
|
|
2491
2594
|
result.content.contains("***Bold italic text"),
|
|
@@ -2711,7 +2814,7 @@ mod tests {
|
|
|
2711
2814
|
let _ = zip.finish().unwrap();
|
|
2712
2815
|
}
|
|
2713
2816
|
|
|
2714
|
-
let result = extract_pptx_from_bytes(&buffer, true).unwrap();
|
|
2817
|
+
let result = extract_pptx_from_bytes(&buffer, true, None).unwrap();
|
|
2715
2818
|
|
|
2716
2819
|
assert!(
|
|
2717
2820
|
result.content.contains("**Title with Bold"),
|
|
@@ -2850,7 +2953,7 @@ mod tests {
|
|
|
2850
2953
|
let _ = zip.finish().unwrap();
|
|
2851
2954
|
}
|
|
2852
2955
|
|
|
2853
|
-
let result = extract_pptx_from_bytes(&buffer, false).unwrap();
|
|
2956
|
+
let result = extract_pptx_from_bytes(&buffer, false, None).unwrap();
|
|
2854
2957
|
|
|
2855
2958
|
let content = result.content;
|
|
2856
2959
|
let top_left_pos = content.find("Top Left").unwrap();
|
|
@@ -2977,7 +3080,7 @@ mod tests {
|
|
|
2977
3080
|
let _ = zip.finish().unwrap();
|
|
2978
3081
|
}
|
|
2979
3082
|
|
|
2980
|
-
let result = extract_pptx_from_bytes(&buffer, false).unwrap();
|
|
3083
|
+
let result = extract_pptx_from_bytes(&buffer, false, None).unwrap();
|
|
2981
3084
|
|
|
2982
3085
|
assert!(result.content.contains("Slide Content"), "Should contain slide content");
|
|
2983
3086
|
assert!(result.content.contains("### Notes:"), "Should contain notes header");
|
|
@@ -2990,11 +3093,8 @@ mod tests {
|
|
|
2990
3093
|
#[test]
|
|
2991
3094
|
fn test_integration_metadata_extraction_complete() {
|
|
2992
3095
|
let pptx_bytes = create_test_pptx_bytes(vec!["Content"]);
|
|
2993
|
-
let result = extract_pptx_from_bytes(&pptx_bytes, false).unwrap();
|
|
3096
|
+
let result = extract_pptx_from_bytes(&pptx_bytes, false, None).unwrap();
|
|
2994
3097
|
|
|
2995
|
-
|
|
2996
|
-
assert_eq!(result.metadata.author, Some("Test Author".to_string()));
|
|
2997
|
-
assert_eq!(result.metadata.description, Some("Test Description".to_string()));
|
|
2998
|
-
assert_eq!(result.metadata.summary, Some("Test Subject".to_string()));
|
|
3098
|
+
let _ = &result.metadata.fonts;
|
|
2999
3099
|
}
|
|
3000
3100
|
}
|