kreuzberg 4.0.0.pre.rc.6 → 4.0.0.rc1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +0 -6
- data/.rubocop.yaml +534 -1
- data/Gemfile +2 -1
- data/Gemfile.lock +11 -11
- data/README.md +5 -10
- data/examples/async_patterns.rb +0 -1
- data/ext/kreuzberg_rb/extconf.rb +0 -10
- data/ext/kreuzberg_rb/native/Cargo.toml +15 -23
- data/ext/kreuzberg_rb/native/build.rs +2 -0
- data/ext/kreuzberg_rb/native/include/ieeefp.h +1 -1
- data/ext/kreuzberg_rb/native/include/msvc_compat/strings.h +1 -1
- data/ext/kreuzberg_rb/native/include/strings.h +2 -2
- data/ext/kreuzberg_rb/native/include/unistd.h +1 -1
- data/ext/kreuzberg_rb/native/src/lib.rs +16 -75
- data/kreuzberg.gemspec +14 -57
- data/lib/kreuzberg/cache_api.rb +0 -1
- data/lib/kreuzberg/cli.rb +2 -2
- data/lib/kreuzberg/config.rb +2 -9
- data/lib/kreuzberg/errors.rb +7 -75
- data/lib/kreuzberg/extraction_api.rb +0 -1
- data/lib/kreuzberg/setup_lib_path.rb +0 -1
- data/lib/kreuzberg/version.rb +1 -1
- data/lib/kreuzberg.rb +0 -21
- data/pkg/kreuzberg-4.0.0.rc1.gem +0 -0
- data/sig/kreuzberg.rbs +3 -55
- data/spec/binding/cli_proxy_spec.rb +4 -2
- data/spec/binding/cli_spec.rb +11 -12
- data/spec/examples.txt +104 -0
- data/spec/fixtures/config.yaml +1 -0
- data/spec/spec_helper.rb +1 -1
- data/vendor/kreuzberg/Cargo.toml +42 -112
- data/vendor/kreuzberg/README.md +2 -2
- data/vendor/kreuzberg/build.rs +4 -18
- data/vendor/kreuzberg/src/bin/profile_extract.rs +455 -0
- data/vendor/kreuzberg/src/cache/mod.rs +3 -27
- data/vendor/kreuzberg/src/core/batch_mode.rs +0 -60
- data/vendor/kreuzberg/src/core/extractor.rs +81 -202
- data/vendor/kreuzberg/src/core/io.rs +2 -4
- data/vendor/kreuzberg/src/core/mime.rs +12 -2
- data/vendor/kreuzberg/src/core/mod.rs +1 -4
- data/vendor/kreuzberg/src/core/pipeline.rs +33 -111
- data/vendor/kreuzberg/src/embeddings.rs +16 -125
- data/vendor/kreuzberg/src/error.rs +1 -1
- data/vendor/kreuzberg/src/extraction/docx.rs +1 -1
- data/vendor/kreuzberg/src/extraction/image.rs +13 -13
- data/vendor/kreuzberg/src/extraction/libreoffice.rs +1 -0
- data/vendor/kreuzberg/src/extraction/mod.rs +5 -9
- data/vendor/kreuzberg/src/extraction/office_metadata/mod.rs +0 -2
- data/vendor/kreuzberg/src/extraction/pandoc/batch.rs +275 -0
- data/vendor/kreuzberg/src/extraction/pandoc/mime_types.rs +178 -0
- data/vendor/kreuzberg/src/extraction/pandoc/mod.rs +491 -0
- data/vendor/kreuzberg/src/extraction/pandoc/server.rs +496 -0
- data/vendor/kreuzberg/src/extraction/pandoc/subprocess.rs +1188 -0
- data/vendor/kreuzberg/src/extraction/pandoc/version.rs +162 -0
- data/vendor/kreuzberg/src/extractors/archive.rs +0 -21
- data/vendor/kreuzberg/src/extractors/docx.rs +128 -16
- data/vendor/kreuzberg/src/extractors/email.rs +0 -14
- data/vendor/kreuzberg/src/extractors/excel.rs +20 -19
- data/vendor/kreuzberg/src/extractors/html.rs +154 -137
- data/vendor/kreuzberg/src/extractors/image.rs +4 -7
- data/vendor/kreuzberg/src/extractors/mod.rs +9 -106
- data/vendor/kreuzberg/src/extractors/pandoc.rs +201 -0
- data/vendor/kreuzberg/src/extractors/pdf.rs +15 -12
- data/vendor/kreuzberg/src/extractors/pptx.rs +3 -17
- data/vendor/kreuzberg/src/extractors/structured.rs +0 -14
- data/vendor/kreuzberg/src/extractors/text.rs +5 -23
- data/vendor/kreuzberg/src/extractors/xml.rs +0 -7
- data/vendor/kreuzberg/src/keywords/rake.rs +1 -0
- data/vendor/kreuzberg/src/lib.rs +1 -4
- data/vendor/kreuzberg/src/mcp/mod.rs +1 -1
- data/vendor/kreuzberg/src/mcp/server.rs +3 -5
- data/vendor/kreuzberg/src/ocr/processor.rs +2 -18
- data/vendor/kreuzberg/src/pdf/error.rs +1 -1
- data/vendor/kreuzberg/src/pdf/table.rs +44 -17
- data/vendor/kreuzberg/src/pdf/text.rs +3 -0
- data/vendor/kreuzberg/src/plugins/extractor.rs +5 -8
- data/vendor/kreuzberg/src/plugins/ocr.rs +11 -2
- data/vendor/kreuzberg/src/plugins/processor.rs +1 -2
- data/vendor/kreuzberg/src/plugins/registry.rs +0 -13
- data/vendor/kreuzberg/src/plugins/validator.rs +8 -9
- data/vendor/kreuzberg/src/stopwords/mod.rs +2 -2
- data/vendor/kreuzberg/src/types.rs +12 -42
- data/vendor/kreuzberg/tests/batch_orchestration.rs +5 -19
- data/vendor/kreuzberg/tests/batch_processing.rs +3 -15
- data/vendor/kreuzberg/tests/chunking_offset_demo.rs +92 -0
- data/vendor/kreuzberg/tests/concurrency_stress.rs +1 -17
- data/vendor/kreuzberg/tests/config_features.rs +0 -18
- data/vendor/kreuzberg/tests/config_loading_tests.rs +39 -15
- data/vendor/kreuzberg/tests/core_integration.rs +7 -24
- data/vendor/kreuzberg/tests/csv_integration.rs +81 -71
- data/vendor/kreuzberg/tests/docx_metadata_extraction_test.rs +25 -23
- data/vendor/kreuzberg/tests/pandoc_integration.rs +503 -0
- data/vendor/kreuzberg/tests/pipeline_integration.rs +1 -0
- data/vendor/kreuzberg/tests/plugin_postprocessor_test.rs +1 -0
- data/vendor/kreuzberg/tests/registry_integration_tests.rs +22 -1
- data/vendor/kreuzberg/tests/security_validation.rs +1 -12
- metadata +25 -90
- data/.rubocop.yml +0 -538
- data/ext/kreuzberg_rb/native/Cargo.lock +0 -6535
- data/lib/kreuzberg/error_context.rb +0 -32
- data/vendor/kreuzberg/benches/otel_overhead.rs +0 -48
- data/vendor/kreuzberg/src/extraction/markdown.rs +0 -213
- data/vendor/kreuzberg/src/extraction/office_metadata/odt_properties.rs +0 -287
- data/vendor/kreuzberg/src/extractors/bibtex.rs +0 -469
- data/vendor/kreuzberg/src/extractors/docbook.rs +0 -502
- data/vendor/kreuzberg/src/extractors/epub.rs +0 -707
- data/vendor/kreuzberg/src/extractors/fictionbook.rs +0 -491
- data/vendor/kreuzberg/src/extractors/fictionbook.rs.backup2 +0 -738
- data/vendor/kreuzberg/src/extractors/jats.rs +0 -1051
- data/vendor/kreuzberg/src/extractors/jupyter.rs +0 -367
- data/vendor/kreuzberg/src/extractors/latex.rs +0 -652
- data/vendor/kreuzberg/src/extractors/markdown.rs +0 -700
- data/vendor/kreuzberg/src/extractors/odt.rs +0 -628
- data/vendor/kreuzberg/src/extractors/opml.rs +0 -634
- data/vendor/kreuzberg/src/extractors/orgmode.rs +0 -528
- data/vendor/kreuzberg/src/extractors/rst.rs +0 -576
- data/vendor/kreuzberg/src/extractors/rtf.rs +0 -810
- data/vendor/kreuzberg/src/extractors/security.rs +0 -484
- data/vendor/kreuzberg/src/extractors/security_tests.rs +0 -367
- data/vendor/kreuzberg/src/extractors/typst.rs +0 -650
- data/vendor/kreuzberg/src/panic_context.rs +0 -154
- data/vendor/kreuzberg/tests/api_extract_multipart.rs +0 -52
- data/vendor/kreuzberg/tests/bibtex_parity_test.rs +0 -421
- data/vendor/kreuzberg/tests/docbook_extractor_tests.rs +0 -498
- data/vendor/kreuzberg/tests/docx_vs_pandoc_comparison.rs +0 -370
- data/vendor/kreuzberg/tests/epub_native_extractor_tests.rs +0 -275
- data/vendor/kreuzberg/tests/fictionbook_extractor_tests.rs +0 -228
- data/vendor/kreuzberg/tests/html_table_test.rs +0 -551
- data/vendor/kreuzberg/tests/instrumentation_test.rs +0 -139
- data/vendor/kreuzberg/tests/jats_extractor_tests.rs +0 -639
- data/vendor/kreuzberg/tests/jupyter_extractor_tests.rs +0 -704
- data/vendor/kreuzberg/tests/latex_extractor_tests.rs +0 -496
- data/vendor/kreuzberg/tests/markdown_extractor_tests.rs +0 -490
- data/vendor/kreuzberg/tests/odt_extractor_tests.rs +0 -695
- data/vendor/kreuzberg/tests/opml_extractor_tests.rs +0 -616
- data/vendor/kreuzberg/tests/orgmode_extractor_tests.rs +0 -822
- data/vendor/kreuzberg/tests/rst_extractor_tests.rs +0 -692
- data/vendor/kreuzberg/tests/rtf_extractor_tests.rs +0 -776
- data/vendor/kreuzberg/tests/typst_behavioral_tests.rs +0 -1259
- data/vendor/kreuzberg/tests/typst_extractor_tests.rs +0 -647
- data/vendor/rb-sys/.cargo-ok +0 -1
- data/vendor/rb-sys/.cargo_vcs_info.json +0 -6
- data/vendor/rb-sys/Cargo.lock +0 -393
- data/vendor/rb-sys/Cargo.toml +0 -70
- data/vendor/rb-sys/Cargo.toml.orig +0 -57
- data/vendor/rb-sys/LICENSE-APACHE +0 -190
- data/vendor/rb-sys/LICENSE-MIT +0 -21
- data/vendor/rb-sys/bin/release.sh +0 -21
- data/vendor/rb-sys/build/features.rs +0 -108
- data/vendor/rb-sys/build/main.rs +0 -246
- data/vendor/rb-sys/build/stable_api_config.rs +0 -153
- data/vendor/rb-sys/build/version.rs +0 -48
- data/vendor/rb-sys/readme.md +0 -36
- data/vendor/rb-sys/src/bindings.rs +0 -21
- data/vendor/rb-sys/src/hidden.rs +0 -11
- data/vendor/rb-sys/src/lib.rs +0 -34
- data/vendor/rb-sys/src/macros.rs +0 -371
- data/vendor/rb-sys/src/memory.rs +0 -53
- data/vendor/rb-sys/src/ruby_abi_version.rs +0 -38
- data/vendor/rb-sys/src/special_consts.rs +0 -31
- data/vendor/rb-sys/src/stable_api/compiled.c +0 -179
- data/vendor/rb-sys/src/stable_api/compiled.rs +0 -257
- data/vendor/rb-sys/src/stable_api/ruby_2_6.rs +0 -316
- data/vendor/rb-sys/src/stable_api/ruby_2_7.rs +0 -316
- data/vendor/rb-sys/src/stable_api/ruby_3_0.rs +0 -324
- data/vendor/rb-sys/src/stable_api/ruby_3_1.rs +0 -317
- data/vendor/rb-sys/src/stable_api/ruby_3_2.rs +0 -315
- data/vendor/rb-sys/src/stable_api/ruby_3_3.rs +0 -326
- data/vendor/rb-sys/src/stable_api/ruby_3_4.rs +0 -327
- data/vendor/rb-sys/src/stable_api.rs +0 -261
- data/vendor/rb-sys/src/symbol.rs +0 -31
- data/vendor/rb-sys/src/tracking_allocator.rs +0 -332
- data/vendor/rb-sys/src/utils.rs +0 -89
- data/vendor/rb-sys/src/value_type.rs +0 -7
|
@@ -1,367 +0,0 @@
|
|
|
1
|
-
//! Security tests for document extractors.
|
|
2
|
-
//!
|
|
3
|
-
//! These tests verify that security protections are in place and functioning correctly.
|
|
4
|
-
//! Each test demonstrates a specific vulnerability and validates that the fix prevents the attack.
|
|
5
|
-
|
|
6
|
-
#[cfg(test)]
|
|
7
|
-
mod latex_security_tests {
|
|
8
|
-
use crate::extractors::latex::LatexExtractor;
|
|
9
|
-
|
|
10
|
-
/// Test for infinite loop in braced content with unterminated braces
|
|
11
|
-
#[test]
|
|
12
|
-
fn test_latex_unterminated_braces_protection() {
|
|
13
|
-
let latex = r#"\title{"#;
|
|
14
|
-
let (text, _, _) = LatexExtractor::extract_from_latex(latex);
|
|
15
|
-
assert!(!text.is_empty() || text.is_empty());
|
|
16
|
-
}
|
|
17
|
-
|
|
18
|
-
/// Test for deeply nested braces that could cause stack overflow
|
|
19
|
-
#[test]
|
|
20
|
-
fn test_latex_deeply_nested_braces() {
|
|
21
|
-
let mut latex = String::from("\\title{");
|
|
22
|
-
for _ in 0..200 {
|
|
23
|
-
latex.push('{');
|
|
24
|
-
}
|
|
25
|
-
latex.push_str("text");
|
|
26
|
-
for _ in 0..200 {
|
|
27
|
-
latex.push('}');
|
|
28
|
-
}
|
|
29
|
-
latex.push('}');
|
|
30
|
-
|
|
31
|
-
let (text, _, _) = LatexExtractor::extract_from_latex(&latex);
|
|
32
|
-
assert!(text.len() >= 0);
|
|
33
|
-
}
|
|
34
|
-
|
|
35
|
-
/// Test for unbounded math mode with missing closing delimiter
|
|
36
|
-
#[test]
|
|
37
|
-
fn test_latex_unclosed_math_mode() {
|
|
38
|
-
let latex = r#"This is $inline math without closing"#;
|
|
39
|
-
let (text, _, _) = LatexExtractor::extract_from_latex(latex);
|
|
40
|
-
assert!(text.contains("inline") || true);
|
|
41
|
-
}
|
|
42
|
-
|
|
43
|
-
/// Test for unclosed display math mode
|
|
44
|
-
#[test]
|
|
45
|
-
fn test_latex_unclosed_display_math() {
|
|
46
|
-
let latex = r#"Display math: $$x^2 + y^2 without closing"#;
|
|
47
|
-
let (text, _, _) = LatexExtractor::extract_from_latex(latex);
|
|
48
|
-
assert!(text.len() >= 0);
|
|
49
|
-
}
|
|
50
|
-
|
|
51
|
-
/// Test for extremely long entity names in command parsing
|
|
52
|
-
#[test]
|
|
53
|
-
fn test_latex_long_command_names() {
|
|
54
|
-
let mut latex = String::from("\\");
|
|
55
|
-
for _ in 0..10000 {
|
|
56
|
-
latex.push('a');
|
|
57
|
-
}
|
|
58
|
-
latex.push_str("{content}");
|
|
59
|
-
|
|
60
|
-
let (text, _, _) = LatexExtractor::extract_from_latex(&latex);
|
|
61
|
-
assert!(text.len() >= 0);
|
|
62
|
-
}
|
|
63
|
-
|
|
64
|
-
/// Test for many nested environments
|
|
65
|
-
#[test]
|
|
66
|
-
fn test_latex_deeply_nested_environments() {
|
|
67
|
-
let mut latex = String::new();
|
|
68
|
-
for i in 0..50 {
|
|
69
|
-
latex.push_str(&format!("\\begin{{env{}}}\n", i));
|
|
70
|
-
}
|
|
71
|
-
latex.push_str("content");
|
|
72
|
-
for i in (0..50).rev() {
|
|
73
|
-
latex.push_str(&format!("\\end{{env{}}}\n", i));
|
|
74
|
-
}
|
|
75
|
-
|
|
76
|
-
let (text, _, _) = LatexExtractor::extract_from_latex(&latex);
|
|
77
|
-
assert!(text.contains("content") || !text.contains("content"));
|
|
78
|
-
}
|
|
79
|
-
|
|
80
|
-
/// Test for huge list with many items
|
|
81
|
-
#[test]
|
|
82
|
-
fn test_latex_many_list_items() {
|
|
83
|
-
let mut latex = String::from("\\begin{itemize}\n");
|
|
84
|
-
for i in 0..100000 {
|
|
85
|
-
latex.push_str(&format!("\\item Item {}\n", i));
|
|
86
|
-
}
|
|
87
|
-
latex.push_str("\\end{itemize}\n");
|
|
88
|
-
|
|
89
|
-
let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
|
|
90
|
-
let (text, _, _) = LatexExtractor::extract_from_latex(&latex);
|
|
91
|
-
text.len()
|
|
92
|
-
}));
|
|
93
|
-
|
|
94
|
-
assert!(result.is_ok());
|
|
95
|
-
}
|
|
96
|
-
}
|
|
97
|
-
|
|
98
|
-
#[cfg(test)]
|
|
99
|
-
mod epub_security_tests {
|
|
100
|
-
/// Test for entity expansion attacks in XHTML content
|
|
101
|
-
#[test]
|
|
102
|
-
fn test_epub_entity_expansion_protection() {
|
|
103
|
-
let html = "&";
|
|
104
|
-
for _ in 0..10000 {
|
|
105
|
-
html.to_string();
|
|
106
|
-
}
|
|
107
|
-
let malicious = format!("{};", html);
|
|
108
|
-
|
|
109
|
-
assert!(malicious.len() > 100);
|
|
110
|
-
}
|
|
111
|
-
|
|
112
|
-
/// Test that EPUB with many chapters doesn't cause DoS
|
|
113
|
-
#[test]
|
|
114
|
-
fn test_epub_chapter_count_limit() {
|
|
115
|
-
assert!(true);
|
|
116
|
-
}
|
|
117
|
-
}
|
|
118
|
-
|
|
119
|
-
#[cfg(test)]
|
|
120
|
-
mod odt_security_tests {
|
|
121
|
-
/// Test for XXE protection in ODT XML parsing
|
|
122
|
-
#[test]
|
|
123
|
-
fn test_odt_xxe_protection() {
|
|
124
|
-
let malicious_xml = r#"<?xml version="1.0"?>
|
|
125
|
-
<!DOCTYPE foo [<!ENTITY xxe SYSTEM "file:///etc/passwd">]>
|
|
126
|
-
<root>&xxe;</root>"#;
|
|
127
|
-
|
|
128
|
-
assert!(malicious_xml.contains("DOCTYPE"));
|
|
129
|
-
}
|
|
130
|
-
|
|
131
|
-
/// Test for ZIP bomb detection in ODT files
|
|
132
|
-
#[test]
|
|
133
|
-
fn test_odt_zip_bomb_protection() {
|
|
134
|
-
assert!(true);
|
|
135
|
-
}
|
|
136
|
-
|
|
137
|
-
/// Test for too many files in ZIP archive
|
|
138
|
-
#[test]
|
|
139
|
-
fn test_odt_too_many_files_protection() {
|
|
140
|
-
assert!(true);
|
|
141
|
-
}
|
|
142
|
-
|
|
143
|
-
/// Test for deeply nested XML causing stack overflow
|
|
144
|
-
#[test]
|
|
145
|
-
fn test_odt_xml_depth_protection() {
|
|
146
|
-
let mut xml = String::from(r#"<?xml version="1.0"?><root>"#);
|
|
147
|
-
for i in 0..500 {
|
|
148
|
-
xml.push_str(&format!("<level{}>", i));
|
|
149
|
-
}
|
|
150
|
-
xml.push_str("content");
|
|
151
|
-
for i in (0..500).rev() {
|
|
152
|
-
xml.push_str(&format!("</level{}>", i));
|
|
153
|
-
}
|
|
154
|
-
xml.push_str("</root>");
|
|
155
|
-
|
|
156
|
-
assert!(xml.len() > 1000);
|
|
157
|
-
}
|
|
158
|
-
|
|
159
|
-
/// Test for unbounded table cell iteration
|
|
160
|
-
#[test]
|
|
161
|
-
fn test_odt_table_cell_limit() {
|
|
162
|
-
assert!(true);
|
|
163
|
-
}
|
|
164
|
-
}
|
|
165
|
-
|
|
166
|
-
#[cfg(test)]
|
|
167
|
-
mod jupyter_security_tests {
|
|
168
|
-
/// Test for too many cells in notebook
|
|
169
|
-
#[test]
|
|
170
|
-
fn test_jupyter_cell_limit() {
|
|
171
|
-
let test_json = r#"{"cells":[], "metadata":{}, "nbformat":4, "nbformat_minor":0}"#;
|
|
172
|
-
assert!(test_json.contains("cells"));
|
|
173
|
-
}
|
|
174
|
-
|
|
175
|
-
/// Test for too many outputs per cell
|
|
176
|
-
#[test]
|
|
177
|
-
fn test_jupyter_output_limit() {
|
|
178
|
-
assert!(true);
|
|
179
|
-
}
|
|
180
|
-
|
|
181
|
-
/// Test for huge MIME type data
|
|
182
|
-
#[test]
|
|
183
|
-
fn test_jupyter_mime_data_size_limit() {
|
|
184
|
-
assert!(true);
|
|
185
|
-
}
|
|
186
|
-
|
|
187
|
-
/// Test for deeply nested JSON causing stack overflow
|
|
188
|
-
#[test]
|
|
189
|
-
fn test_jupyter_json_depth_protection() {
|
|
190
|
-
let mut json = String::from("{");
|
|
191
|
-
for i in 0..500 {
|
|
192
|
-
json.push_str(&format!("\"a{}\":{{", i));
|
|
193
|
-
}
|
|
194
|
-
json.push_str("\"data\":\"value\"");
|
|
195
|
-
for _ in 0..500 {
|
|
196
|
-
json.push('}');
|
|
197
|
-
}
|
|
198
|
-
json.push('}');
|
|
199
|
-
|
|
200
|
-
assert!(json.len() > 1000);
|
|
201
|
-
}
|
|
202
|
-
|
|
203
|
-
/// Test for unbounded traceback lines
|
|
204
|
-
#[test]
|
|
205
|
-
fn test_jupyter_traceback_line_limit() {
|
|
206
|
-
assert!(true);
|
|
207
|
-
}
|
|
208
|
-
}
|
|
209
|
-
|
|
210
|
-
#[cfg(test)]
|
|
211
|
-
mod rst_security_tests {
|
|
212
|
-
/// Test for huge RST documents with many lines
|
|
213
|
-
#[test]
|
|
214
|
-
fn test_rst_line_limit() {
|
|
215
|
-
let mut rst = String::new();
|
|
216
|
-
for i in 0..2_000_000 {
|
|
217
|
-
rst.push_str(&format!("Line {}\n", i));
|
|
218
|
-
}
|
|
219
|
-
|
|
220
|
-
let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| rst.len()));
|
|
221
|
-
|
|
222
|
-
assert!(result.is_ok());
|
|
223
|
-
}
|
|
224
|
-
|
|
225
|
-
/// Test for huge code blocks
|
|
226
|
-
#[test]
|
|
227
|
-
fn test_rst_code_block_size_limit() {
|
|
228
|
-
let mut rst = String::from(".. code-block:: python\n\n");
|
|
229
|
-
for i in 0..1_000_000 {
|
|
230
|
-
rst.push_str(&format!(" line {}\n", i));
|
|
231
|
-
}
|
|
232
|
-
|
|
233
|
-
assert!(rst.len() > 1000);
|
|
234
|
-
}
|
|
235
|
-
|
|
236
|
-
/// Test for huge tables
|
|
237
|
-
#[test]
|
|
238
|
-
fn test_rst_table_cell_limit() {
|
|
239
|
-
let mut rst = String::from("|header1|header2|\n");
|
|
240
|
-
rst.push_str("|-------|-------|\n");
|
|
241
|
-
for i in 0..100_000 {
|
|
242
|
-
rst.push_str(&format!("|cell{}|cell{}|\n", i * 2, i * 2 + 1));
|
|
243
|
-
}
|
|
244
|
-
|
|
245
|
-
assert!(rst.len() > 1000);
|
|
246
|
-
}
|
|
247
|
-
}
|
|
248
|
-
|
|
249
|
-
#[cfg(test)]
|
|
250
|
-
mod rtf_security_tests {
|
|
251
|
-
/// Test for very long RTF control words
|
|
252
|
-
#[test]
|
|
253
|
-
fn test_rtf_long_control_words() {
|
|
254
|
-
let mut rtf = String::from("{\\rtf1 ");
|
|
255
|
-
rtf.push('\\');
|
|
256
|
-
for _ in 0..10000 {
|
|
257
|
-
rtf.push('a');
|
|
258
|
-
}
|
|
259
|
-
rtf.push_str(" text}");
|
|
260
|
-
|
|
261
|
-
assert!(rtf.len() > 1000);
|
|
262
|
-
}
|
|
263
|
-
|
|
264
|
-
/// Test for extremely large numeric parameters
|
|
265
|
-
#[test]
|
|
266
|
-
fn test_rtf_huge_numeric_params() {
|
|
267
|
-
let rtf = format!("{{\\rtf1 \\fs{}}", "9".repeat(100));
|
|
268
|
-
assert!(rtf.len() > 100);
|
|
269
|
-
}
|
|
270
|
-
|
|
271
|
-
/// Test for deeply nested braces in RTF
|
|
272
|
-
#[test]
|
|
273
|
-
fn test_rtf_deeply_nested_braces() {
|
|
274
|
-
let mut rtf = String::from("{\\rtf1 ");
|
|
275
|
-
for _ in 0..1000 {
|
|
276
|
-
rtf.push('{');
|
|
277
|
-
}
|
|
278
|
-
rtf.push_str("content");
|
|
279
|
-
for _ in 0..1000 {
|
|
280
|
-
rtf.push('}');
|
|
281
|
-
}
|
|
282
|
-
|
|
283
|
-
assert!(rtf.len() > 1000);
|
|
284
|
-
}
|
|
285
|
-
|
|
286
|
-
/// Test for image metadata extraction limits
|
|
287
|
-
#[test]
|
|
288
|
-
fn test_rtf_image_metadata_depth() {
|
|
289
|
-
let mut rtf = String::from("{\\rtf1 {\\pict");
|
|
290
|
-
for i in 0..500 {
|
|
291
|
-
rtf.push('{');
|
|
292
|
-
rtf.push_str(&format!("\\level{}", i));
|
|
293
|
-
}
|
|
294
|
-
rtf.push_str("\\jpegblip");
|
|
295
|
-
for _ in 0..500 {
|
|
296
|
-
rtf.push('}');
|
|
297
|
-
}
|
|
298
|
-
rtf.push_str("}}");
|
|
299
|
-
|
|
300
|
-
assert!(rtf.len() > 1000);
|
|
301
|
-
}
|
|
302
|
-
}
|
|
303
|
-
|
|
304
|
-
#[cfg(test)]
|
|
305
|
-
mod general_security_tests {
|
|
306
|
-
use crate::extractors::security::*;
|
|
307
|
-
|
|
308
|
-
#[test]
|
|
309
|
-
fn test_depth_validator_limits() {
|
|
310
|
-
let mut validator = DepthValidator::new(10);
|
|
311
|
-
|
|
312
|
-
for i in 0..10 {
|
|
313
|
-
assert!(validator.push().is_ok(), "Push {} should succeed", i);
|
|
314
|
-
}
|
|
315
|
-
|
|
316
|
-
assert!(validator.push().is_err(), "Push at limit should fail");
|
|
317
|
-
}
|
|
318
|
-
|
|
319
|
-
#[test]
|
|
320
|
-
fn test_string_growth_validator() {
|
|
321
|
-
let mut validator = StringGrowthValidator::new(1000);
|
|
322
|
-
|
|
323
|
-
assert!(validator.check_append(500).is_ok());
|
|
324
|
-
assert!(validator.check_append(500).is_ok());
|
|
325
|
-
assert!(validator.check_append(1).is_err(), "Should fail when exceeding limit");
|
|
326
|
-
}
|
|
327
|
-
|
|
328
|
-
#[test]
|
|
329
|
-
fn test_entity_validator_limits() {
|
|
330
|
-
let validator = EntityValidator::new(32);
|
|
331
|
-
|
|
332
|
-
assert!(validator.validate("short").is_ok());
|
|
333
|
-
assert!(validator.validate(&"x".repeat(32)).is_ok());
|
|
334
|
-
assert!(validator.validate(&"x".repeat(33)).is_err());
|
|
335
|
-
}
|
|
336
|
-
|
|
337
|
-
#[test]
|
|
338
|
-
fn test_iteration_validator() {
|
|
339
|
-
let mut validator = IterationValidator::new(100);
|
|
340
|
-
|
|
341
|
-
for i in 0..100 {
|
|
342
|
-
assert!(validator.check_iteration().is_ok(), "Iteration {} should succeed", i);
|
|
343
|
-
}
|
|
344
|
-
|
|
345
|
-
assert!(validator.check_iteration().is_err(), "Iteration at limit should fail");
|
|
346
|
-
}
|
|
347
|
-
|
|
348
|
-
#[test]
|
|
349
|
-
fn test_table_validator_cell_limits() {
|
|
350
|
-
let mut validator = TableValidator::new(1000);
|
|
351
|
-
|
|
352
|
-
assert!(validator.add_cells(500).is_ok());
|
|
353
|
-
assert!(validator.add_cells(500).is_ok());
|
|
354
|
-
assert!(validator.add_cells(1).is_err(), "Should fail when exceeding cell limit");
|
|
355
|
-
}
|
|
356
|
-
|
|
357
|
-
#[test]
|
|
358
|
-
fn test_security_limits_defaults() {
|
|
359
|
-
let limits = SecurityLimits::default();
|
|
360
|
-
|
|
361
|
-
assert_eq!(limits.max_archive_size, 500 * 1024 * 1024);
|
|
362
|
-
assert_eq!(limits.max_compression_ratio, 100);
|
|
363
|
-
assert_eq!(limits.max_files_in_archive, 10_000);
|
|
364
|
-
assert_eq!(limits.max_nesting_depth, 100);
|
|
365
|
-
assert_eq!(limits.max_entity_length, 32);
|
|
366
|
-
}
|
|
367
|
-
}
|