kreuzberg 4.1.1 → 4.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (88)
  1. checksums.yaml +4 -4
  2. data/Gemfile.lock +4 -4
  3. data/README.md +8 -5
  4. data/ext/kreuzberg_rb/native/Cargo.toml +2 -2
  5. data/ext/kreuzberg_rb/native/libpdfium.so +0 -0
  6. data/ext/kreuzberg_rb/native/src/config/types.rs +23 -13
  7. data/kreuzberg.gemspec +14 -2
  8. data/lib/kreuzberg/api_proxy.rb +0 -1
  9. data/lib/kreuzberg/cli_proxy.rb +0 -1
  10. data/lib/kreuzberg/config.rb +70 -35
  11. data/lib/kreuzberg/mcp_proxy.rb +0 -1
  12. data/lib/kreuzberg/version.rb +1 -1
  13. data/sig/kreuzberg.rbs +5 -1
  14. data/spec/binding/batch_operations_spec.rb +80 -0
  15. data/spec/binding/metadata_types_spec.rb +77 -57
  16. data/spec/serialization_spec.rb +134 -0
  17. data/spec/unit/config/output_format_spec.rb +380 -0
  18. data/vendor/Cargo.toml +1 -1
  19. data/vendor/kreuzberg/Cargo.toml +3 -3
  20. data/vendor/kreuzberg/README.md +1 -1
  21. data/vendor/kreuzberg/src/embeddings.rs +4 -4
  22. data/vendor/kreuzberg/src/mcp/format.rs +237 -39
  23. data/vendor/kreuzberg/src/mcp/params.rs +26 -33
  24. data/vendor/kreuzberg/src/mcp/server.rs +6 -3
  25. data/vendor/kreuzberg/src/mcp/tools/extraction.rs +16 -23
  26. data/vendor/kreuzberg/tests/api_chunk.rs +40 -30
  27. data/vendor/kreuzberg/tests/api_consistency.rs +349 -0
  28. data/vendor/kreuzberg/tests/api_embed.rs +84 -50
  29. data/vendor/kreuzberg/tests/api_large_pdf_extraction_diagnostics.rs +8 -2
  30. data/vendor/kreuzberg/tests/api_tests.rs +298 -139
  31. data/vendor/kreuzberg/tests/archive_integration.rs +63 -56
  32. data/vendor/kreuzberg/tests/batch_orchestration.rs +22 -14
  33. data/vendor/kreuzberg/tests/batch_pooling_benchmark.rs +13 -13
  34. data/vendor/kreuzberg/tests/batch_processing.rs +13 -9
  35. data/vendor/kreuzberg/tests/bibtex_parity_test.rs +10 -10
  36. data/vendor/kreuzberg/tests/concurrency_stress.rs +10 -6
  37. data/vendor/kreuzberg/tests/config_behavioral.rs +414 -0
  38. data/vendor/kreuzberg/tests/config_features.rs +19 -15
  39. data/vendor/kreuzberg/tests/config_integration_test.rs +68 -68
  40. data/vendor/kreuzberg/tests/config_loading_tests.rs +71 -62
  41. data/vendor/kreuzberg/tests/contract_mcp.rs +314 -0
  42. data/vendor/kreuzberg/tests/core_integration.rs +55 -53
  43. data/vendor/kreuzberg/tests/docbook_extractor_tests.rs +23 -23
  44. data/vendor/kreuzberg/tests/docx_metadata_extraction_test.rs +15 -14
  45. data/vendor/kreuzberg/tests/docx_vs_pandoc_comparison.rs +4 -4
  46. data/vendor/kreuzberg/tests/email_integration.rs +7 -7
  47. data/vendor/kreuzberg/tests/epub_native_extractor_tests.rs +2 -2
  48. data/vendor/kreuzberg/tests/error_handling.rs +13 -11
  49. data/vendor/kreuzberg/tests/fictionbook_extractor_tests.rs +2 -2
  50. data/vendor/kreuzberg/tests/html_table_test.rs +11 -11
  51. data/vendor/kreuzberg/tests/instrumentation_test.rs +18 -13
  52. data/vendor/kreuzberg/tests/jats_extractor_tests.rs +17 -17
  53. data/vendor/kreuzberg/tests/jupyter_extractor_tests.rs +9 -9
  54. data/vendor/kreuzberg/tests/keywords_integration.rs +25 -25
  55. data/vendor/kreuzberg/tests/keywords_quality.rs +9 -9
  56. data/vendor/kreuzberg/tests/latex_extractor_tests.rs +2 -2
  57. data/vendor/kreuzberg/tests/markdown_extractor_tests.rs +1 -1
  58. data/vendor/kreuzberg/tests/mcp_integration.rs +849 -0
  59. data/vendor/kreuzberg/tests/mime_detection.rs +72 -41
  60. data/vendor/kreuzberg/tests/ocr_errors.rs +10 -4
  61. data/vendor/kreuzberg/tests/ocr_language_registry.rs +1 -1
  62. data/vendor/kreuzberg/tests/ocr_stress.rs +3 -3
  63. data/vendor/kreuzberg/tests/odt_extractor_tests.rs +6 -6
  64. data/vendor/kreuzberg/tests/opml_extractor_tests.rs +2 -2
  65. data/vendor/kreuzberg/tests/orgmode_extractor_tests.rs +2 -2
  66. data/vendor/kreuzberg/tests/page_markers.rs +1 -1
  67. data/vendor/kreuzberg/tests/pdf_hierarchy_detection.rs +6 -6
  68. data/vendor/kreuzberg/tests/pdf_text_merging.rs +2 -2
  69. data/vendor/kreuzberg/tests/pipeline_integration.rs +77 -61
  70. data/vendor/kreuzberg/tests/plugin_ocr_backend_test.rs +97 -77
  71. data/vendor/kreuzberg/tests/plugin_postprocessor_test.rs +78 -61
  72. data/vendor/kreuzberg/tests/plugin_system.rs +49 -46
  73. data/vendor/kreuzberg/tests/plugin_validator_test.rs +109 -97
  74. data/vendor/kreuzberg/tests/pptx_regression_tests.rs +40 -30
  75. data/vendor/kreuzberg/tests/registry_integration_tests.rs +26 -23
  76. data/vendor/kreuzberg/tests/rst_extractor_tests.rs +1 -1
  77. data/vendor/kreuzberg/tests/rtf_extractor_tests.rs +18 -18
  78. data/vendor/kreuzberg/tests/security_validation.rs +20 -19
  79. data/vendor/kreuzberg/tests/serialization_integration.rs +112 -0
  80. data/vendor/kreuzberg/tests/stopwords_integration_test.rs +36 -36
  81. data/vendor/kreuzberg/tests/test_fastembed.rs +8 -8
  82. data/vendor/kreuzberg/tests/typst_extractor_tests.rs +9 -9
  83. data/vendor/kreuzberg/tests/xlsx_metadata_extraction_test.rs +12 -9
  84. data/vendor/kreuzberg-tesseract/Cargo.toml +3 -3
  85. data/vendor/kreuzberg-tesseract/build.rs +4 -4
  86. data/vendor/kreuzberg-tesseract/src/lib.rs +6 -6
  87. data/vendor/kreuzberg-tesseract/tests/integration_test.rs +3 -3
  88. metadata +13 -2
@@ -83,13 +83,13 @@ mod jats_extractor_tests {
83
83
  .await;
84
84
 
85
85
  assert!(result.is_ok());
86
- let extraction = result.unwrap();
86
+ let extraction = result.expect("Operation failed");
87
87
 
88
88
  assert!(extraction.content.contains("Effects of Caffeine"));
89
89
  assert!(extraction.content.contains("Introduction"));
90
90
 
91
91
  assert!(extraction.metadata.subject.is_some());
92
- let subject = extraction.metadata.subject.unwrap();
92
+ let subject = extraction.metadata.subject.expect("Operation failed");
93
93
  assert!(subject.contains("Effects of Caffeine"));
94
94
 
95
95
  assert!(subject.contains("10.1371"));
@@ -144,9 +144,9 @@ mod jats_extractor_tests {
144
144
  .await;
145
145
 
146
146
  assert!(result.is_ok());
147
- let extraction = result.unwrap();
147
+ let extraction = result.expect("Operation failed");
148
148
 
149
- let subject = extraction.metadata.subject.unwrap();
149
+ let subject = extraction.metadata.subject.expect("Operation failed");
150
150
  assert!(subject.contains("Alpha"));
151
151
  assert!(subject.contains("Beta"));
152
152
  assert!(subject.contains("Gamma"));
@@ -201,7 +201,7 @@ mod jats_extractor_tests {
201
201
  .await;
202
202
 
203
203
  assert!(result.is_ok());
204
- let extraction = result.unwrap();
204
+ let extraction = result.expect("Operation failed");
205
205
 
206
206
  assert!(extraction.content.contains("Introduction"));
207
207
  assert!(extraction.content.contains("Methods"));
@@ -273,7 +273,7 @@ mod jats_extractor_tests {
273
273
  .await;
274
274
 
275
275
  assert!(result.is_ok());
276
- let extraction = result.unwrap();
276
+ let extraction = result.expect("Operation failed");
277
277
 
278
278
  assert_eq!(extraction.tables.len(), 1);
279
279
  let table = &extraction.tables[0];
@@ -327,7 +327,7 @@ mod jats_extractor_tests {
327
327
  .await;
328
328
 
329
329
  assert!(result.is_ok());
330
- let extraction = result.unwrap();
330
+ let extraction = result.expect("Operation failed");
331
331
 
332
332
  assert_eq!(extraction.tables.len(), 2);
333
333
  assert_eq!(extraction.tables[0].cells[0].len(), 2);
@@ -390,7 +390,7 @@ mod jats_extractor_tests {
390
390
  .await;
391
391
 
392
392
  assert!(result.is_ok());
393
- let extraction = result.unwrap();
393
+ let extraction = result.expect("Operation failed");
394
394
 
395
395
  assert!(extraction.content.contains("Previous research"));
396
396
  assert!(extraction.content.contains("Other studies"));
@@ -429,9 +429,9 @@ mod jats_extractor_tests {
429
429
  .await;
430
430
 
431
431
  assert!(result.is_ok());
432
- let extraction = result.unwrap();
432
+ let extraction = result.expect("Operation failed");
433
433
 
434
- let subject = extraction.metadata.subject.unwrap();
434
+ let subject = extraction.metadata.subject.expect("Operation failed");
435
435
  assert!(subject.contains("background") || subject.contains("Background") || subject.contains("Abstract"));
436
436
  }
437
437
 
@@ -457,7 +457,7 @@ mod jats_extractor_tests {
457
457
  .await;
458
458
 
459
459
  assert!(result.is_ok());
460
- let extraction = result.unwrap();
460
+ let extraction = result.expect("Operation failed");
461
461
 
462
462
  assert!(extraction.metadata.subject.is_some());
463
463
  }
@@ -486,7 +486,7 @@ mod jats_extractor_tests {
486
486
  .await;
487
487
 
488
488
  assert!(result.is_ok());
489
- let extraction = result.unwrap();
489
+ let extraction = result.expect("Operation failed");
490
490
 
491
491
  assert!(extraction.metadata.created_at.is_some());
492
492
  }
@@ -511,7 +511,7 @@ mod jats_extractor_tests {
511
511
  .await;
512
512
 
513
513
  assert!(result.is_ok());
514
- let extraction = result.unwrap();
514
+ let extraction = result.expect("Operation failed");
515
515
  assert!(extraction.content.is_empty() || extraction.content.trim().is_empty());
516
516
  }
517
517
 
@@ -578,7 +578,7 @@ mod jats_extractor_tests {
578
578
  .await;
579
579
 
580
580
  assert!(result.is_ok());
581
- let extraction = result.unwrap();
581
+ let extraction = result.expect("Operation failed");
582
582
 
583
583
  assert!(extraction.content.contains("First paragraph"));
584
584
  assert!(extraction.content.contains("Second paragraph"));
@@ -611,9 +611,9 @@ mod jats_extractor_tests {
611
611
  .await;
612
612
 
613
613
  assert!(result.is_ok());
614
- let extraction = result.unwrap();
614
+ let extraction = result.expect("Operation failed");
615
615
 
616
- let subject = extraction.metadata.subject.unwrap();
616
+ let subject = extraction.metadata.subject.expect("Operation failed");
617
617
  assert!(subject.contains("keyword") || subject.contains("Keyword"));
618
618
  }
619
619
 
@@ -630,7 +630,7 @@ mod jats_extractor_tests {
630
630
  .await;
631
631
 
632
632
  assert!(result.is_ok());
633
- let extraction = result.unwrap();
633
+ let extraction = result.expect("Operation failed");
634
634
 
635
635
  assert!(!extraction.content.is_empty());
636
636
  assert!(extraction.metadata.subject.is_some());
@@ -61,7 +61,7 @@ async fn test_jupyter_simple_notebook_extraction() {
61
61
  return;
62
62
  }
63
63
 
64
- let extraction = result.unwrap();
64
+ let extraction = result.expect("Operation failed");
65
65
 
66
66
  assert_eq!(
67
67
  extraction.mime_type, "application/x-ipynb+json",
@@ -156,7 +156,7 @@ async fn test_jupyter_mime_notebook_extraction() {
156
156
  return;
157
157
  }
158
158
 
159
- let extraction = result.unwrap();
159
+ let extraction = result.expect("Operation failed");
160
160
 
161
161
  assert_eq!(
162
162
  extraction.mime_type, "application/x-ipynb+json",
@@ -261,7 +261,7 @@ async fn test_jupyter_mime_out_notebook_extraction() {
261
261
  return;
262
262
  }
263
263
 
264
- let extraction = result.unwrap();
264
+ let extraction = result.expect("Operation failed");
265
265
 
266
266
  assert_eq!(
267
267
  extraction.mime_type, "application/x-ipynb+json",
@@ -351,7 +351,7 @@ async fn test_jupyter_rank_notebook_extraction() {
351
351
  return;
352
352
  }
353
353
 
354
- let extraction = result.unwrap();
354
+ let extraction = result.expect("Operation failed");
355
355
 
356
356
  assert_eq!(
357
357
  extraction.mime_type, "application/x-ipynb+json",
@@ -440,7 +440,7 @@ async fn test_jupyter_metadata_aggregation() {
440
440
  continue;
441
441
  }
442
442
 
443
- let extraction = result.unwrap();
443
+ let extraction = result.expect("Operation failed");
444
444
 
445
445
  assert!(
446
446
  !extraction.content.is_empty(),
@@ -491,7 +491,7 @@ async fn test_jupyter_cell_content_aggregation() {
491
491
  return;
492
492
  }
493
493
 
494
- let extraction = result.unwrap();
494
+ let extraction = result.expect("Operation failed");
495
495
 
496
496
  let code_indicators = ["class", "def", "import", "from", "python"];
497
497
  let code_count = code_indicators
@@ -563,7 +563,7 @@ async fn test_jupyter_mime_output_handling() {
563
563
  return;
564
564
  }
565
565
 
566
- let extraction = result.unwrap();
566
+ let extraction = result.expect("Operation failed");
567
567
 
568
568
  assert!(
569
569
  extraction.content.contains("image")
@@ -620,7 +620,7 @@ async fn test_jupyter_notebook_structure_preservation() {
620
620
  return;
621
621
  }
622
622
 
623
- let extraction = result.unwrap();
623
+ let extraction = result.expect("Operation failed");
624
624
 
625
625
  let cell_id_patterns = ["uid1", "uid2", "uid3", "uid4", "uid6"];
626
626
  let id_count = cell_id_patterns
@@ -672,7 +672,7 @@ async fn test_jupyter_pandoc_baseline_alignment() {
672
672
  continue;
673
673
  }
674
674
 
675
- let extraction = result.unwrap();
675
+ let extraction = result.expect("Operation failed");
676
676
 
677
677
  assert!(
678
678
  extraction.content.contains("cell")
@@ -38,7 +38,7 @@ El procesamiento del lenguaje natural es un campo de la inteligencia artificial
38
38
  #[test]
39
39
  fn test_yake_basic_extraction() {
40
40
  let config = KeywordConfig::yake();
41
- let keywords = extract_keywords(ML_DOCUMENT, &config).unwrap();
41
+ let keywords = extract_keywords(ML_DOCUMENT, &config).expect("Operation failed");
42
42
 
43
43
  assert!(!keywords.is_empty(), "Should extract keywords from document");
44
44
  assert!(
@@ -80,7 +80,7 @@ fn test_yake_basic_extraction() {
80
80
  #[test]
81
81
  fn test_rake_basic_extraction() {
82
82
  let config = KeywordConfig::rake();
83
- let keywords = extract_keywords(ML_DOCUMENT, &config).unwrap();
83
+ let keywords = extract_keywords(ML_DOCUMENT, &config).expect("Operation failed");
84
84
 
85
85
  assert!(!keywords.is_empty(), "Should extract keywords from document");
86
86
  assert!(
@@ -122,8 +122,8 @@ fn test_yake_vs_rake_comparison() {
122
122
  let yake_config = KeywordConfig::yake().with_max_keywords(5);
123
123
  let rake_config = KeywordConfig::rake().with_max_keywords(5);
124
124
 
125
- let yake_keywords = extract_keywords(ML_DOCUMENT, &yake_config).unwrap();
126
- let rake_keywords = extract_keywords(ML_DOCUMENT, &rake_config).unwrap();
125
+ let yake_keywords = extract_keywords(ML_DOCUMENT, &yake_config).expect("Operation failed");
126
+ let rake_keywords = extract_keywords(ML_DOCUMENT, &rake_config).expect("Operation failed");
127
127
 
128
128
  assert!(!yake_keywords.is_empty(), "YAKE should extract keywords");
129
129
  assert!(!rake_keywords.is_empty(), "RAKE should extract keywords");
@@ -161,7 +161,7 @@ fn test_yake_vs_rake_comparison() {
161
161
  #[test]
162
162
  fn test_yake_with_max_keywords() {
163
163
  let config = KeywordConfig::yake().with_max_keywords(3);
164
- let keywords = extract_keywords(ML_DOCUMENT, &config).unwrap();
164
+ let keywords = extract_keywords(ML_DOCUMENT, &config).expect("Operation failed");
165
165
 
166
166
  assert!(keywords.len() <= 3, "Should respect max_keywords=3 limit");
167
167
 
@@ -176,7 +176,7 @@ fn test_yake_with_max_keywords() {
176
176
  #[test]
177
177
  fn test_rake_with_max_keywords() {
178
178
  let config = KeywordConfig::rake().with_max_keywords(3);
179
- let keywords = extract_keywords(ML_DOCUMENT, &config).unwrap();
179
+ let keywords = extract_keywords(ML_DOCUMENT, &config).expect("Operation failed");
180
180
 
181
181
  assert!(keywords.len() <= 3, "Should respect max_keywords=3 limit");
182
182
 
@@ -191,7 +191,7 @@ fn test_rake_with_max_keywords() {
191
191
  #[test]
192
192
  fn test_yake_with_min_score() {
193
193
  let config = KeywordConfig::yake().with_min_score(0.5);
194
- let keywords = extract_keywords(ML_DOCUMENT, &config).unwrap();
194
+ let keywords = extract_keywords(ML_DOCUMENT, &config).expect("Operation failed");
195
195
 
196
196
  for keyword in &keywords {
197
197
  assert!(
@@ -207,7 +207,7 @@ fn test_yake_with_min_score() {
207
207
  #[test]
208
208
  fn test_rake_with_min_score() {
209
209
  let config = KeywordConfig::rake().with_min_score(0.2);
210
- let keywords = extract_keywords(ML_DOCUMENT, &config).unwrap();
210
+ let keywords = extract_keywords(ML_DOCUMENT, &config).expect("Operation failed");
211
211
 
212
212
  for keyword in &keywords {
213
213
  assert!(
@@ -223,7 +223,7 @@ fn test_rake_with_min_score() {
223
223
  #[test]
224
224
  fn test_yake_with_ngram_range() {
225
225
  let config = KeywordConfig::yake().with_ngram_range(1, 1);
226
- let keywords = extract_keywords(ML_DOCUMENT, &config).unwrap();
226
+ let keywords = extract_keywords(ML_DOCUMENT, &config).expect("Operation failed");
227
227
 
228
228
  for keyword in &keywords {
229
229
  let word_count = keyword.text.split_whitespace().count();
@@ -231,7 +231,7 @@ fn test_yake_with_ngram_range() {
231
231
  }
232
232
 
233
233
  let config = KeywordConfig::yake().with_ngram_range(2, 3);
234
- let keywords = extract_keywords(ML_DOCUMENT, &config).unwrap();
234
+ let keywords = extract_keywords(ML_DOCUMENT, &config).expect("Operation failed");
235
235
 
236
236
  for keyword in &keywords {
237
237
  let word_count = keyword.text.split_whitespace().count();
@@ -248,7 +248,7 @@ fn test_yake_with_ngram_range() {
248
248
  #[test]
249
249
  fn test_rake_with_ngram_range() {
250
250
  let config = KeywordConfig::rake().with_ngram_range(1, 1);
251
- let keywords = extract_keywords(ML_DOCUMENT, &config).unwrap();
251
+ let keywords = extract_keywords(ML_DOCUMENT, &config).expect("Operation failed");
252
252
 
253
253
  for keyword in &keywords {
254
254
  let word_count = keyword.text.split_whitespace().count();
@@ -256,7 +256,7 @@ fn test_rake_with_ngram_range() {
256
256
  }
257
257
 
258
258
  let config = KeywordConfig::rake().with_ngram_range(2, 2);
259
- let keywords = extract_keywords(ML_DOCUMENT, &config).unwrap();
259
+ let keywords = extract_keywords(ML_DOCUMENT, &config).expect("Operation failed");
260
260
 
261
261
  for keyword in &keywords {
262
262
  let word_count = keyword.text.split_whitespace().count();
@@ -268,7 +268,7 @@ fn test_rake_with_ngram_range() {
268
268
  #[test]
269
269
  fn test_rake_with_spanish() {
270
270
  let config = KeywordConfig::rake().with_language("es");
271
- let keywords = extract_keywords(SPANISH_DOCUMENT, &config).unwrap();
271
+ let keywords = extract_keywords(SPANISH_DOCUMENT, &config).expect("Operation failed");
272
272
 
273
273
  assert!(!keywords.is_empty(), "Should extract Spanish keywords");
274
274
 
@@ -294,7 +294,7 @@ fn test_rake_with_spanish() {
294
294
  #[test]
295
295
  fn test_yake_with_spanish() {
296
296
  let config = KeywordConfig::yake().with_language("es");
297
- let keywords = extract_keywords(SPANISH_DOCUMENT, &config).unwrap();
297
+ let keywords = extract_keywords(SPANISH_DOCUMENT, &config).expect("Operation failed");
298
298
 
299
299
  assert!(!keywords.is_empty(), "Should extract Spanish keywords");
300
300
 
@@ -308,7 +308,7 @@ fn test_yake_with_spanish() {
308
308
  #[test]
309
309
  fn test_rake_empty_document() {
310
310
  let config = KeywordConfig::rake();
311
- let keywords = extract_keywords("", &config).unwrap();
311
+ let keywords = extract_keywords("", &config).expect("Operation failed");
312
312
 
313
313
  assert!(keywords.is_empty(), "Empty document should yield no keywords");
314
314
  }
@@ -317,7 +317,7 @@ fn test_rake_empty_document() {
317
317
  #[test]
318
318
  fn test_yake_empty_document() {
319
319
  let config = KeywordConfig::yake();
320
- let keywords = extract_keywords("", &config).unwrap();
320
+ let keywords = extract_keywords("", &config).expect("Operation failed");
321
321
 
322
322
  assert!(keywords.is_empty(), "Empty document should yield no keywords");
323
323
  }
@@ -327,7 +327,7 @@ fn test_yake_empty_document() {
327
327
  fn test_rake_short_document() {
328
328
  let short_text = "Machine learning algorithms.";
329
329
  let config = KeywordConfig::rake();
330
- let keywords = extract_keywords(short_text, &config).unwrap();
330
+ let keywords = extract_keywords(short_text, &config).expect("Operation failed");
331
331
 
332
332
  println!(
333
333
  "Keywords from short text: {:?}",
@@ -340,7 +340,7 @@ fn test_rake_short_document() {
340
340
  fn test_yake_short_document() {
341
341
  let short_text = "Machine learning algorithms.";
342
342
  let config = KeywordConfig::yake();
343
- let keywords = extract_keywords(short_text, &config).unwrap();
343
+ let keywords = extract_keywords(short_text, &config).expect("Operation failed");
344
344
 
345
345
  println!(
346
346
  "YAKE keywords from short text: {:?}",
@@ -353,13 +353,13 @@ fn test_yake_short_document() {
353
353
  fn test_rake_different_domains() {
354
354
  let config = KeywordConfig::rake().with_max_keywords(5);
355
355
 
356
- let ml_keywords = extract_keywords(ML_DOCUMENT, &config).unwrap();
356
+ let ml_keywords = extract_keywords(ML_DOCUMENT, &config).expect("Operation failed");
357
357
  println!("\nML domain keywords:");
358
358
  for kw in &ml_keywords {
359
359
  println!(" {} (score: {:.3})", kw.text, kw.score);
360
360
  }
361
361
 
362
- let climate_keywords = extract_keywords(CLIMATE_DOCUMENT, &config).unwrap();
362
+ let climate_keywords = extract_keywords(CLIMATE_DOCUMENT, &config).expect("Operation failed");
363
363
  println!("\nClimate domain keywords:");
364
364
  for kw in &climate_keywords {
365
365
  println!(" {} (score: {:.3})", kw.text, kw.score);
@@ -395,13 +395,13 @@ fn test_rake_different_domains() {
395
395
  fn test_yake_different_domains() {
396
396
  let config = KeywordConfig::yake().with_max_keywords(5);
397
397
 
398
- let ml_keywords = extract_keywords(ML_DOCUMENT, &config).unwrap();
398
+ let ml_keywords = extract_keywords(ML_DOCUMENT, &config).expect("Operation failed");
399
399
  println!("\nYAKE ML domain keywords:");
400
400
  for kw in &ml_keywords {
401
401
  println!(" {} (score: {:.3})", kw.text, kw.score);
402
402
  }
403
403
 
404
- let climate_keywords = extract_keywords(CLIMATE_DOCUMENT, &config).unwrap();
404
+ let climate_keywords = extract_keywords(CLIMATE_DOCUMENT, &config).expect("Operation failed");
405
405
  println!("\nYAKE Climate domain keywords:");
406
406
  for kw in &climate_keywords {
407
407
  println!(" {} (score: {:.3})", kw.text, kw.score);
@@ -415,7 +415,7 @@ fn test_yake_different_domains() {
415
415
  #[test]
416
416
  fn test_rake_score_distribution() {
417
417
  let config = KeywordConfig::rake();
418
- let keywords = extract_keywords(ML_DOCUMENT, &config).unwrap();
418
+ let keywords = extract_keywords(ML_DOCUMENT, &config).expect("Operation failed");
419
419
 
420
420
  if keywords.is_empty() {
421
421
  return;
@@ -439,7 +439,7 @@ fn test_rake_score_distribution() {
439
439
  #[test]
440
440
  fn test_yake_score_distribution() {
441
441
  let config = KeywordConfig::yake();
442
- let keywords = extract_keywords(ML_DOCUMENT, &config).unwrap();
442
+ let keywords = extract_keywords(ML_DOCUMENT, &config).expect("Operation failed");
443
443
 
444
444
  if keywords.is_empty() {
445
445
  return;
@@ -463,7 +463,7 @@ fn test_yake_score_distribution() {
463
463
  #[test]
464
464
  fn test_keyword_struct_properties() {
465
465
  let config = KeywordConfig::default();
466
- let keywords = extract_keywords(ML_DOCUMENT, &config).unwrap();
466
+ let keywords = extract_keywords(ML_DOCUMENT, &config).expect("Operation failed");
467
467
 
468
468
  if keywords.is_empty() {
469
469
  return;
@@ -189,7 +189,7 @@ Global warming is the long-term heating of Earth's climate system. Climate scien
189
189
  #[test]
190
190
  fn test_yake_quality_ml_document_default_config() {
191
191
  let config = KeywordConfig::yake();
192
- let keywords = extract_keywords(ML_DOC_SAMPLE, &config).unwrap();
192
+ let keywords = extract_keywords(ML_DOC_SAMPLE, &config).expect("Operation failed");
193
193
 
194
194
  assert!(!keywords.is_empty(), "Should extract keywords with default config");
195
195
 
@@ -239,7 +239,7 @@ fn test_yake_quality_ml_document_default_config() {
239
239
  #[test]
240
240
  fn test_rake_quality_ml_document_default_config() {
241
241
  let config = KeywordConfig::rake();
242
- let keywords = extract_keywords(ML_DOC_SAMPLE, &config).unwrap();
242
+ let keywords = extract_keywords(ML_DOC_SAMPLE, &config).expect("Operation failed");
243
243
 
244
244
  assert!(!keywords.is_empty(), "Should extract keywords with default config");
245
245
 
@@ -289,7 +289,7 @@ fn test_rake_quality_ml_document_default_config() {
289
289
  #[test]
290
290
  fn test_yake_quality_climate_document_default_config() {
291
291
  let config = KeywordConfig::yake();
292
- let keywords = extract_keywords(CLIMATE_DOC_SAMPLE, &config).unwrap();
292
+ let keywords = extract_keywords(CLIMATE_DOC_SAMPLE, &config).expect("Operation failed");
293
293
 
294
294
  assert!(!keywords.is_empty(), "Should extract keywords with default config");
295
295
 
@@ -331,7 +331,7 @@ fn test_yake_quality_climate_document_default_config() {
331
331
  #[test]
332
332
  fn test_rake_quality_climate_document_default_config() {
333
333
  let config = KeywordConfig::rake();
334
- let keywords = extract_keywords(CLIMATE_DOC_SAMPLE, &config).unwrap();
334
+ let keywords = extract_keywords(CLIMATE_DOC_SAMPLE, &config).expect("Operation failed");
335
335
 
336
336
  assert!(!keywords.is_empty(), "Should extract keywords with default config");
337
337
 
@@ -375,8 +375,8 @@ fn test_yake_vs_rake_quality_comparison() {
375
375
  let yake_config = KeywordConfig::yake();
376
376
  let rake_config = KeywordConfig::rake();
377
377
 
378
- let yake_keywords = extract_keywords(ML_DOC_SAMPLE, &yake_config).unwrap();
379
- let rake_keywords = extract_keywords(ML_DOC_SAMPLE, &rake_config).unwrap();
378
+ let yake_keywords = extract_keywords(ML_DOC_SAMPLE, &yake_config).expect("Operation failed");
379
+ let rake_keywords = extract_keywords(ML_DOC_SAMPLE, &rake_config).expect("Operation failed");
380
380
 
381
381
  let yake_extracted: Vec<&str> = yake_keywords.iter().map(|k| k.text.as_str()).collect();
382
382
  let rake_extracted: Vec<&str> = rake_keywords.iter().map(|k| k.text.as_str()).collect();
@@ -414,7 +414,7 @@ fn test_yake_quality_with_optimized_config() {
414
414
  .with_ngram_range(1, 3)
415
415
  .with_min_score(0.0);
416
416
 
417
- let keywords = extract_keywords(ML_DOC_SAMPLE, &config).unwrap();
417
+ let keywords = extract_keywords(ML_DOC_SAMPLE, &config).expect("Operation failed");
418
418
 
419
419
  let extracted: Vec<&str> = keywords.iter().map(|k| k.text.as_str()).collect();
420
420
  let ground_truth = get_ml_ground_truth();
@@ -441,7 +441,7 @@ fn test_rake_quality_with_optimized_config() {
441
441
  .with_ngram_range(1, 3)
442
442
  .with_min_score(0.0);
443
443
 
444
- let keywords = extract_keywords(ML_DOC_SAMPLE, &config).unwrap();
444
+ let keywords = extract_keywords(ML_DOC_SAMPLE, &config).expect("Operation failed");
445
445
 
446
446
  let extracted: Vec<&str> = keywords.iter().map(|k| k.text.as_str()).collect();
447
447
  let ground_truth = get_ml_ground_truth();
@@ -464,7 +464,7 @@ fn test_rake_quality_with_optimized_config() {
464
464
  #[test]
465
465
  fn test_extracted_keywords_are_domain_relevant() {
466
466
  let config = KeywordConfig::default();
467
- let keywords = extract_keywords(ML_DOC_SAMPLE, &config).unwrap();
467
+ let keywords = extract_keywords(ML_DOC_SAMPLE, &config).expect("Operation failed");
468
468
 
469
469
  let ml_terms = [
470
470
  "machine",
@@ -28,9 +28,9 @@ fn test_file_path(filename: &str) -> PathBuf {
28
28
  let manifest_dir = env!("CARGO_MANIFEST_DIR");
29
29
  PathBuf::from(manifest_dir)
30
30
  .parent()
31
- .unwrap()
31
+ .expect("Operation failed")
32
32
  .parent()
33
- .unwrap()
33
+ .expect("Operation failed")
34
34
  .join("test_documents")
35
35
  .join("latex")
36
36
  .join(filename)
@@ -486,5 +486,5 @@ async fn test_special_characters_in_metadata() {
486
486
 
487
487
  let title = result.metadata.additional.get("title").and_then(|v| v.as_str());
488
488
  assert!(title.is_some());
489
- assert!(title.unwrap().contains("&") || title.unwrap().contains("Part"));
489
+ assert!(title.expect("Operation failed").contains("&") || title.expect("Operation failed").contains("Part"));
490
490
  }