@cdklabs/cdk-appmod-catalog-blueprints 1.5.0 → 1.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (92)
  1. package/.jsii +2537 -204
  2. package/lib/document-processing/adapter/adapter.d.ts +4 -2
  3. package/lib/document-processing/adapter/adapter.js +1 -1
  4. package/lib/document-processing/adapter/queued-s3-adapter.d.ts +9 -2
  5. package/lib/document-processing/adapter/queued-s3-adapter.js +29 -15
  6. package/lib/document-processing/agentic-document-processing.d.ts +4 -0
  7. package/lib/document-processing/agentic-document-processing.js +20 -10
  8. package/lib/document-processing/base-document-processing.d.ts +54 -2
  9. package/lib/document-processing/base-document-processing.js +136 -82
  10. package/lib/document-processing/bedrock-document-processing.d.ts +202 -2
  11. package/lib/document-processing/bedrock-document-processing.js +717 -77
  12. package/lib/document-processing/chunking-config.d.ts +614 -0
  13. package/lib/document-processing/chunking-config.js +5 -0
  14. package/lib/document-processing/default-document-processing-config.js +1 -1
  15. package/lib/document-processing/index.d.ts +1 -0
  16. package/lib/document-processing/index.js +2 -1
  17. package/lib/document-processing/resources/aggregation/handler.py +567 -0
  18. package/lib/document-processing/resources/aggregation/requirements.txt +7 -0
  19. package/lib/document-processing/resources/aggregation/test_handler.py +362 -0
  20. package/lib/document-processing/resources/cleanup/handler.py +276 -0
  21. package/lib/document-processing/resources/cleanup/requirements.txt +5 -0
  22. package/lib/document-processing/resources/cleanup/test_handler.py +436 -0
  23. package/lib/document-processing/resources/default-bedrock-invoke/index.py +85 -3
  24. package/lib/document-processing/resources/default-bedrock-invoke/test_index.py +622 -0
  25. package/lib/document-processing/resources/pdf-chunking/README.md +313 -0
  26. package/lib/document-processing/resources/pdf-chunking/chunking_strategies.py +460 -0
  27. package/lib/document-processing/resources/pdf-chunking/error_handling.py +491 -0
  28. package/lib/document-processing/resources/pdf-chunking/handler.py +958 -0
  29. package/lib/document-processing/resources/pdf-chunking/metrics.py +435 -0
  30. package/lib/document-processing/resources/pdf-chunking/requirements.txt +3 -0
  31. package/lib/document-processing/resources/pdf-chunking/strategy_selection.py +420 -0
  32. package/lib/document-processing/resources/pdf-chunking/structured_logging.py +457 -0
  33. package/lib/document-processing/resources/pdf-chunking/test_chunking_strategies.py +353 -0
  34. package/lib/document-processing/resources/pdf-chunking/test_error_handling.py +487 -0
  35. package/lib/document-processing/resources/pdf-chunking/test_handler.py +609 -0
  36. package/lib/document-processing/resources/pdf-chunking/test_integration.py +694 -0
  37. package/lib/document-processing/resources/pdf-chunking/test_metrics.py +532 -0
  38. package/lib/document-processing/resources/pdf-chunking/test_strategy_selection.py +471 -0
  39. package/lib/document-processing/resources/pdf-chunking/test_structured_logging.py +449 -0
  40. package/lib/document-processing/resources/pdf-chunking/test_token_estimation.py +374 -0
  41. package/lib/document-processing/resources/pdf-chunking/token_estimation.py +189 -0
  42. package/lib/document-processing/tests/agentic-document-processing-nag.test.js +4 -3
  43. package/lib/document-processing/tests/agentic-document-processing.test.js +488 -4
  44. package/lib/document-processing/tests/base-document-processing-nag.test.js +9 -2
  45. package/lib/document-processing/tests/base-document-processing-schema.test.d.ts +1 -0
  46. package/lib/document-processing/tests/base-document-processing-schema.test.js +337 -0
  47. package/lib/document-processing/tests/base-document-processing.test.js +114 -8
  48. package/lib/document-processing/tests/bedrock-document-processing-chunking-nag.test.d.ts +1 -0
  49. package/lib/document-processing/tests/bedrock-document-processing-chunking-nag.test.js +382 -0
  50. package/lib/document-processing/tests/bedrock-document-processing-nag.test.js +4 -3
  51. package/lib/document-processing/tests/bedrock-document-processing-security.test.d.ts +1 -0
  52. package/lib/document-processing/tests/bedrock-document-processing-security.test.js +389 -0
  53. package/lib/document-processing/tests/bedrock-document-processing.test.js +808 -8
  54. package/lib/document-processing/tests/chunking-config.test.d.ts +1 -0
  55. package/lib/document-processing/tests/chunking-config.test.js +238 -0
  56. package/lib/document-processing/tests/queued-s3-adapter-nag.test.js +9 -2
  57. package/lib/document-processing/tests/queued-s3-adapter.test.js +17 -6
  58. package/lib/framework/agents/base-agent.js +1 -1
  59. package/lib/framework/agents/batch-agent.js +1 -1
  60. package/lib/framework/agents/default-agent-config.js +1 -1
  61. package/lib/framework/bedrock/bedrock.js +1 -1
  62. package/lib/framework/custom-resource/default-runtimes.js +1 -1
  63. package/lib/framework/foundation/access-log.js +1 -1
  64. package/lib/framework/foundation/eventbridge-broker.js +1 -1
  65. package/lib/framework/foundation/network.js +1 -1
  66. package/lib/framework/tests/access-log.test.js +5 -2
  67. package/lib/framework/tests/batch-agent.test.js +5 -2
  68. package/lib/framework/tests/bedrock.test.js +5 -2
  69. package/lib/framework/tests/eventbridge-broker.test.js +5 -2
  70. package/lib/framework/tests/framework-nag.test.js +16 -8
  71. package/lib/framework/tests/network.test.js +9 -4
  72. package/lib/tsconfig.tsbuildinfo +1 -1
  73. package/lib/utilities/data-loader.js +1 -1
  74. package/lib/utilities/lambda-iam-utils.js +1 -1
  75. package/lib/utilities/observability/cloudfront-distribution-observability-property-injector.js +1 -1
  76. package/lib/utilities/observability/default-observability-config.js +1 -1
  77. package/lib/utilities/observability/lambda-observability-property-injector.js +1 -1
  78. package/lib/utilities/observability/log-group-data-protection-utils.js +1 -1
  79. package/lib/utilities/observability/powertools-config.d.ts +10 -1
  80. package/lib/utilities/observability/powertools-config.js +19 -3
  81. package/lib/utilities/observability/state-machine-observability-property-injector.js +1 -1
  82. package/lib/utilities/test-utils.d.ts +43 -0
  83. package/lib/utilities/test-utils.js +56 -0
  84. package/lib/utilities/tests/data-loader-nag.test.js +3 -2
  85. package/lib/utilities/tests/data-loader.test.js +3 -2
  86. package/lib/webapp/frontend-construct.js +1 -1
  87. package/lib/webapp/tests/frontend-construct-nag.test.js +3 -2
  88. package/lib/webapp/tests/frontend-construct.test.js +3 -2
  89. package/package.json +6 -5
  90. package/lib/document-processing/resources/default-error-handler/index.js +0 -46
  91. package/lib/document-processing/resources/default-pdf-processor/index.js +0 -46
  92. package/lib/document-processing/resources/default-pdf-validator/index.js +0 -36
@@ -0,0 +1,471 @@
1
+ """
2
+ Unit tests for strategy selection module.
3
+
4
+ Tests cover all three chunking strategies (fixed-pages, token-based, hybrid)
5
+ and threshold boundary conditions.
6
+ """
7
+
8
+ import unittest
9
+ import logging
10
+ from strategy_selection import (
11
+ check_fixed_pages_threshold,
12
+ check_token_based_threshold,
13
+ check_hybrid_threshold,
14
+ select_strategy_and_check_thresholds,
15
+ StrategySelectionResult
16
+ )
17
+
18
+
19
class TestCheckFixedPagesThreshold(unittest.TestCase):
    """Exercise check_fixed_pages_threshold around its page-count cutoff."""

    def _assert_flags(self, page_count, threshold, expected):
        """Run the check and require both returned flags to match ``expected``.

        Every scenario in this class expects the two flags to agree, so a
        single boolean is enough to describe the expected outcome.
        """
        needs_chunking, over_limit = check_fixed_pages_threshold(page_count, threshold)
        verify = self.assertTrue if expected else self.assertFalse
        verify(needs_chunking)
        verify(over_limit)

    def test_below_threshold(self):
        """A page count under the threshold does not trigger chunking."""
        self._assert_flags(50, 100, expected=False)

    def test_at_threshold(self):
        """A page count equal to the threshold does not trigger chunking."""
        self._assert_flags(100, 100, expected=False)

    def test_above_threshold(self):
        """A page count one over the threshold triggers chunking."""
        self._assert_flags(101, 100, expected=True)

    def test_significantly_above_threshold(self):
        """A page count far over the threshold triggers chunking."""
        self._assert_flags(500, 100, expected=True)

    def test_one_page_document(self):
        """A single-page document does not trigger chunking."""
        self._assert_flags(1, 100, expected=False)

    def test_custom_threshold(self):
        """A caller-supplied threshold is the one that gets compared against."""
        self._assert_flags(60, 50, expected=True)
57
+
58
+
59
class TestCheckTokenBasedThreshold(unittest.TestCase):
    """Exercise check_token_based_threshold around its token-count cutoff."""

    def _assert_flags(self, token_count, threshold, expected):
        """Run the check and require both returned flags to match ``expected``.

        Every scenario in this class expects the two flags to agree, so a
        single boolean is enough to describe the expected outcome.
        """
        needs_chunking, over_limit = check_token_based_threshold(token_count, threshold)
        verify = self.assertTrue if expected else self.assertFalse
        verify(needs_chunking)
        verify(over_limit)

    def test_below_threshold(self):
        """A token count under the threshold does not trigger chunking."""
        self._assert_flags(100000, 150000, expected=False)

    def test_at_threshold(self):
        """A token count equal to the threshold does not trigger chunking."""
        self._assert_flags(150000, 150000, expected=False)

    def test_above_threshold(self):
        """A token count one over the threshold triggers chunking."""
        self._assert_flags(150001, 150000, expected=True)

    def test_significantly_above_threshold(self):
        """A token count far over the threshold triggers chunking."""
        self._assert_flags(500000, 150000, expected=True)

    def test_small_document(self):
        """A document with few tokens does not trigger chunking."""
        self._assert_flags(1000, 150000, expected=False)

    def test_custom_threshold(self):
        """A caller-supplied threshold is the one that gets compared against."""
        self._assert_flags(80000, 50000, expected=True)
97
+
98
+
99
class TestCheckHybridThreshold(unittest.TestCase):
    """Exercise check_hybrid_threshold with page and token limits combined."""

    # Every scenario below uses the same pair of limits.
    PAGE_LIMIT = 100
    TOKEN_LIMIT = 150000

    def _assert_outcome(self, pages, tokens, *, chunking, pages_over, tokens_over):
        """Run the hybrid check and verify each of the three returned flags.

        Flags are checked in the order they are returned: chunking required,
        page threshold exceeded, token threshold exceeded.
        """
        needs_chunking, page_flag, token_flag = check_hybrid_threshold(
            pages, tokens, self.PAGE_LIMIT, self.TOKEN_LIMIT
        )
        expectations = (
            (chunking, needs_chunking),
            (pages_over, page_flag),
            (tokens_over, token_flag),
        )
        for expected, actual in expectations:
            verify = self.assertTrue if expected else self.assertFalse
            verify(actual)

    def test_both_below_threshold(self):
        """Neither limit exceeded: no chunking, no flags set."""
        self._assert_outcome(
            50, 100000, chunking=False, pages_over=False, tokens_over=False
        )

    def test_page_exceeds_only(self):
        """Only the page limit exceeded: chunking with the page flag set."""
        self._assert_outcome(
            150, 100000, chunking=True, pages_over=True, tokens_over=False
        )

    def test_token_exceeds_only(self):
        """Only the token limit exceeded: chunking with the token flag set."""
        self._assert_outcome(
            50, 200000, chunking=True, pages_over=False, tokens_over=True
        )

    def test_both_exceed_threshold(self):
        """Both limits exceeded: chunking with both flags set."""
        self._assert_outcome(
            150, 200000, chunking=True, pages_over=True, tokens_over=True
        )

    def test_at_page_threshold(self):
        """Page count equal to its limit (boundary): nothing is exceeded."""
        self._assert_outcome(
            100, 100000, chunking=False, pages_over=False, tokens_over=False
        )

    def test_at_token_threshold(self):
        """Token count equal to its limit (boundary): nothing is exceeded."""
        self._assert_outcome(
            50, 150000, chunking=False, pages_over=False, tokens_over=False
        )

    def test_one_above_page_threshold(self):
        """Page count one over its limit: chunking with the page flag set."""
        self._assert_outcome(
            101, 100000, chunking=True, pages_over=True, tokens_over=False
        )

    def test_one_above_token_threshold(self):
        """Token count one over its limit: chunking with the token flag set."""
        self._assert_outcome(
            50, 150001, chunking=True, pages_over=False, tokens_over=True
        )
173
+
174
+
175
class TestSelectStrategyAndCheckThresholds(unittest.TestCase):
    """Test cases for the select_strategy_and_check_thresholds function.

    Covers the three strategies ('fixed-pages', 'token-based', 'hybrid'),
    the defaults applied when no config is given, and the accepted spellings
    of config keys.
    """

    def test_fixed_pages_strategy_no_chunking(self):
        """Test fixed-pages strategy when no chunking is needed."""
        result = select_strategy_and_check_thresholds(
            total_pages=50,
            total_tokens=200000,  # High tokens, but fixed-pages ignores this
            config={'strategy': 'fixed-pages', 'pageThreshold': 100}
        )
        self.assertFalse(result.requires_chunking)
        self.assertEqual(result.strategy, 'fixed-pages')
        self.assertFalse(result.page_threshold_exceeded)
        self.assertFalse(result.token_threshold_exceeded)  # Not checked
        self.assertIn('50 pages', result.reason)
        self.assertIn('below threshold', result.reason)

    def test_fixed_pages_strategy_chunking_required(self):
        """Test fixed-pages strategy when chunking is required."""
        result = select_strategy_and_check_thresholds(
            total_pages=150,
            total_tokens=50000,  # Low tokens, but fixed-pages ignores this
            config={'strategy': 'fixed-pages', 'pageThreshold': 100}
        )
        self.assertTrue(result.requires_chunking)
        self.assertEqual(result.strategy, 'fixed-pages')
        self.assertTrue(result.page_threshold_exceeded)
        self.assertIn('150 pages', result.reason)
        self.assertIn('exceeding threshold', result.reason)

    def test_token_based_strategy_no_chunking(self):
        """Test token-based strategy when no chunking is needed."""
        result = select_strategy_and_check_thresholds(
            total_pages=200,  # High pages, but token-based ignores this
            total_tokens=100000,
            config={'strategy': 'token-based', 'tokenThreshold': 150000}
        )
        self.assertFalse(result.requires_chunking)
        self.assertEqual(result.strategy, 'token-based')
        self.assertFalse(result.page_threshold_exceeded)  # Not checked
        self.assertFalse(result.token_threshold_exceeded)
        self.assertIn('100,000 tokens', result.reason)
        self.assertIn('below threshold', result.reason)

    def test_token_based_strategy_chunking_required(self):
        """Test token-based strategy when chunking is required."""
        result = select_strategy_and_check_thresholds(
            total_pages=50,  # Low pages, but token-based ignores this
            total_tokens=200000,
            config={'strategy': 'token-based', 'tokenThreshold': 150000}
        )
        self.assertTrue(result.requires_chunking)
        self.assertEqual(result.strategy, 'token-based')
        self.assertTrue(result.token_threshold_exceeded)
        self.assertIn('200,000 tokens', result.reason)
        self.assertIn('exceeding threshold', result.reason)

    def test_hybrid_strategy_no_chunking(self):
        """Test hybrid strategy when no chunking is needed."""
        result = select_strategy_and_check_thresholds(
            total_pages=50,
            total_tokens=100000,
            config={'strategy': 'hybrid', 'pageThreshold': 100, 'tokenThreshold': 150000}
        )
        self.assertFalse(result.requires_chunking)
        self.assertEqual(result.strategy, 'hybrid')
        self.assertFalse(result.page_threshold_exceeded)
        self.assertFalse(result.token_threshold_exceeded)
        self.assertIn('below thresholds', result.reason)

    def test_hybrid_strategy_page_exceeds(self):
        """Test hybrid strategy when page threshold is exceeded."""
        result = select_strategy_and_check_thresholds(
            total_pages=150,
            total_tokens=100000,
            config={'strategy': 'hybrid', 'pageThreshold': 100, 'tokenThreshold': 150000}
        )
        self.assertTrue(result.requires_chunking)
        self.assertEqual(result.strategy, 'hybrid')
        self.assertTrue(result.page_threshold_exceeded)
        self.assertFalse(result.token_threshold_exceeded)
        self.assertIn('150 pages', result.reason)
        self.assertIn('exceeding threshold', result.reason)

    def test_hybrid_strategy_token_exceeds(self):
        """Test hybrid strategy when token threshold is exceeded."""
        result = select_strategy_and_check_thresholds(
            total_pages=50,
            total_tokens=200000,
            config={'strategy': 'hybrid', 'pageThreshold': 100, 'tokenThreshold': 150000}
        )
        self.assertTrue(result.requires_chunking)
        self.assertEqual(result.strategy, 'hybrid')
        self.assertFalse(result.page_threshold_exceeded)
        self.assertTrue(result.token_threshold_exceeded)
        self.assertIn('200,000 tokens', result.reason)
        self.assertIn('exceeding threshold', result.reason)

    def test_hybrid_strategy_both_exceed(self):
        """Test hybrid strategy when both thresholds are exceeded."""
        result = select_strategy_and_check_thresholds(
            total_pages=150,
            total_tokens=200000,
            config={'strategy': 'hybrid', 'pageThreshold': 100, 'tokenThreshold': 150000}
        )
        self.assertTrue(result.requires_chunking)
        self.assertEqual(result.strategy, 'hybrid')
        self.assertTrue(result.page_threshold_exceeded)
        self.assertTrue(result.token_threshold_exceeded)
        self.assertIn('both thresholds exceeded', result.reason)

    def test_default_strategy_is_hybrid(self):
        """Test that default strategy is hybrid when not specified."""
        result = select_strategy_and_check_thresholds(
            total_pages=50,
            total_tokens=100000
        )
        self.assertEqual(result.strategy, 'hybrid')

    def test_default_thresholds(self):
        """Test that default thresholds are applied."""
        result = select_strategy_and_check_thresholds(
            total_pages=50,
            total_tokens=100000
        )
        self.assertEqual(result.page_threshold, 100)
        self.assertEqual(result.token_threshold, 150000)

    def test_custom_thresholds(self):
        """Test with custom threshold values."""
        result = select_strategy_and_check_thresholds(
            total_pages=60,
            total_tokens=80000,
            config={'pageThreshold': 50, 'tokenThreshold': 100000}
        )
        self.assertTrue(result.requires_chunking)
        self.assertEqual(result.page_threshold, 50)
        self.assertEqual(result.token_threshold, 100000)

    def test_result_to_dict(self):
        """Test that result can be converted to dictionary."""
        result = select_strategy_and_check_thresholds(
            total_pages=50,
            total_tokens=100000
        )
        result_dict = result.to_dict()

        self.assertIn('requires_chunking', result_dict)
        self.assertIn('strategy', result_dict)
        self.assertIn('reason', result_dict)
        self.assertIn('document_pages', result_dict)
        self.assertIn('document_tokens', result_dict)
        self.assertIn('page_threshold', result_dict)
        self.assertIn('token_threshold', result_dict)
        self.assertIn('page_threshold_exceeded', result_dict)
        self.assertIn('token_threshold_exceeded', result_dict)

    def test_camel_case_config_keys(self):
        """Test that camelCase config keys are supported."""
        result = select_strategy_and_check_thresholds(
            total_pages=150,
            total_tokens=100000,
            config={'chunkingStrategy': 'fixed-pages', 'pageThreshold': 100}
        )
        self.assertEqual(result.strategy, 'fixed-pages')
        self.assertTrue(result.requires_chunking)

    def test_snake_case_config_keys(self):
        """Test that snake_case config keys are supported.

        The threshold is deliberately different from the default of 100 and
        the page count sits between the two values. If the snake_case key
        were ignored and the default applied, 80 pages would not require
        chunking and the assertions below would fail.
        """
        result = select_strategy_and_check_thresholds(
            total_pages=80,
            total_tokens=100000,
            config={'strategy': 'fixed-pages', 'page_threshold': 50}
        )
        self.assertEqual(result.strategy, 'fixed-pages')
        self.assertEqual(result.page_threshold, 50)
        self.assertTrue(result.requires_chunking)
        self.assertTrue(result.page_threshold_exceeded)
351
+
352
+
353
class TestStrategySelectionResult(unittest.TestCase):
    """Verify the StrategySelectionResult container and its to_dict output."""

    def test_result_attributes(self):
        """Each constructor argument is readable back as an attribute."""
        fields = {
            'requires_chunking': True,
            'strategy': 'hybrid',
            'reason': 'Test reason',
            'document_pages': 150,
            'document_tokens': 200000,
            'page_threshold': 100,
            'token_threshold': 150000,
            'page_threshold_exceeded': True,
            'token_threshold_exceeded': True,
        }
        outcome = StrategySelectionResult(**fields)

        self.assertTrue(outcome.requires_chunking)
        # Non-boolean fields must round-trip with exactly the value passed in.
        for attribute in (
            'strategy',
            'reason',
            'document_pages',
            'document_tokens',
            'page_threshold',
            'token_threshold',
        ):
            self.assertEqual(getattr(outcome, attribute), fields[attribute])
        self.assertTrue(outcome.page_threshold_exceeded)
        self.assertTrue(outcome.token_threshold_exceeded)

    def test_to_dict_returns_all_fields(self):
        """to_dict yields nine entries carrying the constructor values."""
        outcome = StrategySelectionResult(
            requires_chunking=False,
            strategy='token-based',
            reason='Below threshold',
            document_pages=50,
            document_tokens=100000,
            page_threshold=100,
            token_threshold=150000,
            page_threshold_exceeded=False,
            token_threshold_exceeded=False,
        )

        as_dict = outcome.to_dict()

        self.assertEqual(len(as_dict), 9)
        self.assertFalse(as_dict['requires_chunking'])
        expected_values = (
            ('strategy', 'token-based'),
            ('reason', 'Below threshold'),
            ('document_pages', 50),
            ('document_tokens', 100000),
        )
        for key, expected in expected_values:
            self.assertEqual(as_dict[key], expected)
402
+
403
+
404
class TestBoundaryConditions(unittest.TestCase):
    """Check every strategy exactly at, and one step past, its thresholds."""

    @staticmethod
    def _needs_chunking(pages, tokens, config):
        """Return the requires_chunking flag for the given document and config."""
        outcome = select_strategy_and_check_thresholds(
            total_pages=pages,
            total_tokens=tokens,
            config=config,
        )
        return outcome.requires_chunking

    def test_fixed_pages_boundary_at_threshold(self):
        """fixed-pages: equal to the threshold is fine, one page over is not."""
        fixed_pages = {'strategy': 'fixed-pages', 'pageThreshold': 100}

        # Exactly at the threshold: chunking must not be required.
        self.assertFalse(self._needs_chunking(100, 100000, fixed_pages))

        # One page over the threshold: chunking is required.
        self.assertTrue(self._needs_chunking(101, 100000, fixed_pages))

    def test_token_based_boundary_at_threshold(self):
        """token-based: equal to the threshold is fine, one token over is not."""
        token_based = {'strategy': 'token-based', 'tokenThreshold': 150000}

        # Exactly at the threshold: chunking must not be required.
        self.assertFalse(self._needs_chunking(50, 150000, token_based))

        # One token over the threshold: chunking is required.
        self.assertTrue(self._needs_chunking(50, 150001, token_based))

    def test_hybrid_boundary_at_both_thresholds(self):
        """hybrid: both at threshold is fine, either one over is not."""
        hybrid = {'strategy': 'hybrid', 'pageThreshold': 100, 'tokenThreshold': 150000}

        # Pages and tokens both exactly at threshold: no chunking.
        self.assertFalse(self._needs_chunking(100, 150000, hybrid))

        # Pages one over, tokens at threshold: chunking is required.
        self.assertTrue(self._needs_chunking(101, 150000, hybrid))

        # Pages at threshold, tokens one over: chunking is required.
        self.assertTrue(self._needs_chunking(100, 150001, hybrid))
468
+
469
+
470
# Run the suite when this file is executed directly as a script.
if __name__ == "__main__":
    unittest.main()