local-deep-research 0.4.4__py3-none-any.whl → 0.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (220) hide show
  1. local_deep_research/__init__.py +7 -0
  2. local_deep_research/__version__.py +1 -1
  3. local_deep_research/advanced_search_system/answer_decoding/__init__.py +5 -0
  4. local_deep_research/advanced_search_system/answer_decoding/browsecomp_answer_decoder.py +421 -0
  5. local_deep_research/advanced_search_system/candidate_exploration/README.md +219 -0
  6. local_deep_research/advanced_search_system/candidate_exploration/__init__.py +25 -0
  7. local_deep_research/advanced_search_system/candidate_exploration/adaptive_explorer.py +329 -0
  8. local_deep_research/advanced_search_system/candidate_exploration/base_explorer.py +341 -0
  9. local_deep_research/advanced_search_system/candidate_exploration/constraint_guided_explorer.py +436 -0
  10. local_deep_research/advanced_search_system/candidate_exploration/diversity_explorer.py +457 -0
  11. local_deep_research/advanced_search_system/candidate_exploration/parallel_explorer.py +250 -0
  12. local_deep_research/advanced_search_system/candidate_exploration/progressive_explorer.py +255 -0
  13. local_deep_research/advanced_search_system/candidates/__init__.py +5 -0
  14. local_deep_research/advanced_search_system/candidates/base_candidate.py +59 -0
  15. local_deep_research/advanced_search_system/constraint_checking/README.md +150 -0
  16. local_deep_research/advanced_search_system/constraint_checking/__init__.py +35 -0
  17. local_deep_research/advanced_search_system/constraint_checking/base_constraint_checker.py +122 -0
  18. local_deep_research/advanced_search_system/constraint_checking/constraint_checker.py +223 -0
  19. local_deep_research/advanced_search_system/constraint_checking/constraint_satisfaction_tracker.py +387 -0
  20. local_deep_research/advanced_search_system/constraint_checking/dual_confidence_checker.py +424 -0
  21. local_deep_research/advanced_search_system/constraint_checking/evidence_analyzer.py +174 -0
  22. local_deep_research/advanced_search_system/constraint_checking/intelligent_constraint_relaxer.py +503 -0
  23. local_deep_research/advanced_search_system/constraint_checking/rejection_engine.py +143 -0
  24. local_deep_research/advanced_search_system/constraint_checking/strict_checker.py +259 -0
  25. local_deep_research/advanced_search_system/constraint_checking/threshold_checker.py +213 -0
  26. local_deep_research/advanced_search_system/constraints/__init__.py +6 -0
  27. local_deep_research/advanced_search_system/constraints/base_constraint.py +58 -0
  28. local_deep_research/advanced_search_system/constraints/constraint_analyzer.py +143 -0
  29. local_deep_research/advanced_search_system/evidence/__init__.py +12 -0
  30. local_deep_research/advanced_search_system/evidence/base_evidence.py +57 -0
  31. local_deep_research/advanced_search_system/evidence/evaluator.py +159 -0
  32. local_deep_research/advanced_search_system/evidence/requirements.py +122 -0
  33. local_deep_research/advanced_search_system/filters/base_filter.py +3 -1
  34. local_deep_research/advanced_search_system/filters/cross_engine_filter.py +8 -2
  35. local_deep_research/advanced_search_system/filters/journal_reputation_filter.py +43 -29
  36. local_deep_research/advanced_search_system/findings/repository.py +54 -17
  37. local_deep_research/advanced_search_system/knowledge/standard_knowledge.py +3 -1
  38. local_deep_research/advanced_search_system/query_generation/adaptive_query_generator.py +405 -0
  39. local_deep_research/advanced_search_system/questions/__init__.py +16 -0
  40. local_deep_research/advanced_search_system/questions/atomic_fact_question.py +171 -0
  41. local_deep_research/advanced_search_system/questions/browsecomp_question.py +287 -0
  42. local_deep_research/advanced_search_system/questions/decomposition_question.py +13 -4
  43. local_deep_research/advanced_search_system/questions/entity_aware_question.py +184 -0
  44. local_deep_research/advanced_search_system/questions/standard_question.py +9 -3
  45. local_deep_research/advanced_search_system/search_optimization/cross_constraint_manager.py +624 -0
  46. local_deep_research/advanced_search_system/source_management/diversity_manager.py +613 -0
  47. local_deep_research/advanced_search_system/strategies/__init__.py +42 -0
  48. local_deep_research/advanced_search_system/strategies/adaptive_decomposition_strategy.py +564 -0
  49. local_deep_research/advanced_search_system/strategies/base_strategy.py +4 -4
  50. local_deep_research/advanced_search_system/strategies/browsecomp_entity_strategy.py +1031 -0
  51. local_deep_research/advanced_search_system/strategies/browsecomp_optimized_strategy.py +778 -0
  52. local_deep_research/advanced_search_system/strategies/concurrent_dual_confidence_strategy.py +446 -0
  53. local_deep_research/advanced_search_system/strategies/constrained_search_strategy.py +1348 -0
  54. local_deep_research/advanced_search_system/strategies/constraint_parallel_strategy.py +522 -0
  55. local_deep_research/advanced_search_system/strategies/direct_search_strategy.py +217 -0
  56. local_deep_research/advanced_search_system/strategies/dual_confidence_strategy.py +320 -0
  57. local_deep_research/advanced_search_system/strategies/dual_confidence_with_rejection.py +219 -0
  58. local_deep_research/advanced_search_system/strategies/early_stop_constrained_strategy.py +369 -0
  59. local_deep_research/advanced_search_system/strategies/entity_aware_source_strategy.py +140 -0
  60. local_deep_research/advanced_search_system/strategies/evidence_based_strategy.py +1248 -0
  61. local_deep_research/advanced_search_system/strategies/evidence_based_strategy_v2.py +1337 -0
  62. local_deep_research/advanced_search_system/strategies/focused_iteration_strategy.py +537 -0
  63. local_deep_research/advanced_search_system/strategies/improved_evidence_based_strategy.py +782 -0
  64. local_deep_research/advanced_search_system/strategies/iterative_reasoning_strategy.py +760 -0
  65. local_deep_research/advanced_search_system/strategies/iterdrag_strategy.py +55 -21
  66. local_deep_research/advanced_search_system/strategies/llm_driven_modular_strategy.py +865 -0
  67. local_deep_research/advanced_search_system/strategies/modular_strategy.py +1142 -0
  68. local_deep_research/advanced_search_system/strategies/parallel_constrained_strategy.py +506 -0
  69. local_deep_research/advanced_search_system/strategies/parallel_search_strategy.py +34 -16
  70. local_deep_research/advanced_search_system/strategies/rapid_search_strategy.py +29 -9
  71. local_deep_research/advanced_search_system/strategies/recursive_decomposition_strategy.py +492 -0
  72. local_deep_research/advanced_search_system/strategies/smart_decomposition_strategy.py +284 -0
  73. local_deep_research/advanced_search_system/strategies/smart_query_strategy.py +515 -0
  74. local_deep_research/advanced_search_system/strategies/source_based_strategy.py +48 -24
  75. local_deep_research/advanced_search_system/strategies/standard_strategy.py +34 -14
  76. local_deep_research/advanced_search_system/tools/base_tool.py +7 -2
  77. local_deep_research/api/benchmark_functions.py +6 -2
  78. local_deep_research/api/research_functions.py +10 -4
  79. local_deep_research/benchmarks/__init__.py +9 -7
  80. local_deep_research/benchmarks/benchmark_functions.py +6 -2
  81. local_deep_research/benchmarks/cli/benchmark_commands.py +27 -10
  82. local_deep_research/benchmarks/cli.py +38 -13
  83. local_deep_research/benchmarks/comparison/__init__.py +4 -2
  84. local_deep_research/benchmarks/comparison/evaluator.py +316 -239
  85. local_deep_research/benchmarks/datasets/__init__.py +1 -1
  86. local_deep_research/benchmarks/datasets/base.py +91 -72
  87. local_deep_research/benchmarks/datasets/browsecomp.py +54 -33
  88. local_deep_research/benchmarks/datasets/custom_dataset_template.py +19 -19
  89. local_deep_research/benchmarks/datasets/simpleqa.py +14 -14
  90. local_deep_research/benchmarks/datasets/utils.py +48 -29
  91. local_deep_research/benchmarks/datasets.py +4 -11
  92. local_deep_research/benchmarks/efficiency/__init__.py +8 -4
  93. local_deep_research/benchmarks/efficiency/resource_monitor.py +223 -171
  94. local_deep_research/benchmarks/efficiency/speed_profiler.py +62 -48
  95. local_deep_research/benchmarks/evaluators/browsecomp.py +3 -1
  96. local_deep_research/benchmarks/evaluators/composite.py +6 -2
  97. local_deep_research/benchmarks/evaluators/simpleqa.py +36 -13
  98. local_deep_research/benchmarks/graders.py +32 -10
  99. local_deep_research/benchmarks/metrics/README.md +1 -1
  100. local_deep_research/benchmarks/metrics/calculation.py +25 -10
  101. local_deep_research/benchmarks/metrics/reporting.py +7 -3
  102. local_deep_research/benchmarks/metrics/visualization.py +42 -23
  103. local_deep_research/benchmarks/metrics.py +1 -1
  104. local_deep_research/benchmarks/optimization/__init__.py +3 -1
  105. local_deep_research/benchmarks/optimization/api.py +7 -1
  106. local_deep_research/benchmarks/optimization/optuna_optimizer.py +75 -26
  107. local_deep_research/benchmarks/runners.py +48 -15
  108. local_deep_research/citation_handler.py +65 -92
  109. local_deep_research/citation_handlers/__init__.py +15 -0
  110. local_deep_research/citation_handlers/base_citation_handler.py +70 -0
  111. local_deep_research/citation_handlers/forced_answer_citation_handler.py +179 -0
  112. local_deep_research/citation_handlers/precision_extraction_handler.py +550 -0
  113. local_deep_research/citation_handlers/standard_citation_handler.py +80 -0
  114. local_deep_research/config/llm_config.py +271 -169
  115. local_deep_research/config/search_config.py +14 -5
  116. local_deep_research/defaults/__init__.py +0 -1
  117. local_deep_research/metrics/__init__.py +13 -0
  118. local_deep_research/metrics/database.py +58 -0
  119. local_deep_research/metrics/db_models.py +115 -0
  120. local_deep_research/metrics/migrate_add_provider_to_token_usage.py +148 -0
  121. local_deep_research/metrics/migrate_call_stack_tracking.py +105 -0
  122. local_deep_research/metrics/migrate_enhanced_tracking.py +75 -0
  123. local_deep_research/metrics/migrate_research_ratings.py +31 -0
  124. local_deep_research/metrics/models.py +61 -0
  125. local_deep_research/metrics/pricing/__init__.py +12 -0
  126. local_deep_research/metrics/pricing/cost_calculator.py +237 -0
  127. local_deep_research/metrics/pricing/pricing_cache.py +143 -0
  128. local_deep_research/metrics/pricing/pricing_fetcher.py +240 -0
  129. local_deep_research/metrics/query_utils.py +51 -0
  130. local_deep_research/metrics/search_tracker.py +380 -0
  131. local_deep_research/metrics/token_counter.py +1078 -0
  132. local_deep_research/migrate_db.py +3 -1
  133. local_deep_research/report_generator.py +22 -8
  134. local_deep_research/search_system.py +390 -9
  135. local_deep_research/test_migration.py +15 -5
  136. local_deep_research/utilities/db_utils.py +7 -4
  137. local_deep_research/utilities/es_utils.py +115 -104
  138. local_deep_research/utilities/llm_utils.py +15 -5
  139. local_deep_research/utilities/log_utils.py +151 -0
  140. local_deep_research/utilities/search_cache.py +387 -0
  141. local_deep_research/utilities/search_utilities.py +14 -6
  142. local_deep_research/utilities/threading_utils.py +92 -0
  143. local_deep_research/utilities/url_utils.py +6 -0
  144. local_deep_research/web/api.py +347 -0
  145. local_deep_research/web/app.py +13 -17
  146. local_deep_research/web/app_factory.py +71 -66
  147. local_deep_research/web/database/migrate_to_ldr_db.py +12 -4
  148. local_deep_research/web/database/migrations.py +5 -3
  149. local_deep_research/web/database/models.py +51 -2
  150. local_deep_research/web/database/schema_upgrade.py +49 -29
  151. local_deep_research/web/models/database.py +51 -61
  152. local_deep_research/web/routes/api_routes.py +56 -22
  153. local_deep_research/web/routes/benchmark_routes.py +4 -1
  154. local_deep_research/web/routes/globals.py +22 -0
  155. local_deep_research/web/routes/history_routes.py +71 -46
  156. local_deep_research/web/routes/metrics_routes.py +1155 -0
  157. local_deep_research/web/routes/research_routes.py +227 -41
  158. local_deep_research/web/routes/settings_routes.py +156 -55
  159. local_deep_research/web/services/research_service.py +310 -103
  160. local_deep_research/web/services/resource_service.py +36 -11
  161. local_deep_research/web/services/settings_manager.py +55 -17
  162. local_deep_research/web/services/settings_service.py +12 -4
  163. local_deep_research/web/services/socket_service.py +295 -188
  164. local_deep_research/web/static/css/custom_dropdown.css +180 -0
  165. local_deep_research/web/static/css/styles.css +39 -1
  166. local_deep_research/web/static/js/components/detail.js +633 -267
  167. local_deep_research/web/static/js/components/details.js +751 -0
  168. local_deep_research/web/static/js/components/fallback/formatting.js +11 -11
  169. local_deep_research/web/static/js/components/fallback/ui.js +23 -23
  170. local_deep_research/web/static/js/components/history.js +76 -76
  171. local_deep_research/web/static/js/components/logpanel.js +61 -13
  172. local_deep_research/web/static/js/components/progress.js +13 -2
  173. local_deep_research/web/static/js/components/research.js +99 -12
  174. local_deep_research/web/static/js/components/results.js +239 -106
  175. local_deep_research/web/static/js/main.js +40 -40
  176. local_deep_research/web/static/js/services/audio.js +1 -1
  177. local_deep_research/web/static/js/services/formatting.js +11 -11
  178. local_deep_research/web/static/js/services/keyboard.js +157 -0
  179. local_deep_research/web/static/js/services/pdf.js +80 -80
  180. local_deep_research/web/static/sounds/README.md +1 -1
  181. local_deep_research/web/templates/base.html +1 -0
  182. local_deep_research/web/templates/components/log_panel.html +7 -1
  183. local_deep_research/web/templates/components/mobile_nav.html +1 -1
  184. local_deep_research/web/templates/components/sidebar.html +3 -0
  185. local_deep_research/web/templates/pages/cost_analytics.html +1245 -0
  186. local_deep_research/web/templates/pages/details.html +325 -24
  187. local_deep_research/web/templates/pages/history.html +1 -1
  188. local_deep_research/web/templates/pages/metrics.html +1929 -0
  189. local_deep_research/web/templates/pages/progress.html +2 -2
  190. local_deep_research/web/templates/pages/research.html +53 -17
  191. local_deep_research/web/templates/pages/results.html +12 -1
  192. local_deep_research/web/templates/pages/star_reviews.html +803 -0
  193. local_deep_research/web/utils/formatters.py +9 -3
  194. local_deep_research/web_search_engines/default_search_engines.py +5 -3
  195. local_deep_research/web_search_engines/engines/full_search.py +8 -2
  196. local_deep_research/web_search_engines/engines/meta_search_engine.py +59 -20
  197. local_deep_research/web_search_engines/engines/search_engine_arxiv.py +19 -6
  198. local_deep_research/web_search_engines/engines/search_engine_brave.py +6 -2
  199. local_deep_research/web_search_engines/engines/search_engine_ddg.py +3 -1
  200. local_deep_research/web_search_engines/engines/search_engine_elasticsearch.py +81 -58
  201. local_deep_research/web_search_engines/engines/search_engine_github.py +46 -15
  202. local_deep_research/web_search_engines/engines/search_engine_google_pse.py +16 -6
  203. local_deep_research/web_search_engines/engines/search_engine_guardian.py +39 -15
  204. local_deep_research/web_search_engines/engines/search_engine_local.py +58 -25
  205. local_deep_research/web_search_engines/engines/search_engine_local_all.py +15 -5
  206. local_deep_research/web_search_engines/engines/search_engine_pubmed.py +63 -21
  207. local_deep_research/web_search_engines/engines/search_engine_searxng.py +37 -11
  208. local_deep_research/web_search_engines/engines/search_engine_semantic_scholar.py +27 -9
  209. local_deep_research/web_search_engines/engines/search_engine_serpapi.py +12 -4
  210. local_deep_research/web_search_engines/engines/search_engine_wayback.py +31 -10
  211. local_deep_research/web_search_engines/engines/search_engine_wikipedia.py +12 -3
  212. local_deep_research/web_search_engines/search_engine_base.py +83 -35
  213. local_deep_research/web_search_engines/search_engine_factory.py +25 -8
  214. local_deep_research/web_search_engines/search_engines_config.py +9 -3
  215. {local_deep_research-0.4.4.dist-info → local_deep_research-0.5.0.dist-info}/METADATA +7 -1
  216. local_deep_research-0.5.0.dist-info/RECORD +265 -0
  217. local_deep_research-0.4.4.dist-info/RECORD +0 -177
  218. {local_deep_research-0.4.4.dist-info → local_deep_research-0.5.0.dist-info}/WHEEL +0 -0
  219. {local_deep_research-0.4.4.dist-info → local_deep_research-0.5.0.dist-info}/entry_points.txt +0 -0
  220. {local_deep_research-0.4.4.dist-info → local_deep_research-0.5.0.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,503 @@
1
+ """
2
+ Intelligent Constraint Relaxation Strategy
3
+
4
+ This module implements progressive constraint relaxation to improve BrowseComp
5
+ performance when strict constraint matching fails.
6
+
7
+ Based on BROWSECOMP_IMPROVEMENT_STRATEGY.md recommendations for handling
8
+ complex multi-constraint queries that may not have perfect matches.
9
+ """
10
+
11
+ import logging
12
+ from typing import Dict, List
13
+
14
logger = logging.getLogger(__name__)


class IntelligentConstraintRelaxer:
    """
    Progressive constraint relaxation based on search results and constraint reliability.

    Features:
    1. Maintains essential identifying constraints
    2. Relaxes problematic constraint types first
    3. Creates multiple search attempts with different constraint sets
    4. Preserves constraint importance hierarchy

    Constraints are treated as opaque objects: their type is read from a
    ``type`` or ``constraint_type`` attribute when present, otherwise it is
    guessed from ``str(constraint)``. Plain strings are accepted as
    constraints.
    """

    # Priority assumed for constraint types missing from the priority table.
    _DEFAULT_PRIORITY = 5

    def __init__(self):
        # Constraint priorities (higher = more important, never relax)
        self.constraint_priorities = {
            "NAME_PATTERN": 10,  # Never relax - essential for identification
            "EXISTENCE": 9,  # Rarely relax - basic entity existence
            "LOCATION": 8,  # Usually important for identification
            "TEMPORAL": 7,  # Dates often crucial but sometimes fuzzy
            "PROPERTY": 6,  # Basic properties, moderately important
            "EVENT": 5,  # Events can be important but sometimes optional
            "STATISTIC": 3,  # Often relax - numbers frequently imprecise
            "COMPARISON": 1,  # Frequently relax - relative comparisons problematic
            "RELATIONSHIP": 2,  # Often problematic due to complexity
        }

        # Minimum constraints to keep for meaningful search
        self.min_constraints = 2

        # Constraint relaxation strategies by type
        self.relaxation_strategies = {
            "STATISTIC": self._relax_statistical_constraint,
            "COMPARISON": self._relax_comparison_constraint,
            "TEMPORAL": self._relax_temporal_constraint,
            "PROPERTY": self._relax_property_constraint,
        }

    def _priority_of(self, constraint: object) -> int:
        """Return the configured priority of *constraint* (default 5)."""
        return self.constraint_priorities.get(
            self._get_constraint_type(constraint), self._DEFAULT_PRIORITY
        )

    def relax_constraints_progressively(
        self,
        constraints: List[object],
        candidates_found: List[object],
        target_candidates: int = 5,
    ) -> List[List[object]]:
        """
        Generate progressive constraint relaxation sets based on search results.

        Three strategies are combined, in this order:
        1. drop the 1, 2, then 3 lowest-priority constraints,
        2. swap individual constraints for relaxed variations,
        3. keep only constraints with priority >= 7.
        Sets smaller than ``self.min_constraints`` are skipped and duplicate
        sets (compared by the sorted ``str()`` of their members) are removed.

        Args:
            constraints: Original constraint list
            candidates_found: Current candidates found
            target_candidates: Target number of candidates to find

        Returns:
            List of relaxed constraint sets to try. When enough candidates
            were already found, this is ``[constraints]`` unchanged. Sets
            produced by strategy 1 are ordered by ascending priority, not by
            their original position.
        """
        if len(candidates_found) >= target_candidates:
            logger.debug("Sufficient candidates found, no relaxation needed")
            return [constraints]  # No relaxation needed

        logger.info(
            "Only %d candidates found, generating relaxation strategies",
            len(candidates_found),
        )

        # Ascending priority: the most expendable constraints come first.
        by_priority = sorted(constraints, key=self._priority_of)

        relaxed_sets = []

        # Strategy 1: Remove least important constraints progressively
        for i in range(1, min(len(constraints), 4)):  # Max 3 relaxation levels
            # The list is sorted lowest-priority first, so the constraints to
            # drop sit at the FRONT. Slicing from the end would discard the
            # most important ones instead.
            relaxed_set = by_priority[i:]

            if len(relaxed_set) >= self.min_constraints:
                relaxed_sets.append(relaxed_set)
                logger.debug(
                    "Relaxation level %d: Removed %d constraints, %d remaining",
                    i,
                    i,
                    len(relaxed_set),
                )

        # Strategy 2: Create constraint variations for difficult constraints
        relaxed_sets.extend(self._create_constraint_variations(constraints))

        # Strategy 3: Keep only high-priority constraints
        high_priority_constraints = [
            c for c in constraints if self._priority_of(c) >= 7
        ]

        if len(high_priority_constraints) >= self.min_constraints:
            relaxed_sets.append(high_priority_constraints)
            logger.debug(
                "High-priority only: %d constraints",
                len(high_priority_constraints),
            )

        # Remove duplicates while preserving order
        unique_sets = []
        seen_sets = set()

        for constraint_set in relaxed_sets:
            # Order-insensitive, hashable fingerprint of the set
            set_signature = tuple(sorted(str(c) for c in constraint_set))
            if set_signature not in seen_sets:
                seen_sets.add(set_signature)
                unique_sets.append(constraint_set)

        logger.info(
            "Generated %d unique relaxation strategies", len(unique_sets)
        )
        return unique_sets

    def _create_constraint_variations(
        self, constraints: List[object]
    ) -> List[List[object]]:
        """
        Create variations of difficult constraints to improve matching.

        For every constraint whose type has an entry in
        ``self.relaxation_strategies``, each variation returned by that
        strategy yields one new set: a copy of *constraints* with only that
        constraint replaced.

        Args:
            constraints: Original constraints

        Returns:
            List of constraint sets with variations
        """
        variation_sets = []

        for i, constraint in enumerate(constraints):
            strategy = self.relaxation_strategies.get(
                self._get_constraint_type(constraint)
            )
            if strategy is None:
                continue

            for variation in strategy(constraint):
                new_set = list(constraints)
                new_set[i] = variation
                variation_sets.append(new_set)

        return variation_sets

    def _relax_statistical_constraint(self, constraint: object) -> List[object]:
        """
        Create relaxed variations of statistical constraints.

        Statistical constraints often fail due to:
        - Outdated numbers
        - Rounding differences
        - Different measurement units

        Each number in ``str(constraint)`` is replaced, at its own position
        only, by +/- 10%, 20% and 50% ranges and by an "approximately"
        phrasing. Only the first three variations are returned, which are the
        three ranges built for the first number.
        """
        import re

        variations = []
        constraint_text = str(constraint)

        for match in re.finditer(r"\d+(?:\.\d+)?", constraint_text):
            number_str = match.group()
            number = float(number_str)  # the pattern guarantees a valid float

            # Splice at the match position so the same digits inside another
            # number (e.g. "10" within "100") are left untouched.
            before = constraint_text[: match.start()]
            after = constraint_text[match.end() :]

            # Create range variations (+/- 10%, 20%, 50%)
            for tolerance in (0.1, 0.2, 0.5):
                lower = number * (1 - tolerance)
                upper = number * (1 + tolerance)
                relaxed_text = (
                    f"{before}between {lower:.0f} and {upper:.0f}{after}"
                )
                variations.append(
                    self._create_relaxed_constraint(constraint, relaxed_text)
                )

            # Create "approximately" version
            approx_text = f"{before}approximately {number_str}{after}"
            variations.append(
                self._create_relaxed_constraint(constraint, approx_text)
            )

        return variations[:3]  # Limit to avoid too many variations

    def _relax_comparison_constraint(self, constraint: object) -> List[object]:
        """
        Create relaxed variations of comparison constraints.

        Comparison constraints often fail due to:
        - Relative terms are context-dependent
        - "Times more" calculations are complex
        - Baseline comparisons may be unclear

        Works on the lower-cased text of the constraint, so variations are
        lower case. At most three variations are returned.
        """
        variations = []
        constraint_text = str(constraint).lower()

        # Replace strict comparisons with looser ones
        relaxation_mappings = {
            "times more": "significantly more",
            "times larger": "much larger",
            "times bigger": "much bigger",
            "exactly": "approximately",
            "must be": "should be",
            "is the": "is among the",
            "largest": "one of the largest",
            "smallest": "one of the smallest",
            "highest": "among the highest",
            "lowest": "among the lowest",
        }

        for strict_term, relaxed_term in relaxation_mappings.items():
            if strict_term in constraint_text:
                relaxed_text = constraint_text.replace(
                    strict_term, relaxed_term
                )
                variations.append(
                    self._create_relaxed_constraint(constraint, relaxed_text)
                )

        # Remove comparison altogether - focus on the main entity/property
        comparison_indicators = [
            "more than",
            "less than",
            "compared to",
            "relative to",
        ]
        for indicator in comparison_indicators:
            if indicator in constraint_text:
                # Keep only the part before the comparison
                main_part = constraint_text.split(indicator)[0].strip()
                variations.append(
                    self._create_relaxed_constraint(constraint, main_part)
                )

        return variations[:3]

    def _relax_temporal_constraint(self, constraint: object) -> List[object]:
        """
        Create relaxed variations of temporal constraints.

        Temporal constraints often fail due to:
        - Exact dates vs approximate dates
        - Different calendar systems
        - Founding vs incorporation dates

        Years from 1900 to 2099 are widened to their decade and to +/- 1, 2
        and 5 year ranges; exact phrasings such as "founded in" are softened
        regardless of letter case. At most three variations are returned.
        """
        import re

        variations = []
        constraint_text = str(constraint)

        # Extract years
        years = re.findall(r"\b(19\d{2}|20\d{2})\b", constraint_text)

        for year_str in years:
            year = int(year_str)

            # Create decade ranges
            decade_start = (year // 10) * 10
            decade_text = constraint_text.replace(year_str, f"{decade_start}s")
            variations.append(
                self._create_relaxed_constraint(constraint, decade_text)
            )

            # Create +/- ranges
            for range_years in (1, 2, 5):
                range_text = constraint_text.replace(
                    year_str,
                    f"between {year - range_years} and {year + range_years}",
                )
                variations.append(
                    self._create_relaxed_constraint(constraint, range_text)
                )

        # Replace exact temporal terms with approximate ones
        temporal_relaxations = {
            "founded in": "founded around",
            "established in": "established around",
            "created in": "created around",
            "started in": "started around",
            "exactly": "approximately",
        }

        for exact_term, relaxed_term in temporal_relaxations.items():
            # Match and replace with the same case-insensitive rule. A
            # case-sensitive replace after a lower-cased test would leave
            # "Founded in ..." untouched and emit a no-op variation.
            term_pattern = re.compile(re.escape(exact_term), re.IGNORECASE)
            if term_pattern.search(constraint_text):
                relaxed_text = term_pattern.sub(relaxed_term, constraint_text)
                variations.append(
                    self._create_relaxed_constraint(constraint, relaxed_text)
                )

        return variations[:3]

    def _relax_property_constraint(self, constraint: object) -> List[object]:
        """
        Create relaxed variations of property constraints.

        Property constraints can be relaxed by:
        - Making specific properties more general
        - Allowing alternative phrasings
        - Focusing on key attributes

        Works on the lower-cased text of the constraint. At most two
        variations are returned.
        """
        import re

        variations = []
        constraint_text = str(constraint).lower()

        # Make specific properties more general
        property_generalizations = {
            "multinational": "international",
            "conglomerate": "large company",
            "corporation": "company",
            "subsidiary": "part of",
            "headquarters": "based",
            "founded": "established",
            "specialized": "focused",
            "leading": "major",
        }

        for specific, general in property_generalizations.items():
            if specific in constraint_text:
                relaxed_text = constraint_text.replace(specific, general)
                variations.append(
                    self._create_relaxed_constraint(constraint, relaxed_text)
                )

        # Remove adjectives to make constraints less specific
        adjective_patterns = [
            r"\b(very|extremely|highly|most|largest|biggest|smallest)\s+",
            r"\b(major|minor|primary|secondary|main|key)\s+",
        ]

        for pattern in adjective_patterns:
            if re.search(pattern, constraint_text):
                relaxed_text = re.sub(pattern, "", constraint_text)
                variations.append(
                    self._create_relaxed_constraint(constraint, relaxed_text)
                )

        return variations[:2]

    def _create_relaxed_constraint(
        self, original_constraint: object, relaxed_text: str
    ) -> object:
        """
        Create a new constraint object with relaxed text.

        Objects that have a ``__dict__`` are deep-copied and the first
        attribute found among ``value``, ``description`` and ``text`` is set
        to *relaxed_text*; if none exists the copy is returned unchanged.
        Anything else (for example a plain string) is replaced by
        *relaxed_text* itself. The original constraint is never mutated.
        """
        if not hasattr(original_constraint, "__dict__"):
            # If we can't copy the constraint, return a simple string representation
            return relaxed_text

        import copy

        relaxed_constraint = copy.deepcopy(original_constraint)

        # Update the constraint value/description
        for attr_name in ("value", "description", "text"):
            if hasattr(relaxed_constraint, attr_name):
                setattr(relaxed_constraint, attr_name, relaxed_text)
                break

        return relaxed_constraint

    def _get_constraint_type(self, constraint: object) -> str:
        """
        Extract constraint type from constraint object.

        The label comes from ``constraint.type`` or, failing that,
        ``constraint.constraint_type``; an enum-like label contributes its
        ``.value``. Explicit labels are upper-cased because every lookup
        table in this class is keyed by upper-case names. Without either
        attribute the type is inferred from keywords in ``str(constraint)``,
        falling back to ``"PROPERTY"``.
        """
        for attr_name in ("type", "constraint_type"):
            if hasattr(constraint, attr_name):
                raw_label = getattr(constraint, attr_name)
                return str(getattr(raw_label, "value", raw_label)).upper()

        # Try to infer from constraint text; first matching group wins.
        constraint_text = str(constraint).lower()
        keyword_groups = (
            ("NAME_PATTERN", ("name", "called", "known as")),
            ("LOCATION", ("location", "country", "city")),
            ("TEMPORAL", ("year", "date", "when", "time")),
            ("STATISTIC", ("number", "count", "amount")),
            ("EVENT", ("event", "happened", "occurred")),
            ("COMPARISON", ("than", "more", "less", "compared")),
        )
        for label, keywords in keyword_groups:
            if any(word in constraint_text for word in keywords):
                return label

        return "PROPERTY"

    def analyze_relaxation_impact(
        self,
        original_constraints: List[object],
        relaxed_constraints: List[object],
    ) -> Dict:
        """
        Analyze the impact of constraint relaxation.

        Returns analysis of what was changed and the expected impact.

        Removed types are computed as a multiset difference, so dropping one
        of several constraints that share a type is still reported.

        Returns:
            Dict with keys ``original_count``, ``relaxed_count``,
            ``constraints_removed``, ``constraint_changes`` (always an empty
            list), ``priority_impact`` ("high", "medium" or "low"),
            ``recommendation`` and ``removed_constraint_types``.
        """
        from collections import Counter

        analysis = {
            "original_count": len(original_constraints),
            "relaxed_count": len(relaxed_constraints),
            "constraints_removed": len(original_constraints)
            - len(relaxed_constraints),
            "constraint_changes": [],
            "priority_impact": "low",
            "recommendation": "",
        }

        # Check what types of constraints were removed/modified
        original_types = Counter(
            self._get_constraint_type(c) for c in original_constraints
        )
        relaxed_types = Counter(
            self._get_constraint_type(c) for c in relaxed_constraints
        )

        # Counter subtraction keeps only positive counts: one entry per
        # constraint of a type that is no longer covered.
        removed_types = list((original_types - relaxed_types).elements())

        # Assess impact based on what was removed
        high_impact_types = {"NAME_PATTERN", "EXISTENCE", "LOCATION"}
        medium_impact_types = {"TEMPORAL", "EVENT", "PROPERTY"}

        if any(t in high_impact_types for t in removed_types):
            analysis["priority_impact"] = "high"
            analysis["recommendation"] = (
                "High-priority constraints removed. Results may be less accurate."
            )
        elif any(t in medium_impact_types for t in removed_types):
            analysis["priority_impact"] = "medium"
            analysis["recommendation"] = (
                "Medium-priority constraints removed. Check results carefully."
            )
        else:
            analysis["priority_impact"] = "low"
            analysis["recommendation"] = (
                "Low-priority constraints removed. Results should remain accurate."
            )

        analysis["removed_constraint_types"] = removed_types

        return analysis
@@ -0,0 +1,143 @@
1
+ """
2
+ Rejection engine for constraint-based candidate filtering.
3
+
4
+ This module provides logic for rejecting candidates based on constraint violations.
5
+ """
6
+
7
+ from dataclasses import dataclass
8
+ from typing import Dict, List, Optional
9
+
10
+ from loguru import logger
11
+
12
+ from ..candidates.base_candidate import Candidate
13
+ from ..constraints.base_constraint import Constraint
14
+ from .evidence_analyzer import ConstraintEvidence
15
+
16
+
17
@dataclass
class RejectionResult:
    """
    Outcome of checking one candidate against one constraint.

    Attributes:
        should_reject: True when the candidate is to be rejected.
        reason: Short explanation of the decision.
        constraint_value: Value of the constraint that was checked.
        positive_confidence: Positive-evidence confidence reported
            alongside the decision.
        negative_confidence: Negative-evidence confidence reported
            alongside the decision.
    """

    # Field order is part of the interface: it fixes the positional
    # argument order of the generated __init__.
    should_reject: bool
    reason: str
    constraint_value: str
    positive_confidence: float
    negative_confidence: float
26
+
27
+
28
class RejectionEngine:
    """
    Decides whether a candidate should be discarded for violating constraints.

    Every decision comes from two thresholds applied to the averaged
    confidence scores of the evidence gathered for a constraint.
    """

    def __init__(
        self,
        negative_threshold: float = 0.25,  # above this average -> reject
        positive_threshold: float = 0.4,  # below this average -> reject
    ):
        """
        Store the two thresholds used by the rejection rules.

        Args:
            negative_threshold: Average negative confidence above which a
                candidate is rejected
            positive_threshold: Average positive confidence below which a
                candidate is rejected
        """
        self.negative_threshold = negative_threshold
        self.positive_threshold = positive_threshold

    def should_reject_candidate(
        self,
        candidate: Candidate,
        constraint: Constraint,
        evidence_list: List[ConstraintEvidence],
    ) -> RejectionResult:
        """
        Evaluate one constraint's evidence and decide on rejection.

        Args:
            candidate: The candidate being evaluated (not read by this method)
            constraint: The constraint being checked; its ``value`` is
                copied into the result
            evidence_list: Evidence items for this constraint

        Returns:
            RejectionResult: The decision, its reason and the averaged
            confidences (both 0.0 when there is no evidence)
        """
        if evidence_list:
            n_items = len(evidence_list)
            mean_pos = (
                sum(ev.positive_confidence for ev in evidence_list) / n_items
            )
            mean_neg = (
                sum(ev.negative_confidence for ev in evidence_list) / n_items
            )

            if mean_neg > self.negative_threshold:
                # Primary rule: too much contradicting evidence.
                verdict = True
                why = f"High negative evidence ({mean_neg:.0%})"
            elif mean_pos < self.positive_threshold:
                # Secondary rule: not enough supporting evidence.
                verdict = True
                why = f"Insufficient positive evidence ({mean_pos:.0%})"
            else:
                verdict = False
                why = "Constraints satisfied"
        else:
            # Missing evidence is never a reason to reject.
            mean_pos = mean_neg = 0.0
            verdict = False
            why = "No evidence available"

        return RejectionResult(
            should_reject=verdict,
            reason=why,
            constraint_value=constraint.value,
            positive_confidence=mean_pos,
            negative_confidence=mean_neg,
        )

    def check_all_constraints(
        self,
        candidate: Candidate,
        constraint_results: Dict[Constraint, List[ConstraintEvidence]],
    ) -> Optional[RejectionResult]:
        """
        Walk through every constraint and stop at the first rejection.

        Args:
            candidate: The candidate being evaluated
            constraint_results: Mapping from each constraint to its evidence

        Returns:
            The first RejectionResult that demands rejection, or None when
            no constraint rejects the candidate
        """
        for constraint, evidence_list in constraint_results.items():
            outcome = self.should_reject_candidate(
                candidate, constraint, evidence_list
            )
            if not outcome.should_reject:
                continue

            logger.info(
                f"❌ REJECTION: {candidate.name} - {constraint.value} - {outcome.reason}"
            )
            return outcome

        # Loop finished without any rejection.
        logger.info(f"✓ ACCEPTED: {candidate.name} - All constraints satisfied")
        return None