local-deep-research 0.4.4__py3-none-any.whl → 0.5.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- local_deep_research/__init__.py +7 -0
- local_deep_research/__version__.py +1 -1
- local_deep_research/advanced_search_system/answer_decoding/__init__.py +5 -0
- local_deep_research/advanced_search_system/answer_decoding/browsecomp_answer_decoder.py +421 -0
- local_deep_research/advanced_search_system/candidate_exploration/README.md +219 -0
- local_deep_research/advanced_search_system/candidate_exploration/__init__.py +25 -0
- local_deep_research/advanced_search_system/candidate_exploration/adaptive_explorer.py +329 -0
- local_deep_research/advanced_search_system/candidate_exploration/base_explorer.py +341 -0
- local_deep_research/advanced_search_system/candidate_exploration/constraint_guided_explorer.py +436 -0
- local_deep_research/advanced_search_system/candidate_exploration/diversity_explorer.py +457 -0
- local_deep_research/advanced_search_system/candidate_exploration/parallel_explorer.py +250 -0
- local_deep_research/advanced_search_system/candidate_exploration/progressive_explorer.py +255 -0
- local_deep_research/advanced_search_system/candidates/__init__.py +5 -0
- local_deep_research/advanced_search_system/candidates/base_candidate.py +59 -0
- local_deep_research/advanced_search_system/constraint_checking/README.md +150 -0
- local_deep_research/advanced_search_system/constraint_checking/__init__.py +35 -0
- local_deep_research/advanced_search_system/constraint_checking/base_constraint_checker.py +122 -0
- local_deep_research/advanced_search_system/constraint_checking/constraint_checker.py +223 -0
- local_deep_research/advanced_search_system/constraint_checking/constraint_satisfaction_tracker.py +387 -0
- local_deep_research/advanced_search_system/constraint_checking/dual_confidence_checker.py +424 -0
- local_deep_research/advanced_search_system/constraint_checking/evidence_analyzer.py +174 -0
- local_deep_research/advanced_search_system/constraint_checking/intelligent_constraint_relaxer.py +503 -0
- local_deep_research/advanced_search_system/constraint_checking/rejection_engine.py +143 -0
- local_deep_research/advanced_search_system/constraint_checking/strict_checker.py +259 -0
- local_deep_research/advanced_search_system/constraint_checking/threshold_checker.py +213 -0
- local_deep_research/advanced_search_system/constraints/__init__.py +6 -0
- local_deep_research/advanced_search_system/constraints/base_constraint.py +58 -0
- local_deep_research/advanced_search_system/constraints/constraint_analyzer.py +143 -0
- local_deep_research/advanced_search_system/evidence/__init__.py +12 -0
- local_deep_research/advanced_search_system/evidence/base_evidence.py +57 -0
- local_deep_research/advanced_search_system/evidence/evaluator.py +159 -0
- local_deep_research/advanced_search_system/evidence/requirements.py +122 -0
- local_deep_research/advanced_search_system/filters/base_filter.py +3 -1
- local_deep_research/advanced_search_system/filters/cross_engine_filter.py +8 -2
- local_deep_research/advanced_search_system/filters/journal_reputation_filter.py +43 -29
- local_deep_research/advanced_search_system/findings/repository.py +54 -17
- local_deep_research/advanced_search_system/knowledge/standard_knowledge.py +3 -1
- local_deep_research/advanced_search_system/query_generation/adaptive_query_generator.py +405 -0
- local_deep_research/advanced_search_system/questions/__init__.py +16 -0
- local_deep_research/advanced_search_system/questions/atomic_fact_question.py +171 -0
- local_deep_research/advanced_search_system/questions/browsecomp_question.py +287 -0
- local_deep_research/advanced_search_system/questions/decomposition_question.py +13 -4
- local_deep_research/advanced_search_system/questions/entity_aware_question.py +184 -0
- local_deep_research/advanced_search_system/questions/standard_question.py +9 -3
- local_deep_research/advanced_search_system/search_optimization/cross_constraint_manager.py +624 -0
- local_deep_research/advanced_search_system/source_management/diversity_manager.py +613 -0
- local_deep_research/advanced_search_system/strategies/__init__.py +42 -0
- local_deep_research/advanced_search_system/strategies/adaptive_decomposition_strategy.py +564 -0
- local_deep_research/advanced_search_system/strategies/base_strategy.py +4 -4
- local_deep_research/advanced_search_system/strategies/browsecomp_entity_strategy.py +1031 -0
- local_deep_research/advanced_search_system/strategies/browsecomp_optimized_strategy.py +778 -0
- local_deep_research/advanced_search_system/strategies/concurrent_dual_confidence_strategy.py +446 -0
- local_deep_research/advanced_search_system/strategies/constrained_search_strategy.py +1348 -0
- local_deep_research/advanced_search_system/strategies/constraint_parallel_strategy.py +522 -0
- local_deep_research/advanced_search_system/strategies/direct_search_strategy.py +217 -0
- local_deep_research/advanced_search_system/strategies/dual_confidence_strategy.py +320 -0
- local_deep_research/advanced_search_system/strategies/dual_confidence_with_rejection.py +219 -0
- local_deep_research/advanced_search_system/strategies/early_stop_constrained_strategy.py +369 -0
- local_deep_research/advanced_search_system/strategies/entity_aware_source_strategy.py +140 -0
- local_deep_research/advanced_search_system/strategies/evidence_based_strategy.py +1248 -0
- local_deep_research/advanced_search_system/strategies/evidence_based_strategy_v2.py +1337 -0
- local_deep_research/advanced_search_system/strategies/focused_iteration_strategy.py +537 -0
- local_deep_research/advanced_search_system/strategies/improved_evidence_based_strategy.py +782 -0
- local_deep_research/advanced_search_system/strategies/iterative_reasoning_strategy.py +760 -0
- local_deep_research/advanced_search_system/strategies/iterdrag_strategy.py +55 -21
- local_deep_research/advanced_search_system/strategies/llm_driven_modular_strategy.py +865 -0
- local_deep_research/advanced_search_system/strategies/modular_strategy.py +1142 -0
- local_deep_research/advanced_search_system/strategies/parallel_constrained_strategy.py +506 -0
- local_deep_research/advanced_search_system/strategies/parallel_search_strategy.py +34 -16
- local_deep_research/advanced_search_system/strategies/rapid_search_strategy.py +29 -9
- local_deep_research/advanced_search_system/strategies/recursive_decomposition_strategy.py +492 -0
- local_deep_research/advanced_search_system/strategies/smart_decomposition_strategy.py +284 -0
- local_deep_research/advanced_search_system/strategies/smart_query_strategy.py +515 -0
- local_deep_research/advanced_search_system/strategies/source_based_strategy.py +48 -24
- local_deep_research/advanced_search_system/strategies/standard_strategy.py +34 -14
- local_deep_research/advanced_search_system/tools/base_tool.py +7 -2
- local_deep_research/api/benchmark_functions.py +6 -2
- local_deep_research/api/research_functions.py +10 -4
- local_deep_research/benchmarks/__init__.py +9 -7
- local_deep_research/benchmarks/benchmark_functions.py +6 -2
- local_deep_research/benchmarks/cli/benchmark_commands.py +27 -10
- local_deep_research/benchmarks/cli.py +38 -13
- local_deep_research/benchmarks/comparison/__init__.py +4 -2
- local_deep_research/benchmarks/comparison/evaluator.py +316 -239
- local_deep_research/benchmarks/datasets/__init__.py +1 -1
- local_deep_research/benchmarks/datasets/base.py +91 -72
- local_deep_research/benchmarks/datasets/browsecomp.py +54 -33
- local_deep_research/benchmarks/datasets/custom_dataset_template.py +19 -19
- local_deep_research/benchmarks/datasets/simpleqa.py +14 -14
- local_deep_research/benchmarks/datasets/utils.py +48 -29
- local_deep_research/benchmarks/datasets.py +4 -11
- local_deep_research/benchmarks/efficiency/__init__.py +8 -4
- local_deep_research/benchmarks/efficiency/resource_monitor.py +223 -171
- local_deep_research/benchmarks/efficiency/speed_profiler.py +62 -48
- local_deep_research/benchmarks/evaluators/browsecomp.py +3 -1
- local_deep_research/benchmarks/evaluators/composite.py +6 -2
- local_deep_research/benchmarks/evaluators/simpleqa.py +36 -13
- local_deep_research/benchmarks/graders.py +32 -10
- local_deep_research/benchmarks/metrics/README.md +1 -1
- local_deep_research/benchmarks/metrics/calculation.py +25 -10
- local_deep_research/benchmarks/metrics/reporting.py +7 -3
- local_deep_research/benchmarks/metrics/visualization.py +42 -23
- local_deep_research/benchmarks/metrics.py +1 -1
- local_deep_research/benchmarks/optimization/__init__.py +3 -1
- local_deep_research/benchmarks/optimization/api.py +7 -1
- local_deep_research/benchmarks/optimization/optuna_optimizer.py +75 -26
- local_deep_research/benchmarks/runners.py +48 -15
- local_deep_research/citation_handler.py +65 -92
- local_deep_research/citation_handlers/__init__.py +15 -0
- local_deep_research/citation_handlers/base_citation_handler.py +70 -0
- local_deep_research/citation_handlers/forced_answer_citation_handler.py +179 -0
- local_deep_research/citation_handlers/precision_extraction_handler.py +550 -0
- local_deep_research/citation_handlers/standard_citation_handler.py +80 -0
- local_deep_research/config/llm_config.py +271 -169
- local_deep_research/config/search_config.py +14 -5
- local_deep_research/defaults/__init__.py +0 -1
- local_deep_research/metrics/__init__.py +13 -0
- local_deep_research/metrics/database.py +58 -0
- local_deep_research/metrics/db_models.py +115 -0
- local_deep_research/metrics/migrate_add_provider_to_token_usage.py +148 -0
- local_deep_research/metrics/migrate_call_stack_tracking.py +105 -0
- local_deep_research/metrics/migrate_enhanced_tracking.py +75 -0
- local_deep_research/metrics/migrate_research_ratings.py +31 -0
- local_deep_research/metrics/models.py +61 -0
- local_deep_research/metrics/pricing/__init__.py +12 -0
- local_deep_research/metrics/pricing/cost_calculator.py +237 -0
- local_deep_research/metrics/pricing/pricing_cache.py +143 -0
- local_deep_research/metrics/pricing/pricing_fetcher.py +240 -0
- local_deep_research/metrics/query_utils.py +51 -0
- local_deep_research/metrics/search_tracker.py +380 -0
- local_deep_research/metrics/token_counter.py +1078 -0
- local_deep_research/migrate_db.py +3 -1
- local_deep_research/report_generator.py +22 -8
- local_deep_research/search_system.py +390 -9
- local_deep_research/test_migration.py +15 -5
- local_deep_research/utilities/db_utils.py +7 -4
- local_deep_research/utilities/es_utils.py +115 -104
- local_deep_research/utilities/llm_utils.py +15 -5
- local_deep_research/utilities/log_utils.py +151 -0
- local_deep_research/utilities/search_cache.py +387 -0
- local_deep_research/utilities/search_utilities.py +14 -6
- local_deep_research/utilities/threading_utils.py +92 -0
- local_deep_research/utilities/url_utils.py +6 -0
- local_deep_research/web/api.py +347 -0
- local_deep_research/web/app.py +13 -17
- local_deep_research/web/app_factory.py +71 -66
- local_deep_research/web/database/migrate_to_ldr_db.py +12 -4
- local_deep_research/web/database/migrations.py +20 -3
- local_deep_research/web/database/models.py +74 -25
- local_deep_research/web/database/schema_upgrade.py +49 -29
- local_deep_research/web/models/database.py +63 -83
- local_deep_research/web/routes/api_routes.py +56 -22
- local_deep_research/web/routes/benchmark_routes.py +4 -1
- local_deep_research/web/routes/globals.py +22 -0
- local_deep_research/web/routes/history_routes.py +71 -46
- local_deep_research/web/routes/metrics_routes.py +1155 -0
- local_deep_research/web/routes/research_routes.py +192 -54
- local_deep_research/web/routes/settings_routes.py +156 -55
- local_deep_research/web/services/research_service.py +412 -251
- local_deep_research/web/services/resource_service.py +36 -11
- local_deep_research/web/services/settings_manager.py +55 -17
- local_deep_research/web/services/settings_service.py +12 -4
- local_deep_research/web/services/socket_service.py +295 -188
- local_deep_research/web/static/css/custom_dropdown.css +180 -0
- local_deep_research/web/static/css/styles.css +39 -1
- local_deep_research/web/static/js/components/detail.js +633 -267
- local_deep_research/web/static/js/components/details.js +751 -0
- local_deep_research/web/static/js/components/fallback/formatting.js +11 -11
- local_deep_research/web/static/js/components/fallback/ui.js +23 -23
- local_deep_research/web/static/js/components/history.js +76 -76
- local_deep_research/web/static/js/components/logpanel.js +61 -13
- local_deep_research/web/static/js/components/progress.js +13 -2
- local_deep_research/web/static/js/components/research.js +99 -12
- local_deep_research/web/static/js/components/results.js +239 -106
- local_deep_research/web/static/js/main.js +40 -40
- local_deep_research/web/static/js/services/audio.js +1 -1
- local_deep_research/web/static/js/services/formatting.js +11 -11
- local_deep_research/web/static/js/services/keyboard.js +157 -0
- local_deep_research/web/static/js/services/pdf.js +80 -80
- local_deep_research/web/static/sounds/README.md +1 -1
- local_deep_research/web/templates/base.html +1 -0
- local_deep_research/web/templates/components/log_panel.html +7 -1
- local_deep_research/web/templates/components/mobile_nav.html +1 -1
- local_deep_research/web/templates/components/sidebar.html +3 -0
- local_deep_research/web/templates/pages/cost_analytics.html +1245 -0
- local_deep_research/web/templates/pages/details.html +325 -24
- local_deep_research/web/templates/pages/history.html +1 -1
- local_deep_research/web/templates/pages/metrics.html +1929 -0
- local_deep_research/web/templates/pages/progress.html +2 -2
- local_deep_research/web/templates/pages/research.html +53 -17
- local_deep_research/web/templates/pages/results.html +12 -1
- local_deep_research/web/templates/pages/star_reviews.html +803 -0
- local_deep_research/web/utils/formatters.py +9 -3
- local_deep_research/web_search_engines/default_search_engines.py +5 -3
- local_deep_research/web_search_engines/engines/full_search.py +8 -2
- local_deep_research/web_search_engines/engines/meta_search_engine.py +59 -20
- local_deep_research/web_search_engines/engines/search_engine_arxiv.py +19 -6
- local_deep_research/web_search_engines/engines/search_engine_brave.py +6 -2
- local_deep_research/web_search_engines/engines/search_engine_ddg.py +3 -1
- local_deep_research/web_search_engines/engines/search_engine_elasticsearch.py +81 -58
- local_deep_research/web_search_engines/engines/search_engine_github.py +46 -15
- local_deep_research/web_search_engines/engines/search_engine_google_pse.py +16 -6
- local_deep_research/web_search_engines/engines/search_engine_guardian.py +39 -15
- local_deep_research/web_search_engines/engines/search_engine_local.py +58 -25
- local_deep_research/web_search_engines/engines/search_engine_local_all.py +15 -5
- local_deep_research/web_search_engines/engines/search_engine_pubmed.py +63 -21
- local_deep_research/web_search_engines/engines/search_engine_searxng.py +37 -11
- local_deep_research/web_search_engines/engines/search_engine_semantic_scholar.py +27 -9
- local_deep_research/web_search_engines/engines/search_engine_serpapi.py +12 -4
- local_deep_research/web_search_engines/engines/search_engine_wayback.py +31 -10
- local_deep_research/web_search_engines/engines/search_engine_wikipedia.py +12 -3
- local_deep_research/web_search_engines/search_engine_base.py +83 -35
- local_deep_research/web_search_engines/search_engine_factory.py +25 -8
- local_deep_research/web_search_engines/search_engines_config.py +9 -3
- {local_deep_research-0.4.4.dist-info → local_deep_research-0.5.2.dist-info}/METADATA +7 -1
- local_deep_research-0.5.2.dist-info/RECORD +265 -0
- local_deep_research-0.4.4.dist-info/RECORD +0 -177
- {local_deep_research-0.4.4.dist-info → local_deep_research-0.5.2.dist-info}/WHEEL +0 -0
- {local_deep_research-0.4.4.dist-info → local_deep_research-0.5.2.dist-info}/entry_points.txt +0 -0
- {local_deep_research-0.4.4.dist-info → local_deep_research-0.5.2.dist-info}/licenses/LICENSE +0 -0
local_deep_research/advanced_search_system/constraint_checking/intelligent_constraint_relaxer.py
ADDED
@@ -0,0 +1,503 @@
|
|
1
|
+
"""
|
2
|
+
Intelligent Constraint Relaxation Strategy
|
3
|
+
|
4
|
+
This module implements progressive constraint relaxation to improve BrowseComp
|
5
|
+
performance when strict constraint matching fails.
|
6
|
+
|
7
|
+
Based on BROWSECOMP_IMPROVEMENT_STRATEGY.md recommendations for handling
|
8
|
+
complex multi-constraint queries that may not have perfect matches.
|
9
|
+
"""
|
10
|
+
|
11
|
+
import logging
|
12
|
+
from typing import Dict, List
|
13
|
+
|
14
|
+
logger = logging.getLogger(__name__)
|
15
|
+
|
16
|
+
|
17
|
+
class IntelligentConstraintRelaxer:
|
18
|
+
"""
|
19
|
+
Progressive constraint relaxation based on search results and constraint reliability.
|
20
|
+
|
21
|
+
Features:
|
22
|
+
1. Maintains essential identifying constraints
|
23
|
+
2. Relaxes problematic constraint types first
|
24
|
+
3. Creates multiple search attempts with different constraint sets
|
25
|
+
4. Preserves constraint importance hierarchy
|
26
|
+
"""
|
27
|
+
|
28
|
+
def __init__(self):
|
29
|
+
# Constraint priorities (higher = more important, never relax)
|
30
|
+
self.constraint_priorities = {
|
31
|
+
"NAME_PATTERN": 10, # Never relax - essential for identification
|
32
|
+
"EXISTENCE": 9, # Rarely relax - basic entity existence
|
33
|
+
"LOCATION": 8, # Usually important for identification
|
34
|
+
"TEMPORAL": 7, # Dates often crucial but sometimes fuzzy
|
35
|
+
"PROPERTY": 6, # Basic properties, moderately important
|
36
|
+
"EVENT": 5, # Events can be important but sometimes optional
|
37
|
+
"STATISTIC": 3, # Often relax - numbers frequently imprecise
|
38
|
+
"COMPARISON": 1, # Frequently relax - relative comparisons problematic
|
39
|
+
"RELATIONSHIP": 2, # Often problematic due to complexity
|
40
|
+
}
|
41
|
+
|
42
|
+
# Minimum constraints to keep for meaningful search
|
43
|
+
self.min_constraints = 2
|
44
|
+
|
45
|
+
# Constraint relaxation strategies by type
|
46
|
+
self.relaxation_strategies = {
|
47
|
+
"STATISTIC": self._relax_statistical_constraint,
|
48
|
+
"COMPARISON": self._relax_comparison_constraint,
|
49
|
+
"TEMPORAL": self._relax_temporal_constraint,
|
50
|
+
"PROPERTY": self._relax_property_constraint,
|
51
|
+
}
|
52
|
+
|
53
|
+
def relax_constraints_progressively(
|
54
|
+
self,
|
55
|
+
constraints: List[object],
|
56
|
+
candidates_found: List[object],
|
57
|
+
target_candidates: int = 5,
|
58
|
+
) -> List[List[object]]:
|
59
|
+
"""
|
60
|
+
Generate progressive constraint relaxation sets based on search results.
|
61
|
+
|
62
|
+
Args:
|
63
|
+
constraints: Original constraint list
|
64
|
+
candidates_found: Current candidates found
|
65
|
+
target_candidates: Target number of candidates to find
|
66
|
+
|
67
|
+
Returns:
|
68
|
+
List of relaxed constraint sets to try
|
69
|
+
"""
|
70
|
+
if len(candidates_found) >= target_candidates:
|
71
|
+
logger.debug("Sufficient candidates found, no relaxation needed")
|
72
|
+
return [constraints] # No relaxation needed
|
73
|
+
|
74
|
+
logger.info(
|
75
|
+
f"Only {len(candidates_found)} candidates found, generating relaxation strategies"
|
76
|
+
)
|
77
|
+
|
78
|
+
# Sort constraints by relaxation priority (lowest first)
|
79
|
+
relaxable_constraints = sorted(
|
80
|
+
constraints,
|
81
|
+
key=lambda c: self.constraint_priorities.get(
|
82
|
+
self._get_constraint_type(c), 5
|
83
|
+
),
|
84
|
+
)
|
85
|
+
|
86
|
+
relaxed_sets = []
|
87
|
+
|
88
|
+
# Strategy 1: Remove least important constraints progressively
|
89
|
+
for i in range(1, min(len(constraints), 4)): # Max 3 relaxation levels
|
90
|
+
relaxed_set = relaxable_constraints[
|
91
|
+
:-i
|
92
|
+
] # Remove i lowest priority constraints
|
93
|
+
|
94
|
+
if len(relaxed_set) >= self.min_constraints:
|
95
|
+
relaxed_sets.append(relaxed_set)
|
96
|
+
logger.debug(
|
97
|
+
f"Relaxation level {i}: Removed {i} constraints, {len(relaxed_set)} remaining"
|
98
|
+
)
|
99
|
+
|
100
|
+
# Strategy 2: Create constraint variations for difficult constraints
|
101
|
+
variation_sets = self._create_constraint_variations(constraints)
|
102
|
+
relaxed_sets.extend(variation_sets)
|
103
|
+
|
104
|
+
# Strategy 3: Keep only high-priority constraints
|
105
|
+
high_priority_constraints = [
|
106
|
+
c
|
107
|
+
for c in constraints
|
108
|
+
if self.constraint_priorities.get(self._get_constraint_type(c), 5)
|
109
|
+
>= 7
|
110
|
+
]
|
111
|
+
|
112
|
+
if len(high_priority_constraints) >= self.min_constraints:
|
113
|
+
relaxed_sets.append(high_priority_constraints)
|
114
|
+
logger.debug(
|
115
|
+
f"High-priority only: {len(high_priority_constraints)} constraints"
|
116
|
+
)
|
117
|
+
|
118
|
+
# Remove duplicates while preserving order
|
119
|
+
unique_sets = []
|
120
|
+
seen_sets = set()
|
121
|
+
|
122
|
+
for constraint_set in relaxed_sets:
|
123
|
+
# Create a hashable representation
|
124
|
+
set_signature = tuple(sorted(str(c) for c in constraint_set))
|
125
|
+
if set_signature not in seen_sets:
|
126
|
+
seen_sets.add(set_signature)
|
127
|
+
unique_sets.append(constraint_set)
|
128
|
+
|
129
|
+
logger.info(
|
130
|
+
f"Generated {len(unique_sets)} unique relaxation strategies"
|
131
|
+
)
|
132
|
+
return unique_sets
|
133
|
+
|
134
|
+
def _create_constraint_variations(
|
135
|
+
self, constraints: List[object]
|
136
|
+
) -> List[List[object]]:
|
137
|
+
"""
|
138
|
+
Create variations of difficult constraints to improve matching.
|
139
|
+
|
140
|
+
Args:
|
141
|
+
constraints: Original constraints
|
142
|
+
|
143
|
+
Returns:
|
144
|
+
List of constraint sets with variations
|
145
|
+
"""
|
146
|
+
variation_sets = []
|
147
|
+
|
148
|
+
for i, constraint in enumerate(constraints):
|
149
|
+
constraint_type = self._get_constraint_type(constraint)
|
150
|
+
|
151
|
+
if constraint_type in self.relaxation_strategies:
|
152
|
+
# Create variations for this constraint
|
153
|
+
variations = self.relaxation_strategies[constraint_type](
|
154
|
+
constraint
|
155
|
+
)
|
156
|
+
|
157
|
+
if variations:
|
158
|
+
# Replace original constraint with each variation
|
159
|
+
for variation in variations:
|
160
|
+
new_set = constraints.copy()
|
161
|
+
new_set[i] = variation
|
162
|
+
variation_sets.append(new_set)
|
163
|
+
|
164
|
+
return variation_sets
|
165
|
+
|
166
|
+
def _relax_statistical_constraint(self, constraint: object) -> List[object]:
|
167
|
+
"""
|
168
|
+
Create relaxed variations of statistical constraints.
|
169
|
+
|
170
|
+
Statistical constraints often fail due to:
|
171
|
+
- Outdated numbers
|
172
|
+
- Rounding differences
|
173
|
+
- Different measurement units
|
174
|
+
"""
|
175
|
+
variations = []
|
176
|
+
constraint_text = str(constraint)
|
177
|
+
|
178
|
+
# Extract numbers from constraint
|
179
|
+
import re
|
180
|
+
|
181
|
+
numbers = re.findall(r"\d+(?:\.\d+)?", constraint_text)
|
182
|
+
|
183
|
+
for number_str in numbers:
|
184
|
+
try:
|
185
|
+
number = float(number_str)
|
186
|
+
|
187
|
+
# Create range variations (+/- 10%, 20%, 50%)
|
188
|
+
for tolerance in [0.1, 0.2, 0.5]:
|
189
|
+
lower = number * (1 - tolerance)
|
190
|
+
upper = number * (1 + tolerance)
|
191
|
+
|
192
|
+
# Replace exact number with range
|
193
|
+
relaxed_text = constraint_text.replace(
|
194
|
+
number_str, f"between {lower:.0f} and {upper:.0f}"
|
195
|
+
)
|
196
|
+
|
197
|
+
variations.append(
|
198
|
+
self._create_relaxed_constraint(
|
199
|
+
constraint, relaxed_text
|
200
|
+
)
|
201
|
+
)
|
202
|
+
|
203
|
+
# Create "approximately" version
|
204
|
+
approx_text = constraint_text.replace(
|
205
|
+
number_str, f"approximately {number_str}"
|
206
|
+
)
|
207
|
+
variations.append(
|
208
|
+
self._create_relaxed_constraint(constraint, approx_text)
|
209
|
+
)
|
210
|
+
|
211
|
+
except ValueError:
|
212
|
+
continue
|
213
|
+
|
214
|
+
return variations[:3] # Limit to avoid too many variations
|
215
|
+
|
216
|
+
def _relax_comparison_constraint(self, constraint: object) -> List[object]:
|
217
|
+
"""
|
218
|
+
Create relaxed variations of comparison constraints.
|
219
|
+
|
220
|
+
Comparison constraints often fail due to:
|
221
|
+
- Relative terms are context-dependent
|
222
|
+
- "Times more" calculations are complex
|
223
|
+
- Baseline comparisons may be unclear
|
224
|
+
"""
|
225
|
+
variations = []
|
226
|
+
constraint_text = str(constraint).lower()
|
227
|
+
|
228
|
+
# Replace strict comparisons with looser ones
|
229
|
+
relaxation_mappings = {
|
230
|
+
"times more": "significantly more",
|
231
|
+
"times larger": "much larger",
|
232
|
+
"times bigger": "much bigger",
|
233
|
+
"exactly": "approximately",
|
234
|
+
"must be": "should be",
|
235
|
+
"is the": "is among the",
|
236
|
+
"largest": "one of the largest",
|
237
|
+
"smallest": "one of the smallest",
|
238
|
+
"highest": "among the highest",
|
239
|
+
"lowest": "among the lowest",
|
240
|
+
}
|
241
|
+
|
242
|
+
for strict_term, relaxed_term in relaxation_mappings.items():
|
243
|
+
if strict_term in constraint_text:
|
244
|
+
relaxed_text = constraint_text.replace(
|
245
|
+
strict_term, relaxed_term
|
246
|
+
)
|
247
|
+
variations.append(
|
248
|
+
self._create_relaxed_constraint(constraint, relaxed_text)
|
249
|
+
)
|
250
|
+
|
251
|
+
# Remove comparison altogether - focus on the main entity/property
|
252
|
+
comparison_indicators = [
|
253
|
+
"more than",
|
254
|
+
"less than",
|
255
|
+
"compared to",
|
256
|
+
"relative to",
|
257
|
+
]
|
258
|
+
for indicator in comparison_indicators:
|
259
|
+
if indicator in constraint_text:
|
260
|
+
# Extract the part before the comparison
|
261
|
+
parts = constraint_text.split(indicator)
|
262
|
+
if len(parts) > 1:
|
263
|
+
main_part = parts[0].strip()
|
264
|
+
variations.append(
|
265
|
+
self._create_relaxed_constraint(constraint, main_part)
|
266
|
+
)
|
267
|
+
|
268
|
+
return variations[:3]
|
269
|
+
|
270
|
+
def _relax_temporal_constraint(self, constraint: object) -> List[object]:
|
271
|
+
"""
|
272
|
+
Create relaxed variations of temporal constraints.
|
273
|
+
|
274
|
+
Temporal constraints often fail due to:
|
275
|
+
- Exact dates vs approximate dates
|
276
|
+
- Different calendar systems
|
277
|
+
- Founding vs incorporation dates
|
278
|
+
"""
|
279
|
+
variations = []
|
280
|
+
constraint_text = str(constraint)
|
281
|
+
|
282
|
+
# Extract years
|
283
|
+
import re
|
284
|
+
|
285
|
+
years = re.findall(r"\b(19\d{2}|20\d{2})\b", constraint_text)
|
286
|
+
|
287
|
+
for year_str in years:
|
288
|
+
year = int(year_str)
|
289
|
+
|
290
|
+
# Create decade ranges
|
291
|
+
decade_start = (year // 10) * 10
|
292
|
+
decade_text = constraint_text.replace(year_str, f"{decade_start}s")
|
293
|
+
variations.append(
|
294
|
+
self._create_relaxed_constraint(constraint, decade_text)
|
295
|
+
)
|
296
|
+
|
297
|
+
# Create +/- ranges
|
298
|
+
for range_years in [1, 2, 5]:
|
299
|
+
range_text = constraint_text.replace(
|
300
|
+
year_str,
|
301
|
+
f"between {year - range_years} and {year + range_years}",
|
302
|
+
)
|
303
|
+
variations.append(
|
304
|
+
self._create_relaxed_constraint(constraint, range_text)
|
305
|
+
)
|
306
|
+
|
307
|
+
# Replace exact temporal terms with approximate ones
|
308
|
+
temporal_relaxations = {
|
309
|
+
"founded in": "founded around",
|
310
|
+
"established in": "established around",
|
311
|
+
"created in": "created around",
|
312
|
+
"started in": "started around",
|
313
|
+
"exactly": "approximately",
|
314
|
+
}
|
315
|
+
|
316
|
+
for exact_term, relaxed_term in temporal_relaxations.items():
|
317
|
+
if exact_term in constraint_text.lower():
|
318
|
+
relaxed_text = constraint_text.replace(exact_term, relaxed_term)
|
319
|
+
variations.append(
|
320
|
+
self._create_relaxed_constraint(constraint, relaxed_text)
|
321
|
+
)
|
322
|
+
|
323
|
+
return variations[:3]
|
324
|
+
|
325
|
+
def _relax_property_constraint(self, constraint: object) -> List[object]:
|
326
|
+
"""
|
327
|
+
Create relaxed variations of property constraints.
|
328
|
+
|
329
|
+
Property constraints can be relaxed by:
|
330
|
+
- Making specific properties more general
|
331
|
+
- Allowing alternative phrasings
|
332
|
+
- Focusing on key attributes
|
333
|
+
"""
|
334
|
+
variations = []
|
335
|
+
constraint_text = str(constraint).lower()
|
336
|
+
|
337
|
+
# Make specific properties more general
|
338
|
+
property_generalizations = {
|
339
|
+
"multinational": "international",
|
340
|
+
"conglomerate": "large company",
|
341
|
+
"corporation": "company",
|
342
|
+
"subsidiary": "part of",
|
343
|
+
"headquarters": "based",
|
344
|
+
"founded": "established",
|
345
|
+
"specialized": "focused",
|
346
|
+
"leading": "major",
|
347
|
+
}
|
348
|
+
|
349
|
+
for specific, general in property_generalizations.items():
|
350
|
+
if specific in constraint_text:
|
351
|
+
relaxed_text = constraint_text.replace(specific, general)
|
352
|
+
variations.append(
|
353
|
+
self._create_relaxed_constraint(constraint, relaxed_text)
|
354
|
+
)
|
355
|
+
|
356
|
+
# Remove adjectives to make constraints less specific
|
357
|
+
adjective_patterns = [
|
358
|
+
r"\b(very|extremely|highly|most|largest|biggest|smallest)\s+",
|
359
|
+
r"\b(major|minor|primary|secondary|main|key)\s+",
|
360
|
+
]
|
361
|
+
|
362
|
+
for pattern in adjective_patterns:
|
363
|
+
import re
|
364
|
+
|
365
|
+
if re.search(pattern, constraint_text):
|
366
|
+
relaxed_text = re.sub(pattern, "", constraint_text)
|
367
|
+
variations.append(
|
368
|
+
self._create_relaxed_constraint(constraint, relaxed_text)
|
369
|
+
)
|
370
|
+
|
371
|
+
return variations[:2]
|
372
|
+
|
373
|
+
def _create_relaxed_constraint(
|
374
|
+
self, original_constraint: object, relaxed_text: str
|
375
|
+
) -> object:
|
376
|
+
"""
|
377
|
+
Create a new constraint object with relaxed text.
|
378
|
+
|
379
|
+
This is a helper method that preserves the constraint structure
|
380
|
+
while updating the constraint value/text.
|
381
|
+
"""
|
382
|
+
# Try to create a copy of the constraint with updated text
|
383
|
+
if hasattr(original_constraint, "__dict__"):
|
384
|
+
# Create a copy of the constraint object
|
385
|
+
import copy
|
386
|
+
|
387
|
+
relaxed_constraint = copy.deepcopy(original_constraint)
|
388
|
+
|
389
|
+
# Update the constraint value/description
|
390
|
+
if hasattr(relaxed_constraint, "value"):
|
391
|
+
relaxed_constraint.value = relaxed_text
|
392
|
+
elif hasattr(relaxed_constraint, "description"):
|
393
|
+
relaxed_constraint.description = relaxed_text
|
394
|
+
elif hasattr(relaxed_constraint, "text"):
|
395
|
+
relaxed_constraint.text = relaxed_text
|
396
|
+
|
397
|
+
return relaxed_constraint
|
398
|
+
else:
|
399
|
+
# If we can't copy the constraint, return a simple string representation
|
400
|
+
return relaxed_text
|
401
|
+
|
402
|
+
def _get_constraint_type(self, constraint: object) -> str:
|
403
|
+
"""Extract constraint type from constraint object."""
|
404
|
+
if hasattr(constraint, "type"):
|
405
|
+
if hasattr(constraint.type, "value"):
|
406
|
+
return constraint.type.value
|
407
|
+
else:
|
408
|
+
return str(constraint.type)
|
409
|
+
elif hasattr(constraint, "constraint_type"):
|
410
|
+
return constraint.constraint_type
|
411
|
+
else:
|
412
|
+
# Try to infer from constraint text
|
413
|
+
constraint_text = str(constraint).lower()
|
414
|
+
|
415
|
+
if any(
|
416
|
+
word in constraint_text
|
417
|
+
for word in ["name", "called", "known as"]
|
418
|
+
):
|
419
|
+
return "NAME_PATTERN"
|
420
|
+
elif any(
|
421
|
+
word in constraint_text
|
422
|
+
for word in ["location", "country", "city"]
|
423
|
+
):
|
424
|
+
return "LOCATION"
|
425
|
+
elif any(
|
426
|
+
word in constraint_text
|
427
|
+
for word in ["year", "date", "when", "time"]
|
428
|
+
):
|
429
|
+
return "TEMPORAL"
|
430
|
+
elif any(
|
431
|
+
word in constraint_text
|
432
|
+
for word in ["number", "count", "amount"]
|
433
|
+
):
|
434
|
+
return "STATISTIC"
|
435
|
+
elif any(
|
436
|
+
word in constraint_text
|
437
|
+
for word in ["event", "happened", "occurred"]
|
438
|
+
):
|
439
|
+
return "EVENT"
|
440
|
+
elif any(
|
441
|
+
word in constraint_text
|
442
|
+
for word in ["than", "more", "less", "compared"]
|
443
|
+
):
|
444
|
+
return "COMPARISON"
|
445
|
+
else:
|
446
|
+
return "PROPERTY"
|
447
|
+
|
448
|
+
def analyze_relaxation_impact(
    self,
    original_constraints: List[object],
    relaxed_constraints: List[object],
) -> Dict:
    """
    Analyze the impact of constraint relaxation.

    Resolves a type label for every constraint before and after
    relaxation (via ``self._get_constraint_type``), determines which
    labels were dropped, and rates the expected effect on accuracy.

    Args:
        original_constraints: Constraints before relaxation.
        relaxed_constraints: Constraints after relaxation.

    Returns:
        Dict with the keys:

        - ``original_count`` / ``relaxed_count``: sizes of the two lists.
        - ``constraints_removed``: ``original_count - relaxed_count``.
        - ``constraint_changes``: always an empty list in this method.
        - ``priority_impact``: ``"high"``, ``"medium"`` or ``"low"``.
        - ``recommendation``: human-readable note for the impact level.
        - ``removed_constraint_types``: labels of the original
          constraints that have no counterpart after relaxation, in
          original order.
    """
    original_types = [
        self._get_constraint_type(c) for c in original_constraints
    ]
    relaxed_types = [
        self._get_constraint_type(c) for c in relaxed_constraints
    ]

    # Pair types off one-for-one. A plain membership test would miss the
    # removal of one of several constraints sharing a type (two LOCATION
    # constraints relaxed down to one still "contains" LOCATION).
    unmatched = list(relaxed_types)
    removed_types = []
    for orig_type in original_types:
        if orig_type in unmatched:
            unmatched.remove(orig_type)
        else:
            removed_types.append(orig_type)

    # Assess impact based on what was removed
    high_impact_types = {"NAME_PATTERN", "EXISTENCE", "LOCATION"}
    medium_impact_types = {"TEMPORAL", "EVENT", "PROPERTY"}

    # Labels read off constraint objects are returned verbatim by
    # _get_constraint_type, so their casing is not guaranteed to match the
    # upper-case names above; compare case-insensitively.
    removed_labels = {str(t).upper() for t in removed_types}

    if not removed_labels.isdisjoint(high_impact_types):
        priority_impact = "high"
        recommendation = (
            "High-priority constraints removed. Results may be less accurate."
        )
    elif not removed_labels.isdisjoint(medium_impact_types):
        priority_impact = "medium"
        recommendation = (
            "Medium-priority constraints removed. Check results carefully."
        )
    else:
        priority_impact = "low"
        recommendation = (
            "Low-priority constraints removed. Results should remain accurate."
        )

    return {
        "original_count": len(original_constraints),
        "relaxed_count": len(relaxed_constraints),
        "constraints_removed": len(original_constraints)
        - len(relaxed_constraints),
        "constraint_changes": [],
        "priority_impact": priority_impact,
        "recommendation": recommendation,
        "removed_constraint_types": removed_types,
    }
|
@@ -0,0 +1,143 @@
|
|
1
|
+
"""
|
2
|
+
Rejection engine for constraint-based candidate filtering.
|
3
|
+
|
4
|
+
This module provides logic for rejecting candidates based on constraint violations.
|
5
|
+
"""
|
6
|
+
|
7
|
+
from dataclasses import dataclass
|
8
|
+
from typing import Dict, List, Optional
|
9
|
+
|
10
|
+
from loguru import logger
|
11
|
+
|
12
|
+
from ..candidates.base_candidate import Candidate
|
13
|
+
from ..constraints.base_constraint import Constraint
|
14
|
+
from .evidence_analyzer import ConstraintEvidence
|
15
|
+
|
16
|
+
|
17
|
+
@dataclass
class RejectionResult:
    """
    Outcome of checking one constraint for one candidate.

    Attributes:
        should_reject: True when the candidate should be discarded.
        reason: Short human-readable explanation of the decision.
        constraint_value: Value of the constraint that was checked.
        positive_confidence: Positive-evidence confidence backing the
            decision.
        negative_confidence: Negative-evidence confidence backing the
            decision.
    """

    # Field order is part of the positional constructor signature.
    should_reject: bool
    reason: str
    constraint_value: str
    positive_confidence: float
    negative_confidence: float
|
26
|
+
|
27
|
+
|
28
|
+
class RejectionEngine:
    """
    Decides whether candidates should be rejected for violating constraints.

    Two threshold rules are applied to the averaged evidence for a
    constraint: too much negative evidence rejects first, too little
    positive evidence rejects second.
    """

    def __init__(
        self,
        negative_threshold: float = 0.25,  # Reject if negative evidence > 25%
        positive_threshold: float = 0.4,  # Reject if positive evidence < 40%
    ):
        """
        Store the rejection thresholds.

        Args:
            negative_threshold: Reject when the average negative
                confidence is strictly greater than this value.
            positive_threshold: Reject when the average positive
                confidence is strictly less than this value.
        """
        self.negative_threshold = negative_threshold
        self.positive_threshold = positive_threshold

    def should_reject_candidate(
        self,
        candidate: Candidate,
        constraint: Constraint,
        evidence_list: List[ConstraintEvidence],
    ) -> RejectionResult:
        """
        Decide whether the evidence for one constraint warrants rejection.

        Args:
            candidate: The candidate being evaluated (not read here).
            constraint: The constraint being checked; its ``value`` is
                copied into the result.
            evidence_list: Evidence items for this constraint, each
                exposing ``positive_confidence`` and
                ``negative_confidence``.

        Returns:
            RejectionResult carrying the decision, its reason and the
            averaged confidences (both 0.0 when there is no evidence).
        """
        # Without evidence there is nothing to reject on.
        if not evidence_list:
            return RejectionResult(
                should_reject=False,
                reason="No evidence available",
                constraint_value=constraint.value,
                positive_confidence=0.0,
                negative_confidence=0.0,
            )

        # Mean confidence across all evidence items.
        positives = [e.positive_confidence for e in evidence_list]
        avg_positive = sum(positives) / len(evidence_list)
        negatives = [e.negative_confidence for e in evidence_list]
        avg_negative = sum(negatives) / len(evidence_list)

        if avg_negative > self.negative_threshold:
            # Primary rule: high negative evidence.
            reject = True
            reason = f"High negative evidence ({avg_negative:.0%})"
        elif avg_positive < self.positive_threshold:
            # Secondary rule: low positive evidence.
            reject = True
            reason = f"Insufficient positive evidence ({avg_positive:.0%})"
        else:
            reject = False
            reason = "Constraints satisfied"

        return RejectionResult(
            should_reject=reject,
            reason=reason,
            constraint_value=constraint.value,
            positive_confidence=avg_positive,
            negative_confidence=avg_negative,
        )

    def check_all_constraints(
        self,
        candidate: Candidate,
        constraint_results: Dict[Constraint, List[ConstraintEvidence]],
    ) -> Optional[RejectionResult]:
        """
        Evaluate every constraint and stop at the first rejection.

        Args:
            candidate: The candidate being evaluated; its ``name`` is
                used in log messages.
            constraint_results: Mapping of each constraint to its
                evidence list, checked in the mapping's iteration order.

        Returns:
            The first rejecting RejectionResult, or None when no
            constraint triggers a rejection.
        """
        for constraint, evidence_list in constraint_results.items():
            outcome = self.should_reject_candidate(
                candidate, constraint, evidence_list
            )
            if not outcome.should_reject:
                continue

            logger.info(
                f"❌ REJECTION: {candidate.name} - {constraint.value} - {outcome.reason}"
            )
            return outcome

        # Every constraint passed.
        logger.info(f"✓ ACCEPTED: {candidate.name} - All constraints satisfied")
        return None
|