local-deep-research 0.4.4__py3-none-any.whl → 0.5.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- local_deep_research/__init__.py +7 -0
- local_deep_research/__version__.py +1 -1
- local_deep_research/advanced_search_system/answer_decoding/__init__.py +5 -0
- local_deep_research/advanced_search_system/answer_decoding/browsecomp_answer_decoder.py +421 -0
- local_deep_research/advanced_search_system/candidate_exploration/README.md +219 -0
- local_deep_research/advanced_search_system/candidate_exploration/__init__.py +25 -0
- local_deep_research/advanced_search_system/candidate_exploration/adaptive_explorer.py +329 -0
- local_deep_research/advanced_search_system/candidate_exploration/base_explorer.py +341 -0
- local_deep_research/advanced_search_system/candidate_exploration/constraint_guided_explorer.py +436 -0
- local_deep_research/advanced_search_system/candidate_exploration/diversity_explorer.py +457 -0
- local_deep_research/advanced_search_system/candidate_exploration/parallel_explorer.py +250 -0
- local_deep_research/advanced_search_system/candidate_exploration/progressive_explorer.py +255 -0
- local_deep_research/advanced_search_system/candidates/__init__.py +5 -0
- local_deep_research/advanced_search_system/candidates/base_candidate.py +59 -0
- local_deep_research/advanced_search_system/constraint_checking/README.md +150 -0
- local_deep_research/advanced_search_system/constraint_checking/__init__.py +35 -0
- local_deep_research/advanced_search_system/constraint_checking/base_constraint_checker.py +122 -0
- local_deep_research/advanced_search_system/constraint_checking/constraint_checker.py +223 -0
- local_deep_research/advanced_search_system/constraint_checking/constraint_satisfaction_tracker.py +387 -0
- local_deep_research/advanced_search_system/constraint_checking/dual_confidence_checker.py +424 -0
- local_deep_research/advanced_search_system/constraint_checking/evidence_analyzer.py +174 -0
- local_deep_research/advanced_search_system/constraint_checking/intelligent_constraint_relaxer.py +503 -0
- local_deep_research/advanced_search_system/constraint_checking/rejection_engine.py +143 -0
- local_deep_research/advanced_search_system/constraint_checking/strict_checker.py +259 -0
- local_deep_research/advanced_search_system/constraint_checking/threshold_checker.py +213 -0
- local_deep_research/advanced_search_system/constraints/__init__.py +6 -0
- local_deep_research/advanced_search_system/constraints/base_constraint.py +58 -0
- local_deep_research/advanced_search_system/constraints/constraint_analyzer.py +143 -0
- local_deep_research/advanced_search_system/evidence/__init__.py +12 -0
- local_deep_research/advanced_search_system/evidence/base_evidence.py +57 -0
- local_deep_research/advanced_search_system/evidence/evaluator.py +159 -0
- local_deep_research/advanced_search_system/evidence/requirements.py +122 -0
- local_deep_research/advanced_search_system/filters/base_filter.py +3 -1
- local_deep_research/advanced_search_system/filters/cross_engine_filter.py +8 -2
- local_deep_research/advanced_search_system/filters/journal_reputation_filter.py +43 -29
- local_deep_research/advanced_search_system/findings/repository.py +54 -17
- local_deep_research/advanced_search_system/knowledge/standard_knowledge.py +3 -1
- local_deep_research/advanced_search_system/query_generation/adaptive_query_generator.py +405 -0
- local_deep_research/advanced_search_system/questions/__init__.py +16 -0
- local_deep_research/advanced_search_system/questions/atomic_fact_question.py +171 -0
- local_deep_research/advanced_search_system/questions/browsecomp_question.py +287 -0
- local_deep_research/advanced_search_system/questions/decomposition_question.py +13 -4
- local_deep_research/advanced_search_system/questions/entity_aware_question.py +184 -0
- local_deep_research/advanced_search_system/questions/standard_question.py +9 -3
- local_deep_research/advanced_search_system/search_optimization/cross_constraint_manager.py +624 -0
- local_deep_research/advanced_search_system/source_management/diversity_manager.py +613 -0
- local_deep_research/advanced_search_system/strategies/__init__.py +42 -0
- local_deep_research/advanced_search_system/strategies/adaptive_decomposition_strategy.py +564 -0
- local_deep_research/advanced_search_system/strategies/base_strategy.py +4 -4
- local_deep_research/advanced_search_system/strategies/browsecomp_entity_strategy.py +1031 -0
- local_deep_research/advanced_search_system/strategies/browsecomp_optimized_strategy.py +778 -0
- local_deep_research/advanced_search_system/strategies/concurrent_dual_confidence_strategy.py +446 -0
- local_deep_research/advanced_search_system/strategies/constrained_search_strategy.py +1348 -0
- local_deep_research/advanced_search_system/strategies/constraint_parallel_strategy.py +522 -0
- local_deep_research/advanced_search_system/strategies/direct_search_strategy.py +217 -0
- local_deep_research/advanced_search_system/strategies/dual_confidence_strategy.py +320 -0
- local_deep_research/advanced_search_system/strategies/dual_confidence_with_rejection.py +219 -0
- local_deep_research/advanced_search_system/strategies/early_stop_constrained_strategy.py +369 -0
- local_deep_research/advanced_search_system/strategies/entity_aware_source_strategy.py +140 -0
- local_deep_research/advanced_search_system/strategies/evidence_based_strategy.py +1248 -0
- local_deep_research/advanced_search_system/strategies/evidence_based_strategy_v2.py +1337 -0
- local_deep_research/advanced_search_system/strategies/focused_iteration_strategy.py +537 -0
- local_deep_research/advanced_search_system/strategies/improved_evidence_based_strategy.py +782 -0
- local_deep_research/advanced_search_system/strategies/iterative_reasoning_strategy.py +760 -0
- local_deep_research/advanced_search_system/strategies/iterdrag_strategy.py +55 -21
- local_deep_research/advanced_search_system/strategies/llm_driven_modular_strategy.py +865 -0
- local_deep_research/advanced_search_system/strategies/modular_strategy.py +1142 -0
- local_deep_research/advanced_search_system/strategies/parallel_constrained_strategy.py +506 -0
- local_deep_research/advanced_search_system/strategies/parallel_search_strategy.py +34 -16
- local_deep_research/advanced_search_system/strategies/rapid_search_strategy.py +29 -9
- local_deep_research/advanced_search_system/strategies/recursive_decomposition_strategy.py +492 -0
- local_deep_research/advanced_search_system/strategies/smart_decomposition_strategy.py +284 -0
- local_deep_research/advanced_search_system/strategies/smart_query_strategy.py +515 -0
- local_deep_research/advanced_search_system/strategies/source_based_strategy.py +48 -24
- local_deep_research/advanced_search_system/strategies/standard_strategy.py +34 -14
- local_deep_research/advanced_search_system/tools/base_tool.py +7 -2
- local_deep_research/api/benchmark_functions.py +6 -2
- local_deep_research/api/research_functions.py +10 -4
- local_deep_research/benchmarks/__init__.py +9 -7
- local_deep_research/benchmarks/benchmark_functions.py +6 -2
- local_deep_research/benchmarks/cli/benchmark_commands.py +27 -10
- local_deep_research/benchmarks/cli.py +38 -13
- local_deep_research/benchmarks/comparison/__init__.py +4 -2
- local_deep_research/benchmarks/comparison/evaluator.py +316 -239
- local_deep_research/benchmarks/datasets/__init__.py +1 -1
- local_deep_research/benchmarks/datasets/base.py +91 -72
- local_deep_research/benchmarks/datasets/browsecomp.py +54 -33
- local_deep_research/benchmarks/datasets/custom_dataset_template.py +19 -19
- local_deep_research/benchmarks/datasets/simpleqa.py +14 -14
- local_deep_research/benchmarks/datasets/utils.py +48 -29
- local_deep_research/benchmarks/datasets.py +4 -11
- local_deep_research/benchmarks/efficiency/__init__.py +8 -4
- local_deep_research/benchmarks/efficiency/resource_monitor.py +223 -171
- local_deep_research/benchmarks/efficiency/speed_profiler.py +62 -48
- local_deep_research/benchmarks/evaluators/browsecomp.py +3 -1
- local_deep_research/benchmarks/evaluators/composite.py +6 -2
- local_deep_research/benchmarks/evaluators/simpleqa.py +36 -13
- local_deep_research/benchmarks/graders.py +32 -10
- local_deep_research/benchmarks/metrics/README.md +1 -1
- local_deep_research/benchmarks/metrics/calculation.py +25 -10
- local_deep_research/benchmarks/metrics/reporting.py +7 -3
- local_deep_research/benchmarks/metrics/visualization.py +42 -23
- local_deep_research/benchmarks/metrics.py +1 -1
- local_deep_research/benchmarks/optimization/__init__.py +3 -1
- local_deep_research/benchmarks/optimization/api.py +7 -1
- local_deep_research/benchmarks/optimization/optuna_optimizer.py +75 -26
- local_deep_research/benchmarks/runners.py +48 -15
- local_deep_research/citation_handler.py +65 -92
- local_deep_research/citation_handlers/__init__.py +15 -0
- local_deep_research/citation_handlers/base_citation_handler.py +70 -0
- local_deep_research/citation_handlers/forced_answer_citation_handler.py +179 -0
- local_deep_research/citation_handlers/precision_extraction_handler.py +550 -0
- local_deep_research/citation_handlers/standard_citation_handler.py +80 -0
- local_deep_research/config/llm_config.py +271 -169
- local_deep_research/config/search_config.py +14 -5
- local_deep_research/defaults/__init__.py +0 -1
- local_deep_research/metrics/__init__.py +13 -0
- local_deep_research/metrics/database.py +58 -0
- local_deep_research/metrics/db_models.py +115 -0
- local_deep_research/metrics/migrate_add_provider_to_token_usage.py +148 -0
- local_deep_research/metrics/migrate_call_stack_tracking.py +105 -0
- local_deep_research/metrics/migrate_enhanced_tracking.py +75 -0
- local_deep_research/metrics/migrate_research_ratings.py +31 -0
- local_deep_research/metrics/models.py +61 -0
- local_deep_research/metrics/pricing/__init__.py +12 -0
- local_deep_research/metrics/pricing/cost_calculator.py +237 -0
- local_deep_research/metrics/pricing/pricing_cache.py +143 -0
- local_deep_research/metrics/pricing/pricing_fetcher.py +240 -0
- local_deep_research/metrics/query_utils.py +51 -0
- local_deep_research/metrics/search_tracker.py +380 -0
- local_deep_research/metrics/token_counter.py +1078 -0
- local_deep_research/migrate_db.py +3 -1
- local_deep_research/report_generator.py +22 -8
- local_deep_research/search_system.py +390 -9
- local_deep_research/test_migration.py +15 -5
- local_deep_research/utilities/db_utils.py +7 -4
- local_deep_research/utilities/es_utils.py +115 -104
- local_deep_research/utilities/llm_utils.py +15 -5
- local_deep_research/utilities/log_utils.py +151 -0
- local_deep_research/utilities/search_cache.py +387 -0
- local_deep_research/utilities/search_utilities.py +14 -6
- local_deep_research/utilities/threading_utils.py +92 -0
- local_deep_research/utilities/url_utils.py +6 -0
- local_deep_research/web/api.py +347 -0
- local_deep_research/web/app.py +13 -17
- local_deep_research/web/app_factory.py +71 -66
- local_deep_research/web/database/migrate_to_ldr_db.py +12 -4
- local_deep_research/web/database/migrations.py +20 -3
- local_deep_research/web/database/models.py +74 -25
- local_deep_research/web/database/schema_upgrade.py +49 -29
- local_deep_research/web/models/database.py +63 -83
- local_deep_research/web/routes/api_routes.py +56 -22
- local_deep_research/web/routes/benchmark_routes.py +4 -1
- local_deep_research/web/routes/globals.py +22 -0
- local_deep_research/web/routes/history_routes.py +71 -46
- local_deep_research/web/routes/metrics_routes.py +1155 -0
- local_deep_research/web/routes/research_routes.py +192 -54
- local_deep_research/web/routes/settings_routes.py +156 -55
- local_deep_research/web/services/research_service.py +412 -251
- local_deep_research/web/services/resource_service.py +36 -11
- local_deep_research/web/services/settings_manager.py +55 -17
- local_deep_research/web/services/settings_service.py +12 -4
- local_deep_research/web/services/socket_service.py +295 -188
- local_deep_research/web/static/css/custom_dropdown.css +180 -0
- local_deep_research/web/static/css/styles.css +39 -1
- local_deep_research/web/static/js/components/detail.js +633 -267
- local_deep_research/web/static/js/components/details.js +751 -0
- local_deep_research/web/static/js/components/fallback/formatting.js +11 -11
- local_deep_research/web/static/js/components/fallback/ui.js +23 -23
- local_deep_research/web/static/js/components/history.js +76 -76
- local_deep_research/web/static/js/components/logpanel.js +61 -13
- local_deep_research/web/static/js/components/progress.js +13 -2
- local_deep_research/web/static/js/components/research.js +99 -12
- local_deep_research/web/static/js/components/results.js +239 -106
- local_deep_research/web/static/js/main.js +40 -40
- local_deep_research/web/static/js/services/audio.js +1 -1
- local_deep_research/web/static/js/services/formatting.js +11 -11
- local_deep_research/web/static/js/services/keyboard.js +157 -0
- local_deep_research/web/static/js/services/pdf.js +80 -80
- local_deep_research/web/static/sounds/README.md +1 -1
- local_deep_research/web/templates/base.html +1 -0
- local_deep_research/web/templates/components/log_panel.html +7 -1
- local_deep_research/web/templates/components/mobile_nav.html +1 -1
- local_deep_research/web/templates/components/sidebar.html +3 -0
- local_deep_research/web/templates/pages/cost_analytics.html +1245 -0
- local_deep_research/web/templates/pages/details.html +325 -24
- local_deep_research/web/templates/pages/history.html +1 -1
- local_deep_research/web/templates/pages/metrics.html +1929 -0
- local_deep_research/web/templates/pages/progress.html +2 -2
- local_deep_research/web/templates/pages/research.html +53 -17
- local_deep_research/web/templates/pages/results.html +12 -1
- local_deep_research/web/templates/pages/star_reviews.html +803 -0
- local_deep_research/web/utils/formatters.py +9 -3
- local_deep_research/web_search_engines/default_search_engines.py +5 -3
- local_deep_research/web_search_engines/engines/full_search.py +8 -2
- local_deep_research/web_search_engines/engines/meta_search_engine.py +59 -20
- local_deep_research/web_search_engines/engines/search_engine_arxiv.py +19 -6
- local_deep_research/web_search_engines/engines/search_engine_brave.py +6 -2
- local_deep_research/web_search_engines/engines/search_engine_ddg.py +3 -1
- local_deep_research/web_search_engines/engines/search_engine_elasticsearch.py +81 -58
- local_deep_research/web_search_engines/engines/search_engine_github.py +46 -15
- local_deep_research/web_search_engines/engines/search_engine_google_pse.py +16 -6
- local_deep_research/web_search_engines/engines/search_engine_guardian.py +39 -15
- local_deep_research/web_search_engines/engines/search_engine_local.py +58 -25
- local_deep_research/web_search_engines/engines/search_engine_local_all.py +15 -5
- local_deep_research/web_search_engines/engines/search_engine_pubmed.py +63 -21
- local_deep_research/web_search_engines/engines/search_engine_searxng.py +37 -11
- local_deep_research/web_search_engines/engines/search_engine_semantic_scholar.py +27 -9
- local_deep_research/web_search_engines/engines/search_engine_serpapi.py +12 -4
- local_deep_research/web_search_engines/engines/search_engine_wayback.py +31 -10
- local_deep_research/web_search_engines/engines/search_engine_wikipedia.py +12 -3
- local_deep_research/web_search_engines/search_engine_base.py +83 -35
- local_deep_research/web_search_engines/search_engine_factory.py +25 -8
- local_deep_research/web_search_engines/search_engines_config.py +9 -3
- {local_deep_research-0.4.4.dist-info → local_deep_research-0.5.2.dist-info}/METADATA +7 -1
- local_deep_research-0.5.2.dist-info/RECORD +265 -0
- local_deep_research-0.4.4.dist-info/RECORD +0 -177
- {local_deep_research-0.4.4.dist-info → local_deep_research-0.5.2.dist-info}/WHEEL +0 -0
- {local_deep_research-0.4.4.dist-info → local_deep_research-0.5.2.dist-info}/entry_points.txt +0 -0
- {local_deep_research-0.4.4.dist-info → local_deep_research-0.5.2.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,259 @@
|
|
1
|
+
"""
|
2
|
+
Strict constraint checker - example of creating a custom variant.
|
3
|
+
|
4
|
+
This implementation is very strict about constraint satisfaction,
|
5
|
+
requiring high confidence for all constraints.
|
6
|
+
"""
|
7
|
+
|
8
|
+
from typing import Dict, List, Tuple
|
9
|
+
|
10
|
+
from loguru import logger
|
11
|
+
|
12
|
+
from ..candidates.base_candidate import Candidate
|
13
|
+
from ..constraints.base_constraint import Constraint, ConstraintType
|
14
|
+
from .base_constraint_checker import (
|
15
|
+
BaseConstraintChecker,
|
16
|
+
ConstraintCheckResult,
|
17
|
+
)
|
18
|
+
|
19
|
+
|
20
|
+
class StrictChecker(BaseConstraintChecker):
    """
    Strict constraint checker that requires high confidence for all constraints.

    This is an example of how to create custom constraint checking variants
    by inheriting from BaseConstraintChecker.

    A candidate receives a total score of 1.0 only when every constraint
    scores at or above ``strict_threshold`` and no constraint triggers a
    rejection; otherwise the total score is 0.0.
    """

    # Minimum score a NAME_PATTERN constraint must reach when
    # ``name_pattern_required`` is enabled (stricter than the general threshold).
    _NAME_PATTERN_THRESHOLD = 0.95

    # Words treated as body parts by the built-in "body part" name pattern.
    _BODY_PARTS = (
        "arm",
        "leg",
        "foot",
        "feet",
        "hand",
        "eye",
        "ear",
        "nose",
        "mouth",
        "tooth",
        "teeth",
        "head",
        "face",
        "neck",
        "back",
        "chest",
        "heart",
        "finger",
        "thumb",
        "toe",
        "knee",
        "elbow",
        "shoulder",
        "spine",
        "bone",
    )

    def __init__(
        self,
        *args,
        strict_threshold: float = 0.9,  # Very high threshold
        name_pattern_required: bool = True,  # NAME_PATTERN constraints are mandatory
        **kwargs,
    ):
        """
        Initialize strict checker.

        Args:
            *args: Positional arguments forwarded to BaseConstraintChecker.
            strict_threshold: Very high threshold for all constraints
            name_pattern_required: Whether NAME_PATTERN constraints are mandatory
            **kwargs: Keyword arguments forwarded to BaseConstraintChecker.
        """
        super().__init__(*args, **kwargs)
        self.strict_threshold = strict_threshold
        self.name_pattern_required = name_pattern_required

    def check_candidate(
        self, candidate: Candidate, constraints: List[Constraint]
    ) -> ConstraintCheckResult:
        """
        Check candidate with strict requirements.

        Each constraint is scored exactly once; the same score is used both
        for the recorded result and for the rejection decision, so the two
        can never disagree.

        Args:
            candidate: Candidate to evaluate.
            constraints: Constraints the candidate must satisfy.

        Returns:
            ConstraintCheckResult whose ``total_score`` is 1.0 when every
            constraint passes the strict threshold and nothing was rejected,
            and 0.0 otherwise. ``rejection_reason`` is the reason of the
            first rejecting constraint, or None.
        """
        logger.info(f"Checking candidate: {candidate.name} (strict mode)")

        constraint_scores = {}
        detailed_results = []
        rejection_reason = None
        should_reject = False

        for constraint in constraints:
            # Gather evidence
            evidence_list = self._gather_evidence_for_constraint(
                candidate, constraint
            )

            # Calculate score
            score = self._evaluate_constraint_strictly(
                candidate, constraint, evidence_list
            )

            # Check for rejection. Reuse the score computed above instead of
            # calling should_reject_candidate(), which would re-evaluate the
            # constraint (a second model call that may return another score).
            reject, reason = self._rejection_for_score(
                constraint, score, has_evidence=bool(evidence_list)
            )

            # Only the first rejection reason is kept.
            if reject and not should_reject:
                should_reject = True
                rejection_reason = reason

            strict_pass = score >= self.strict_threshold

            # Store results
            constraint_scores[constraint.value] = {
                "total": score,
                "strict_pass": strict_pass,
                "weight": constraint.weight,
            }

            detailed_results.append(
                {
                    "constraint": constraint.value,
                    "score": score,
                    "strict_pass": strict_pass,
                    "weight": constraint.weight,
                    "type": constraint.type.value,
                }
            )

            self._log_constraint_result(candidate, constraint, score, {})

        # Calculate total score
        if should_reject:
            total_score = 0.0
        else:
            # All constraints must pass strict threshold
            all_pass = all(r["strict_pass"] for r in detailed_results)
            total_score = 1.0 if all_pass else 0.0

        logger.info(
            f"Strict evaluation for {candidate.name}: {'PASS' if total_score > 0 else 'FAIL'}"
        )

        return ConstraintCheckResult(
            candidate=candidate,
            total_score=total_score,
            constraint_scores=constraint_scores,
            should_reject=should_reject,
            rejection_reason=rejection_reason,
            detailed_results=detailed_results,
        )

    def should_reject_candidate(
        self,
        candidate: Candidate,
        constraint: Constraint,
        evidence_data: List[Dict],
    ) -> Tuple[bool, str]:
        """
        Strict rejection rules.

        Args:
            candidate: Candidate being evaluated.
            constraint: Constraint to test the candidate against.
            evidence_data: Evidence items gathered for this constraint.

        Returns:
            ``(True, reason)`` when there is no evidence or the strict score
            is below the applicable threshold, ``(False, "")`` otherwise.
        """
        if not evidence_data:
            return self._rejection_for_score(
                constraint, 0.0, has_evidence=False
            )

        score = self._evaluate_constraint_strictly(
            candidate, constraint, evidence_data
        )
        return self._rejection_for_score(constraint, score, has_evidence=True)

    def _rejection_for_score(
        self, constraint: Constraint, score: float, has_evidence: bool
    ) -> Tuple[bool, str]:
        """Map an already-computed score to a ``(reject, reason)`` decision."""
        if not has_evidence:
            return True, f"No evidence for constraint '{constraint.value}'"

        # Special handling for NAME_PATTERN constraints
        if (
            constraint.type == ConstraintType.NAME_PATTERN
            and self.name_pattern_required
            and score < self._NAME_PATTERN_THRESHOLD  # Even stricter for name patterns
        ):
            return (
                True,
                f"NAME_PATTERN constraint '{constraint.value}' failed strict evaluation",
            )

        # General strict threshold
        if score < self.strict_threshold:
            return (
                True,
                f"Constraint '{constraint.value}' below strict threshold ({score:.0%})",
            )

        return False, ""

    def _evaluate_constraint_strictly(
        self,
        candidate: Candidate,
        constraint: Constraint,
        evidence_list: List[Dict],
    ) -> float:
        """
        Evaluate constraint with strict criteria.

        Args:
            candidate: Candidate being evaluated.
            constraint: Constraint to score.
            evidence_list: Evidence items; each is read via ``.get("text", "")``.

        Returns:
            Score clamped to [0.0, 1.0]; 0.0 when there is no evidence, when
            the model call fails, or when no number is found in the response.
        """
        if not evidence_list:
            return 0.0

        # For NAME_PATTERN constraints, use direct name checking
        if constraint.type == ConstraintType.NAME_PATTERN:
            return self._check_name_pattern_strictly(
                candidate.name, constraint.value
            )

        # For other constraints, use LLM with strict prompt.
        # Only the first two evidence items, 300 characters each, are used.
        combined_evidence = "\n".join(
            [e.get("text", "")[:300] for e in evidence_list[:2]]
        )

        prompt = f"""
STRICT EVALUATION: Does "{candidate.name}" definitely and clearly satisfy: "{constraint.value}"?

Evidence:
{combined_evidence}

Be very strict. Only return a high score if there is clear, unambiguous evidence.

Score (0.0-1.0):
"""

        return self._score_from_model(prompt, "strict evaluation")

    def _check_name_pattern_strictly(
        self, candidate_name: str, pattern_description: str
    ) -> float:
        """
        Strict name pattern checking.

        Args:
            candidate_name: Name of the candidate.
            pattern_description: Textual description of the required pattern.

        Returns:
            For descriptions mentioning "body part": 1.0 if any known body
            part occurs in the lower-cased name, else 0.0. For every other
            description: the model's score clamped to [0.0, 1.0], or 0.0 on
            failure.
        """
        # Example: Check for body parts in name
        if "body part" in pattern_description.lower():
            name_lower = candidate_name.lower()
            # NOTE(review): this is a substring match, so "ear" also matches
            # "Bear". Kept as-is to preserve existing behaviour — confirm
            # whether whole-word matching is wanted for a "strict" checker.
            found = next(
                (part for part in self._BODY_PARTS if part in name_lower),
                None,
            )
            if found is not None:
                logger.info(
                    f"✓ Found body part '{found}' in '{candidate_name}'"
                )
                return 1.0

            logger.info(f"✗ No body part found in '{candidate_name}'")
            return 0.0

        # For other name patterns, use LLM
        prompt = f"""
Does the name "{candidate_name}" match this pattern: "{pattern_description}"?

Be very strict. Return 1.0 only if it clearly matches, 0.0 otherwise.

Score:
"""

        return self._score_from_model(prompt, "strict name pattern check")

    def _score_from_model(self, prompt: str, error_context: str) -> float:
        """Invoke the model and parse the first number in its reply as a score.

        Returns the number clamped to [0.0, 1.0]. Any failure is logged and
        reported as 0.0 so that errors count as a failed constraint.
        """
        import re

        try:
            # presumably self.model is provided by BaseConstraintChecker — verify
            response = self.model.invoke(prompt).content.strip()
            match = re.search(r"(\d*\.?\d+)", response)
            if match:
                return max(0.0, min(float(match.group(1)), 1.0))
        except Exception as e:
            logger.error(f"Error in {error_context}: {e}")

        return 0.0  # Default to fail on error
|
@@ -0,0 +1,213 @@
|
|
1
|
+
"""
|
2
|
+
Simple threshold-based constraint checker.
|
3
|
+
|
4
|
+
This implementation uses simple yes/no threshold checking for constraints,
|
5
|
+
making it faster but less nuanced than dual confidence checking.
|
6
|
+
"""
|
7
|
+
|
8
|
+
from typing import Dict, List, Tuple
|
9
|
+
|
10
|
+
from loguru import logger
|
11
|
+
|
12
|
+
from ..candidates.base_candidate import Candidate
|
13
|
+
from ..constraints.base_constraint import Constraint
|
14
|
+
from .base_constraint_checker import (
|
15
|
+
BaseConstraintChecker,
|
16
|
+
ConstraintCheckResult,
|
17
|
+
)
|
18
|
+
|
19
|
+
|
20
|
+
class ThresholdChecker(BaseConstraintChecker):
    """
    Constraint checker built on a plain per-constraint score threshold.

    How it works:
    1. Each constraint with evidence gets one model-produced score in [0, 1]
    2. A constraint counts as satisfied when its score reaches
       ``satisfaction_threshold``
    3. The candidate is rejected when the share of satisfied constraints is
       below ``required_satisfaction_rate``
    """

    def __init__(
        self,
        *args,
        satisfaction_threshold: float = 0.7,  # Minimum score to consider satisfied
        required_satisfaction_rate: float = 0.8,  # % of constraints that must be satisfied
        **kwargs,
    ):
        """
        Set up the checker.

        Args:
            *args: Positional arguments passed through to the base class.
            satisfaction_threshold: Lowest score at which a constraint
                counts as satisfied.
            required_satisfaction_rate: Fraction of constraints that has to
                be satisfied for the candidate to be kept.
            **kwargs: Keyword arguments passed through to the base class.
        """
        super().__init__(*args, **kwargs)
        self.satisfaction_threshold = satisfaction_threshold
        self.required_satisfaction_rate = required_satisfaction_rate

    def check_candidate(
        self, candidate: Candidate, constraints: List[Constraint]
    ) -> ConstraintCheckResult:
        """
        Score a candidate against every constraint and decide on rejection.

        Args:
            candidate: Candidate under evaluation.
            constraints: Constraints to check.

        Returns:
            ConstraintCheckResult. ``total_score`` is 0.0 for a rejected
            candidate and the satisfaction rate otherwise.
        """
        logger.info(f"Checking candidate: {candidate.name} (threshold)")

        per_constraint = {}
        breakdown = []
        num_satisfied = 0
        num_constraints = len(constraints)

        for constraint in constraints:
            evidence = self._gather_evidence_for_constraint(
                candidate, constraint
            )
            has_evidence = bool(evidence)

            if has_evidence:
                # One model call decides the score for this constraint.
                score = self._check_constraint_satisfaction(
                    candidate, constraint, evidence
                )
                met = score >= self.satisfaction_threshold
                if met:
                    num_satisfied += 1
            else:
                # Without evidence the constraint is treated as unsatisfied.
                score = 0.0
                met = False

            per_constraint[constraint.value] = {
                "total": score,
                "satisfied": met,
                "weight": constraint.weight,
            }
            breakdown.append(
                {
                    "constraint": constraint.value,
                    "score": score,
                    "satisfied": met,
                    "weight": constraint.weight,
                    "type": constraint.type.value,
                }
            )

            if has_evidence:
                self._log_constraint_result(candidate, constraint, score, {})
            else:
                logger.info(
                    f"? {candidate.name} | {constraint.value}: No evidence found"
                )

        # An empty constraint list yields a rate of 0.
        rate = num_satisfied / num_constraints if num_constraints > 0 else 0
        should_reject = rate < self.required_satisfaction_rate

        if should_reject:
            rejection_reason = f"Only {num_satisfied}/{num_constraints} constraints satisfied ({rate:.0%})"
            total_score = 0.0
        else:
            rejection_reason = None
            # Kept candidates are scored by their satisfaction rate.
            total_score = rate

        logger.info(
            f"Final score for {candidate.name}: {total_score:.2%} ({num_satisfied}/{num_constraints} satisfied)"
        )

        return ConstraintCheckResult(
            candidate=candidate,
            total_score=total_score,
            constraint_scores=per_constraint,
            should_reject=should_reject,
            rejection_reason=rejection_reason,
            detailed_results=breakdown,
        )

    def should_reject_candidate(
        self,
        candidate: Candidate,
        constraint: Constraint,
        evidence_data: List[Dict],
    ) -> Tuple[bool, str]:
        """
        Decide whether one constraint alone is grounds for rejection.

        Args:
            candidate: Candidate under evaluation.
            constraint: Constraint being tested.
            evidence_data: Evidence collected for the constraint.

        Returns:
            ``(True, reason)`` if evidence is missing or the score is below
            ``satisfaction_threshold``; ``(False, "")`` otherwise.
        """
        if not evidence_data:
            reason = f"No evidence found for constraint '{constraint.value}'"
            return True, reason

        satisfaction_score = self._check_constraint_satisfaction(
            candidate, constraint, evidence_data
        )

        if satisfaction_score >= self.satisfaction_threshold:
            return False, ""

        return (
            True,
            f"Constraint '{constraint.value}' not satisfied (score: {satisfaction_score:.0%})",
        )

    def _check_constraint_satisfaction(
        self,
        candidate: Candidate,
        constraint: Constraint,
        evidence_list: List[Dict],
    ) -> float:
        """
        Ask the model for a 0.0-1.0 satisfaction score.

        Args:
            candidate: Candidate under evaluation.
            constraint: Constraint being tested.
            evidence_list: Evidence items; each is read via ``.get("text", "")``.

        Returns:
            The first number found in the model reply, clamped to
            [0.0, 1.0]; 0.5 when the call fails or no number is found.
        """
        import re

        # Only the first three evidence items, 200 characters each, are used.
        snippets = (e.get("text", "")[:200] for e in evidence_list[:3])
        combined_evidence = "\n".join(snippets)

        prompt = f"""
Does the candidate "{candidate.name}" satisfy this constraint: "{constraint.value}"?

Evidence:
{combined_evidence}

Consider the evidence and respond with a satisfaction score from 0.0 to 1.0 where:
- 1.0 = Definitely satisfies the constraint
- 0.5 = Partially satisfies or unclear
- 0.0 = Definitely does not satisfy the constraint

Score:
"""

        try:
            reply = self.model.invoke(prompt).content.strip()
            found = re.search(r"(\d*\.?\d+)", reply)
            if found:
                parsed = float(found.group(1))
                return max(0.0, min(parsed, 1.0))
        except Exception as e:
            logger.error(f"Error checking constraint satisfaction: {e}")

        # Neutral fallback when no score could be obtained.
        return 0.5
|
@@ -0,0 +1,58 @@
|
|
1
|
+
"""
|
2
|
+
Base constraint classes for query decomposition.
|
3
|
+
"""
|
4
|
+
|
5
|
+
from dataclasses import dataclass
|
6
|
+
from enum import Enum
|
7
|
+
from typing import Any, Dict
|
8
|
+
|
9
|
+
|
10
|
+
class ConstraintType(Enum):
    """Categories of constraints that can appear in a query.

    Member values are the lowercase string labels of each category.
    """

    # e.g., "formed during ice age"
    PROPERTY = "property"
    # e.g., "contains body part"
    NAME_PATTERN = "name_pattern"
    # e.g., "fall between 2000-2021"
    EVENT = "event"
    # e.g., "84.5x ratio"
    STATISTIC = "statistic"
    # e.g., "in 2014"
    TEMPORAL = "temporal"
    # e.g., "in Colorado"
    LOCATION = "location"
    # e.g., "more than X"
    COMPARISON = "comparison"
    # e.g., "has a viewpoint"
    EXISTENCE = "existence"
|
21
|
+
|
22
|
+
|
23
|
+
@dataclass
class Constraint:
    """A single constraint extracted from a query."""

    id: str
    type: ConstraintType
    description: str
    value: Any
    weight: float = 1.0  # Importance of this constraint
    # None is only a placeholder; __post_init__ swaps it for a fresh dict so
    # instances never share one mutable default.
    metadata: Dict[str, Any] = None

    def __post_init__(self):
        """Initialize metadata if not provided."""
        if self.metadata is None:
            self.metadata = {}

    def to_search_terms(self) -> str:
        """Convert constraint to search terms.

        Returns:
            The constraint value as text, followed by type-specific keywords
            for NAME_PATTERN, EVENT and STATISTIC constraints. The result is
            always a ``str``, even when ``value`` is not one.
        """
        if self.type == ConstraintType.NAME_PATTERN:
            return f"{self.value} name trail mountain"
        if self.type == ConstraintType.EVENT:
            return f"{self.value} accident incident"
        if self.type == ConstraintType.STATISTIC:
            return f"{self.value} statistics data"
        # PROPERTY and every remaining type use the bare value; str() keeps
        # the declared return type honest for non-string values.
        return str(self.value)

    def is_critical(self) -> bool:
        """Determine if this is a critical constraint that must be satisfied."""
        # Consider NAME_PATTERN constraints as critical regardless of weight
        if self.type == ConstraintType.NAME_PATTERN:
            return True
        # Otherwise use weight to determine criticality
        return self.weight > 0.8
|
@@ -0,0 +1,143 @@
|
|
1
|
+
"""
|
2
|
+
Constraint analyzer for extracting constraints from queries.
|
3
|
+
"""
|
4
|
+
|
5
|
+
import re
|
6
|
+
from typing import List
|
7
|
+
|
8
|
+
from langchain_core.language_models import BaseChatModel
|
9
|
+
from loguru import logger
|
10
|
+
|
11
|
+
from ...utilities.search_utilities import remove_think_tags
|
12
|
+
from .base_constraint import Constraint, ConstraintType
|
13
|
+
|
14
|
+
|
15
|
+
class ConstraintAnalyzer:
    """Analyzes queries to extract constraints.

    A fixed-format prompt is sent to the model, and the ``CONSTRAINT_n:`` /
    ``Key: value`` text it returns is parsed into ``Constraint`` objects.
    """

    # Fields a parsed block must contain before it becomes a Constraint.
    _REQUIRED_FIELDS = ("type", "description", "value")
    # Every field name recognised on a "Key: value" line.
    _KNOWN_FIELDS = ("type", "description", "value", "weight")

    def __init__(self, model: BaseChatModel):
        """Initialize the constraint analyzer."""
        self.model = model

    def extract_constraints(self, query: str) -> List[Constraint]:
        """Extract constraints from a query.

        Args:
            query: The question to generate verification constraints for.

        Returns:
            Constraints in the order they appear in the model reply, with
            ids ``c1``, ``c2``, ... Blocks missing a type, description or
            value are dropped.
        """
        prompt = f"""
Generate constraints to verify if an answer candidate correctly answers this question.

Question: {query}

Create constraints that would help verify if a proposed answer is correct. Focus on the RELATIONSHIP between the question and answer, not just query analysis.

Examples:
- "Which university did Alice study at?" → "Alice studied at this university"
- "What year was the company founded?" → "The company was founded in this year"
- "Who invented the device?" → "This person invented the device"
- "Where is the building located?" → "The building is located at this place"

For each constraint, identify:
1. Type: property, name_pattern, event, statistic, temporal, location, comparison, existence
2. Description: What relationship must hold between question and answer
3. Value: The specific relationship to verify
4. Weight: How critical this constraint is (0.0-1.0)

Format your response as:
CONSTRAINT_1:
Type: [type]
Description: [description]
Value: [value]
Weight: [0.0-1.0]

CONSTRAINT_2:
Type: [type]
Description: [description]
Value: [value]
Weight: [0.0-1.0]

Focus on answer verification, not query parsing.
"""

        response = self.model.invoke(prompt)
        content = remove_think_tags(response.content)

        constraints = []
        current_constraint = {}

        for line in content.strip().split("\n"):
            line = line.strip()

            if line.startswith("CONSTRAINT_"):
                # A new header closes the block collected so far.
                self._append_if_complete(current_constraint, constraints)
                current_constraint = {}
            elif ":" in line:
                key, value = line.split(":", 1)
                key = key.strip().lower()
                if key in self._KNOWN_FIELDS:
                    current_constraint[key] = value.strip()

        # Don't forget the last constraint
        self._append_if_complete(current_constraint, constraints)

        logger.info(f"Extracted {len(constraints)} constraints from query")
        return constraints

    def _append_if_complete(self, fields, constraints) -> None:
        """Append a Constraint built from ``fields`` if all required keys exist.

        Ids are numbered by position in ``constraints``, so incomplete
        blocks do not consume an id.
        """
        if not all(k in fields for k in self._REQUIRED_FIELDS):
            return
        constraints.append(
            Constraint(
                id=f"c{len(constraints) + 1}",
                type=self._parse_constraint_type(fields["type"]),
                description=fields["description"],
                value=fields["value"],
                weight=self._parse_weight(fields.get("weight", 1.0)),
            )
        )

    def _parse_constraint_type(self, type_str: str) -> ConstraintType:
        """Parse constraint type from string.

        The label is normalized first (case, surrounding brackets or
        markup, spaces or hyphens between words), so replies such as
        ``[property]`` or ``Name Pattern`` still resolve. Unknown labels
        fall back to ``ConstraintType.PROPERTY``.
        """
        normalized = re.sub(r"[^a-z]+", "_", type_str.strip().lower()).strip("_")
        try:
            return ConstraintType(normalized)
        except ValueError:
            return ConstraintType.PROPERTY

    def _parse_weight(self, weight_value) -> float:
        """Parse weight value to float, handling text annotations.

        Args:
            weight_value: String or numeric weight value, possibly with text annotations

        Returns:
            float: Parsed weight value; 1.0 when no number can be found
        """
        if isinstance(weight_value, (int, float)):
            return float(weight_value)
        if isinstance(weight_value, str):
            # Extract the first number from the string. The pattern also
            # accepts a leading-dot decimal such as ".8".
            match = re.search(r"\d*\.?\d+", weight_value)
            if match:
                return float(match.group())
        return 1.0  # Default weight
|
@@ -0,0 +1,12 @@
|
|
1
|
+
# Evidence System Package
|
2
|
+
|
3
|
+
from .base_evidence import Evidence, EvidenceType
|
4
|
+
from .evaluator import EvidenceEvaluator
|
5
|
+
from .requirements import EvidenceRequirements
|
6
|
+
|
7
|
+
# Public names of this package, re-exported from the submodules imported above.
__all__ = [
    "Evidence",
    "EvidenceType",
    "EvidenceEvaluator",
    "EvidenceRequirements",
]
|