local-deep-research 0.4.4__py3-none-any.whl → 0.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (220) hide show
  1. local_deep_research/__init__.py +7 -0
  2. local_deep_research/__version__.py +1 -1
  3. local_deep_research/advanced_search_system/answer_decoding/__init__.py +5 -0
  4. local_deep_research/advanced_search_system/answer_decoding/browsecomp_answer_decoder.py +421 -0
  5. local_deep_research/advanced_search_system/candidate_exploration/README.md +219 -0
  6. local_deep_research/advanced_search_system/candidate_exploration/__init__.py +25 -0
  7. local_deep_research/advanced_search_system/candidate_exploration/adaptive_explorer.py +329 -0
  8. local_deep_research/advanced_search_system/candidate_exploration/base_explorer.py +341 -0
  9. local_deep_research/advanced_search_system/candidate_exploration/constraint_guided_explorer.py +436 -0
  10. local_deep_research/advanced_search_system/candidate_exploration/diversity_explorer.py +457 -0
  11. local_deep_research/advanced_search_system/candidate_exploration/parallel_explorer.py +250 -0
  12. local_deep_research/advanced_search_system/candidate_exploration/progressive_explorer.py +255 -0
  13. local_deep_research/advanced_search_system/candidates/__init__.py +5 -0
  14. local_deep_research/advanced_search_system/candidates/base_candidate.py +59 -0
  15. local_deep_research/advanced_search_system/constraint_checking/README.md +150 -0
  16. local_deep_research/advanced_search_system/constraint_checking/__init__.py +35 -0
  17. local_deep_research/advanced_search_system/constraint_checking/base_constraint_checker.py +122 -0
  18. local_deep_research/advanced_search_system/constraint_checking/constraint_checker.py +223 -0
  19. local_deep_research/advanced_search_system/constraint_checking/constraint_satisfaction_tracker.py +387 -0
  20. local_deep_research/advanced_search_system/constraint_checking/dual_confidence_checker.py +424 -0
  21. local_deep_research/advanced_search_system/constraint_checking/evidence_analyzer.py +174 -0
  22. local_deep_research/advanced_search_system/constraint_checking/intelligent_constraint_relaxer.py +503 -0
  23. local_deep_research/advanced_search_system/constraint_checking/rejection_engine.py +143 -0
  24. local_deep_research/advanced_search_system/constraint_checking/strict_checker.py +259 -0
  25. local_deep_research/advanced_search_system/constraint_checking/threshold_checker.py +213 -0
  26. local_deep_research/advanced_search_system/constraints/__init__.py +6 -0
  27. local_deep_research/advanced_search_system/constraints/base_constraint.py +58 -0
  28. local_deep_research/advanced_search_system/constraints/constraint_analyzer.py +143 -0
  29. local_deep_research/advanced_search_system/evidence/__init__.py +12 -0
  30. local_deep_research/advanced_search_system/evidence/base_evidence.py +57 -0
  31. local_deep_research/advanced_search_system/evidence/evaluator.py +159 -0
  32. local_deep_research/advanced_search_system/evidence/requirements.py +122 -0
  33. local_deep_research/advanced_search_system/filters/base_filter.py +3 -1
  34. local_deep_research/advanced_search_system/filters/cross_engine_filter.py +8 -2
  35. local_deep_research/advanced_search_system/filters/journal_reputation_filter.py +43 -29
  36. local_deep_research/advanced_search_system/findings/repository.py +54 -17
  37. local_deep_research/advanced_search_system/knowledge/standard_knowledge.py +3 -1
  38. local_deep_research/advanced_search_system/query_generation/adaptive_query_generator.py +405 -0
  39. local_deep_research/advanced_search_system/questions/__init__.py +16 -0
  40. local_deep_research/advanced_search_system/questions/atomic_fact_question.py +171 -0
  41. local_deep_research/advanced_search_system/questions/browsecomp_question.py +287 -0
  42. local_deep_research/advanced_search_system/questions/decomposition_question.py +13 -4
  43. local_deep_research/advanced_search_system/questions/entity_aware_question.py +184 -0
  44. local_deep_research/advanced_search_system/questions/standard_question.py +9 -3
  45. local_deep_research/advanced_search_system/search_optimization/cross_constraint_manager.py +624 -0
  46. local_deep_research/advanced_search_system/source_management/diversity_manager.py +613 -0
  47. local_deep_research/advanced_search_system/strategies/__init__.py +42 -0
  48. local_deep_research/advanced_search_system/strategies/adaptive_decomposition_strategy.py +564 -0
  49. local_deep_research/advanced_search_system/strategies/base_strategy.py +4 -4
  50. local_deep_research/advanced_search_system/strategies/browsecomp_entity_strategy.py +1031 -0
  51. local_deep_research/advanced_search_system/strategies/browsecomp_optimized_strategy.py +778 -0
  52. local_deep_research/advanced_search_system/strategies/concurrent_dual_confidence_strategy.py +446 -0
  53. local_deep_research/advanced_search_system/strategies/constrained_search_strategy.py +1348 -0
  54. local_deep_research/advanced_search_system/strategies/constraint_parallel_strategy.py +522 -0
  55. local_deep_research/advanced_search_system/strategies/direct_search_strategy.py +217 -0
  56. local_deep_research/advanced_search_system/strategies/dual_confidence_strategy.py +320 -0
  57. local_deep_research/advanced_search_system/strategies/dual_confidence_with_rejection.py +219 -0
  58. local_deep_research/advanced_search_system/strategies/early_stop_constrained_strategy.py +369 -0
  59. local_deep_research/advanced_search_system/strategies/entity_aware_source_strategy.py +140 -0
  60. local_deep_research/advanced_search_system/strategies/evidence_based_strategy.py +1248 -0
  61. local_deep_research/advanced_search_system/strategies/evidence_based_strategy_v2.py +1337 -0
  62. local_deep_research/advanced_search_system/strategies/focused_iteration_strategy.py +537 -0
  63. local_deep_research/advanced_search_system/strategies/improved_evidence_based_strategy.py +782 -0
  64. local_deep_research/advanced_search_system/strategies/iterative_reasoning_strategy.py +760 -0
  65. local_deep_research/advanced_search_system/strategies/iterdrag_strategy.py +55 -21
  66. local_deep_research/advanced_search_system/strategies/llm_driven_modular_strategy.py +865 -0
  67. local_deep_research/advanced_search_system/strategies/modular_strategy.py +1142 -0
  68. local_deep_research/advanced_search_system/strategies/parallel_constrained_strategy.py +506 -0
  69. local_deep_research/advanced_search_system/strategies/parallel_search_strategy.py +34 -16
  70. local_deep_research/advanced_search_system/strategies/rapid_search_strategy.py +29 -9
  71. local_deep_research/advanced_search_system/strategies/recursive_decomposition_strategy.py +492 -0
  72. local_deep_research/advanced_search_system/strategies/smart_decomposition_strategy.py +284 -0
  73. local_deep_research/advanced_search_system/strategies/smart_query_strategy.py +515 -0
  74. local_deep_research/advanced_search_system/strategies/source_based_strategy.py +48 -24
  75. local_deep_research/advanced_search_system/strategies/standard_strategy.py +34 -14
  76. local_deep_research/advanced_search_system/tools/base_tool.py +7 -2
  77. local_deep_research/api/benchmark_functions.py +6 -2
  78. local_deep_research/api/research_functions.py +10 -4
  79. local_deep_research/benchmarks/__init__.py +9 -7
  80. local_deep_research/benchmarks/benchmark_functions.py +6 -2
  81. local_deep_research/benchmarks/cli/benchmark_commands.py +27 -10
  82. local_deep_research/benchmarks/cli.py +38 -13
  83. local_deep_research/benchmarks/comparison/__init__.py +4 -2
  84. local_deep_research/benchmarks/comparison/evaluator.py +316 -239
  85. local_deep_research/benchmarks/datasets/__init__.py +1 -1
  86. local_deep_research/benchmarks/datasets/base.py +91 -72
  87. local_deep_research/benchmarks/datasets/browsecomp.py +54 -33
  88. local_deep_research/benchmarks/datasets/custom_dataset_template.py +19 -19
  89. local_deep_research/benchmarks/datasets/simpleqa.py +14 -14
  90. local_deep_research/benchmarks/datasets/utils.py +48 -29
  91. local_deep_research/benchmarks/datasets.py +4 -11
  92. local_deep_research/benchmarks/efficiency/__init__.py +8 -4
  93. local_deep_research/benchmarks/efficiency/resource_monitor.py +223 -171
  94. local_deep_research/benchmarks/efficiency/speed_profiler.py +62 -48
  95. local_deep_research/benchmarks/evaluators/browsecomp.py +3 -1
  96. local_deep_research/benchmarks/evaluators/composite.py +6 -2
  97. local_deep_research/benchmarks/evaluators/simpleqa.py +36 -13
  98. local_deep_research/benchmarks/graders.py +32 -10
  99. local_deep_research/benchmarks/metrics/README.md +1 -1
  100. local_deep_research/benchmarks/metrics/calculation.py +25 -10
  101. local_deep_research/benchmarks/metrics/reporting.py +7 -3
  102. local_deep_research/benchmarks/metrics/visualization.py +42 -23
  103. local_deep_research/benchmarks/metrics.py +1 -1
  104. local_deep_research/benchmarks/optimization/__init__.py +3 -1
  105. local_deep_research/benchmarks/optimization/api.py +7 -1
  106. local_deep_research/benchmarks/optimization/optuna_optimizer.py +75 -26
  107. local_deep_research/benchmarks/runners.py +48 -15
  108. local_deep_research/citation_handler.py +65 -92
  109. local_deep_research/citation_handlers/__init__.py +15 -0
  110. local_deep_research/citation_handlers/base_citation_handler.py +70 -0
  111. local_deep_research/citation_handlers/forced_answer_citation_handler.py +179 -0
  112. local_deep_research/citation_handlers/precision_extraction_handler.py +550 -0
  113. local_deep_research/citation_handlers/standard_citation_handler.py +80 -0
  114. local_deep_research/config/llm_config.py +271 -169
  115. local_deep_research/config/search_config.py +14 -5
  116. local_deep_research/defaults/__init__.py +0 -1
  117. local_deep_research/metrics/__init__.py +13 -0
  118. local_deep_research/metrics/database.py +58 -0
  119. local_deep_research/metrics/db_models.py +115 -0
  120. local_deep_research/metrics/migrate_add_provider_to_token_usage.py +148 -0
  121. local_deep_research/metrics/migrate_call_stack_tracking.py +105 -0
  122. local_deep_research/metrics/migrate_enhanced_tracking.py +75 -0
  123. local_deep_research/metrics/migrate_research_ratings.py +31 -0
  124. local_deep_research/metrics/models.py +61 -0
  125. local_deep_research/metrics/pricing/__init__.py +12 -0
  126. local_deep_research/metrics/pricing/cost_calculator.py +237 -0
  127. local_deep_research/metrics/pricing/pricing_cache.py +143 -0
  128. local_deep_research/metrics/pricing/pricing_fetcher.py +240 -0
  129. local_deep_research/metrics/query_utils.py +51 -0
  130. local_deep_research/metrics/search_tracker.py +380 -0
  131. local_deep_research/metrics/token_counter.py +1078 -0
  132. local_deep_research/migrate_db.py +3 -1
  133. local_deep_research/report_generator.py +22 -8
  134. local_deep_research/search_system.py +390 -9
  135. local_deep_research/test_migration.py +15 -5
  136. local_deep_research/utilities/db_utils.py +7 -4
  137. local_deep_research/utilities/es_utils.py +115 -104
  138. local_deep_research/utilities/llm_utils.py +15 -5
  139. local_deep_research/utilities/log_utils.py +151 -0
  140. local_deep_research/utilities/search_cache.py +387 -0
  141. local_deep_research/utilities/search_utilities.py +14 -6
  142. local_deep_research/utilities/threading_utils.py +92 -0
  143. local_deep_research/utilities/url_utils.py +6 -0
  144. local_deep_research/web/api.py +347 -0
  145. local_deep_research/web/app.py +13 -17
  146. local_deep_research/web/app_factory.py +71 -66
  147. local_deep_research/web/database/migrate_to_ldr_db.py +12 -4
  148. local_deep_research/web/database/migrations.py +5 -3
  149. local_deep_research/web/database/models.py +51 -2
  150. local_deep_research/web/database/schema_upgrade.py +49 -29
  151. local_deep_research/web/models/database.py +51 -61
  152. local_deep_research/web/routes/api_routes.py +56 -22
  153. local_deep_research/web/routes/benchmark_routes.py +4 -1
  154. local_deep_research/web/routes/globals.py +22 -0
  155. local_deep_research/web/routes/history_routes.py +71 -46
  156. local_deep_research/web/routes/metrics_routes.py +1155 -0
  157. local_deep_research/web/routes/research_routes.py +227 -41
  158. local_deep_research/web/routes/settings_routes.py +156 -55
  159. local_deep_research/web/services/research_service.py +310 -103
  160. local_deep_research/web/services/resource_service.py +36 -11
  161. local_deep_research/web/services/settings_manager.py +55 -17
  162. local_deep_research/web/services/settings_service.py +12 -4
  163. local_deep_research/web/services/socket_service.py +295 -188
  164. local_deep_research/web/static/css/custom_dropdown.css +180 -0
  165. local_deep_research/web/static/css/styles.css +39 -1
  166. local_deep_research/web/static/js/components/detail.js +633 -267
  167. local_deep_research/web/static/js/components/details.js +751 -0
  168. local_deep_research/web/static/js/components/fallback/formatting.js +11 -11
  169. local_deep_research/web/static/js/components/fallback/ui.js +23 -23
  170. local_deep_research/web/static/js/components/history.js +76 -76
  171. local_deep_research/web/static/js/components/logpanel.js +61 -13
  172. local_deep_research/web/static/js/components/progress.js +13 -2
  173. local_deep_research/web/static/js/components/research.js +99 -12
  174. local_deep_research/web/static/js/components/results.js +239 -106
  175. local_deep_research/web/static/js/main.js +40 -40
  176. local_deep_research/web/static/js/services/audio.js +1 -1
  177. local_deep_research/web/static/js/services/formatting.js +11 -11
  178. local_deep_research/web/static/js/services/keyboard.js +157 -0
  179. local_deep_research/web/static/js/services/pdf.js +80 -80
  180. local_deep_research/web/static/sounds/README.md +1 -1
  181. local_deep_research/web/templates/base.html +1 -0
  182. local_deep_research/web/templates/components/log_panel.html +7 -1
  183. local_deep_research/web/templates/components/mobile_nav.html +1 -1
  184. local_deep_research/web/templates/components/sidebar.html +3 -0
  185. local_deep_research/web/templates/pages/cost_analytics.html +1245 -0
  186. local_deep_research/web/templates/pages/details.html +325 -24
  187. local_deep_research/web/templates/pages/history.html +1 -1
  188. local_deep_research/web/templates/pages/metrics.html +1929 -0
  189. local_deep_research/web/templates/pages/progress.html +2 -2
  190. local_deep_research/web/templates/pages/research.html +53 -17
  191. local_deep_research/web/templates/pages/results.html +12 -1
  192. local_deep_research/web/templates/pages/star_reviews.html +803 -0
  193. local_deep_research/web/utils/formatters.py +9 -3
  194. local_deep_research/web_search_engines/default_search_engines.py +5 -3
  195. local_deep_research/web_search_engines/engines/full_search.py +8 -2
  196. local_deep_research/web_search_engines/engines/meta_search_engine.py +59 -20
  197. local_deep_research/web_search_engines/engines/search_engine_arxiv.py +19 -6
  198. local_deep_research/web_search_engines/engines/search_engine_brave.py +6 -2
  199. local_deep_research/web_search_engines/engines/search_engine_ddg.py +3 -1
  200. local_deep_research/web_search_engines/engines/search_engine_elasticsearch.py +81 -58
  201. local_deep_research/web_search_engines/engines/search_engine_github.py +46 -15
  202. local_deep_research/web_search_engines/engines/search_engine_google_pse.py +16 -6
  203. local_deep_research/web_search_engines/engines/search_engine_guardian.py +39 -15
  204. local_deep_research/web_search_engines/engines/search_engine_local.py +58 -25
  205. local_deep_research/web_search_engines/engines/search_engine_local_all.py +15 -5
  206. local_deep_research/web_search_engines/engines/search_engine_pubmed.py +63 -21
  207. local_deep_research/web_search_engines/engines/search_engine_searxng.py +37 -11
  208. local_deep_research/web_search_engines/engines/search_engine_semantic_scholar.py +27 -9
  209. local_deep_research/web_search_engines/engines/search_engine_serpapi.py +12 -4
  210. local_deep_research/web_search_engines/engines/search_engine_wayback.py +31 -10
  211. local_deep_research/web_search_engines/engines/search_engine_wikipedia.py +12 -3
  212. local_deep_research/web_search_engines/search_engine_base.py +83 -35
  213. local_deep_research/web_search_engines/search_engine_factory.py +25 -8
  214. local_deep_research/web_search_engines/search_engines_config.py +9 -3
  215. {local_deep_research-0.4.4.dist-info → local_deep_research-0.5.0.dist-info}/METADATA +7 -1
  216. local_deep_research-0.5.0.dist-info/RECORD +265 -0
  217. local_deep_research-0.4.4.dist-info/RECORD +0 -177
  218. {local_deep_research-0.4.4.dist-info → local_deep_research-0.5.0.dist-info}/WHEEL +0 -0
  219. {local_deep_research-0.4.4.dist-info → local_deep_research-0.5.0.dist-info}/entry_points.txt +0 -0
  220. {local_deep_research-0.4.4.dist-info → local_deep_research-0.5.0.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,624 @@
1
+ """
2
+ Cross-constraint search optimization manager.
3
+ """
4
+
5
+ import itertools
6
+ from collections import defaultdict
7
+ from dataclasses import dataclass, field
8
+ from typing import Dict, List, Optional, Set, Tuple
9
+
10
+ from langchain_core.language_models import BaseChatModel
11
+
12
+ from ...utilities.search_utilities import remove_think_tags
13
+ from ..candidates.base_candidate import Candidate
14
+ from ..constraints.base_constraint import Constraint
15
+
16
+
17
@dataclass
class ConstraintRelationship:
    """Pairwise link between two constraints, referenced by their IDs.

    Attributes:
        constraint1_id: ID of the first constraint of the pair.
        constraint2_id: ID of the second constraint of the pair.
        relationship_type: Label for the link, e.g. 'complementary',
            'dependent' or 'exclusive'.
        strength: How strong the link is, nominally 0.0 to 1.0.
        evidence: Free-text explanations backing the relationship; each
            instance gets its own fresh list by default.
    """

    constraint1_id: str
    constraint2_id: str
    relationship_type: str
    strength: float
    evidence: List[str] = field(default_factory=list)
26
+
27
+
28
@dataclass
class ConstraintCluster:
    """Group of related constraints that should be searched together.

    Attributes:
        constraints: The constraints that belong to this cluster.
        cluster_type: How the cluster was formed. The values assigned in
            this module are 'type_based', 'relationship_based' and
            'semantic'.
        coherence_score: How well the members belong together (higher is
            more coherent).
        search_queries: Queries generated for this cluster; empty until
            they have been generated. Each instance gets its own list.
    """

    constraints: List[Constraint]
    cluster_type: str
    coherence_score: float
    search_queries: List[str] = field(default_factory=list)
36
+
37
+
38
+ class CrossConstraintManager:
39
+ """
40
+ Manages cross-constraint relationships and optimizes multi-constraint searches.
41
+
42
+ Key features:
43
+ 1. Identifies relationships between constraints
44
+ 2. Clusters related constraints for efficient searching
45
+ 3. Generates cross-constraint validation queries
46
+ 4. Tracks cross-constraint evidence patterns
47
+ """
48
+
49
+ def __init__(self, model: BaseChatModel):
50
+ """Initialize the cross-constraint manager."""
51
+ self.model = model
52
+ self.relationships: Dict[Tuple[str, str], ConstraintRelationship] = {}
53
+ self.clusters: List[ConstraintCluster] = []
54
+ self.cross_validation_patterns: Dict[str, List[Dict]] = defaultdict(
55
+ list
56
+ )
57
+ self.constraint_graph: Dict[str, Set[str]] = defaultdict(set)
58
+
59
def analyze_constraint_relationships(
    self, constraints: List[Constraint]
) -> Dict[Tuple[str, str], ConstraintRelationship]:
    """Evaluate every unordered pair of constraints and keep the meaningful links.

    Each pair is passed to ``self._analyze_pair``. Links whose strength is
    above 0.3 are recorded under the key ``(first.id, second.id)`` and
    mirrored into ``self.constraint_graph`` in both directions.

    Returns:
        The links found by this call; they are also merged into
        ``self.relationships``.
    """
    found: Dict[Tuple[str, str], ConstraintRelationship] = {}
    graph = self.constraint_graph

    for first, second in itertools.combinations(constraints, 2):
        link = self._analyze_pair(first, second)
        # Weak links (strength at or below 0.3) are dropped.
        if link.strength > 0.3:
            found[(first.id, second.id)] = link
            graph[first.id].add(second.id)
            graph[second.id].add(first.id)

    self.relationships.update(found)
    return found
80
+
81
def _analyze_pair(
    self, c1: Constraint, c2: Constraint
) -> ConstraintRelationship:
    """Ask the model how two constraints relate and parse its reply.

    The reply is expected to contain ``Type:``, ``Strength:`` and
    ``Evidence:`` lines. Lines are stripped before matching so indented
    replies are still recognised, square brackets echoed from the format
    template are removed from the type and strength values, and the
    strength is clamped to the 0.0-1.0 range.

    Returns:
        A ConstraintRelationship for ``(c1.id, c2.id)``. Fields that are
        missing or unparseable keep their defaults: type ``"none"``,
        strength ``0.0`` and no evidence.
    """
    prompt = f"""
    Analyze the relationship between these two constraints:

    Constraint 1: {c1.description} (Type: {c1.type.value})
    Constraint 2: {c2.description} (Type: {c2.type.value})

    Determine:
    1. Relationship type (complementary, dependent, exclusive, or none)
    2. Strength of relationship (0.0 to 1.0)
    3. Brief explanation

    Format:
    Type: [relationship_type]
    Strength: [0.0-1.0]
    Evidence: [explanation]
    """

    response = self.model.invoke(prompt)
    content = remove_think_tags(response.content)

    def _value(text: str) -> str:
        """Return the text after the first colon, without padding or brackets."""
        return text.split(":", 1)[1].strip().strip("[]").strip()

    rel_type = "none"
    strength = 0.0
    evidence: List[str] = []

    for raw_line in content.strip().split("\n"):
        # Models often indent their answer; match on the stripped line.
        line = raw_line.strip()
        if line.startswith("Type:"):
            rel_type = _value(line).lower()
        elif line.startswith("Strength:"):
            try:
                parsed = float(_value(line))
            except ValueError:
                strength = 0.0
            else:
                # Keep the value inside the documented range (NaN becomes 0.0).
                strength = min(1.0, max(0.0, parsed))
        elif line.startswith("Evidence:"):
            evidence.append(line.split(":", 1)[1].strip())

    return ConstraintRelationship(
        constraint1_id=c1.id,
        constraint2_id=c2.id,
        relationship_type=rel_type,
        strength=strength,
        evidence=evidence,
    )
128
+
129
def create_constraint_clusters(
    self, constraints: List[Constraint]
) -> List[ConstraintCluster]:
    """Build clusters from three sources and store the deduplicated result.

    The sources are, in order: constraints sharing the same ``type``,
    connected groups of strongly related constraints (strength above 0.6),
    and semantic groupings. Relationships are analysed first when none
    have been recorded yet.

    Returns:
        The deduplicated clusters, which are also kept in ``self.clusters``.
    """
    if not self.relationships:
        self.analyze_constraint_relationships(constraints)

    # 1. Same-type groups with at least two members.
    by_type = defaultdict(list)
    for item in constraints:
        by_type[item.type].append(item)
    candidates = [
        ConstraintCluster(
            constraints=members,
            cluster_type="type_based",
            coherence_score=0.7,
        )
        for members in by_type.values()
        if len(members) > 1
    ]

    # 2. Groups connected through strong relationships.
    strong_links = [
        link for link in self.relationships.values() if link.strength > 0.6
    ]
    candidates.extend(
        self._create_relationship_clusters(constraints, strong_links)
    )

    # 3. Groups suggested by semantic similarity.
    candidates.extend(self._create_semantic_clusters(constraints))

    deduplicated = self._deduplicate_clusters(candidates)
    self.clusters = deduplicated
    return deduplicated
173
+
174
def _create_relationship_clusters(
    self,
    constraints: List[Constraint],
    relationships: List[ConstraintRelationship],
) -> List[ConstraintCluster]:
    """Create one cluster per connected component of the relationship graph.

    The given relationships are treated as undirected edges between
    constraint IDs. Components are discovered breadth-first in the order
    the constraints were passed in; only components with more than one
    known constraint become clusters. IDs that appear in a relationship
    but not in ``constraints`` are traversed but not added to a cluster.

    Returns:
        Clusters of type ``"relationship_based"`` whose coherence comes
        from ``self._calculate_cluster_coherence``.
    """
    # Function-scope import: the module only imports defaultdict.
    from collections import deque

    # Undirected adjacency list keyed by constraint ID.
    neighbours = defaultdict(list)
    for rel in relationships:
        neighbours[rel.constraint1_id].append(rel.constraint2_id)
        neighbours[rel.constraint2_id].append(rel.constraint1_id)

    # O(1) lookup by ID; the first constraint wins when IDs repeat, which
    # matches a first-match linear scan.
    by_id = {}
    for constraint in constraints:
        by_id.setdefault(constraint.id, constraint)

    clusters = []
    processed = set()

    for constraint in constraints:
        if constraint.id in processed:
            continue

        component = []
        visited = {constraint.id}
        queue = deque([constraint.id])

        while queue:
            current_id = queue.popleft()
            current = by_id.get(current_id)
            if current is not None:
                component.append(current)
                processed.add(current_id)

            for neighbor_id in neighbours.get(current_id, ()):
                if neighbor_id not in visited:
                    visited.add(neighbor_id)
                    queue.append(neighbor_id)

        if len(component) > 1:
            clusters.append(
                ConstraintCluster(
                    constraints=component,
                    cluster_type="relationship_based",
                    coherence_score=self._calculate_cluster_coherence(
                        component
                    ),
                )
            )

    return clusters
224
+
225
def _create_semantic_clusters(
    self, constraints: List[Constraint]
) -> List[ConstraintCluster]:
    """Ask the model to group constraints by meaning and parse the groups.

    The reply is read line by line. A ``CLUSTER_`` line starts a new
    record; ``Constraints:``, ``Theme:`` and ``Coherence:`` lines fill it
    in. A record becomes a cluster only when more than one of its IDs
    matches a constraint in ``constraints``.

    NOTE(review): IDs are extracted with the pattern ``c\\d+``, so this
    assumes constraint IDs look like ``c1``, ``c2``, ... -- confirm against
    how IDs are assigned and formatted for the prompt.

    Returns:
        Clusters of type ``"semantic"``; coherence defaults to 0.5 when
        the reply gives none or an unparseable value.
    """
    # Imported once per call rather than on every parsed line.
    import re

    prompt = f"""
    Group these constraints into semantic clusters based on their meaning and intent:

    {self._format_constraints_for_clustering(constraints)}

    For each cluster:
    1. List the constraint IDs
    2. Describe the cluster theme
    3. Rate coherence (0.0-1.0)

    Format:
    CLUSTER_1:
    Constraints: [id1, id2, ...]
    Theme: [description]
    Coherence: [0.0-1.0]
    """

    response = self.model.invoke(prompt)
    content = remove_think_tags(response.content)

    id_pattern = re.compile(r"c\d+")
    clusters = []

    def _flush(record: Dict) -> None:
        """Append a cluster for ``record`` if it names more than one known constraint."""
        if "constraints" not in record:
            return
        wanted = set(record["constraints"])
        members = [c for c in constraints if c.id in wanted]
        if len(members) > 1:
            clusters.append(
                ConstraintCluster(
                    constraints=members,
                    cluster_type="semantic",
                    coherence_score=float(record.get("coherence", 0.5)),
                )
            )

    current_cluster: Dict = {}

    for raw_line in content.strip().split("\n"):
        line = raw_line.strip()

        if line.startswith("CLUSTER_"):
            # A new header closes the record collected so far.
            _flush(current_cluster)
            current_cluster = {}

        elif line.startswith("Constraints:"):
            ids_str = line.split(":", 1)[1].strip()
            current_cluster["constraints"] = id_pattern.findall(ids_str)

        elif line.startswith("Theme:"):
            current_cluster["theme"] = line.split(":", 1)[1].strip()

        elif line.startswith("Coherence:"):
            try:
                current_cluster["coherence"] = float(
                    line.split(":", 1)[1].strip()
                )
            except ValueError:
                current_cluster["coherence"] = 0.5

    # The final record has no following header to close it.
    _flush(current_cluster)

    return clusters
312
+
313
def generate_cross_constraint_queries(
    self, cluster: ConstraintCluster
) -> List[str]:
    """Produce the search queries for ``cluster`` and remember them on it.

    The result is assembled in this order: the combined query over all
    constraints, the progressive queries, the intersection query (only
    when one was produced), and the validation queries.

    Returns:
        The query list, which is also assigned to
        ``cluster.search_queries``.
    """
    members = cluster.constraints

    # Combined query first, then the progressively built ones.
    queries = [self._generate_combined_query(members)]
    queries.extend(self._generate_progressive_queries(members))

    # The intersection query is optional.
    shared_aspect_query = self._generate_intersection_query(members)
    if shared_aspect_query:
        queries.append(shared_aspect_query)

    # Cross-check queries go last.
    queries.extend(self._generate_validation_queries(members))

    cluster.search_queries = queries
    return queries
346
+
347
def _generate_combined_query(self, constraints: List[Constraint]) -> str:
    """Have the model write one search query that covers all ``constraints``.

    Returns:
        The model's reply with think tags removed and surrounding
        whitespace stripped.
    """
    prompt = f"""
    Create a search query that finds entities satisfying ALL of these related constraints:

    {self._format_constraints_for_query(constraints)}

    The query should:
    1. Efficiently combine all constraints
    2. Use appropriate operators (AND, OR)
    3. Focus on finding specific entities
    4. Be neither too broad nor too narrow

    Return only the search query.
    """

    reply = self.model.invoke(prompt)
    cleaned = remove_think_tags(reply.content)
    return cleaned.strip()
365
+
366
+ def _generate_progressive_queries(
367
+ self, constraints: List[Constraint]
368
+ ) -> List[str]:
369
+ """Generate queries that progressively add constraints."""
370
+ queries = []
371
+
372
+ # Sort by weight/importance
373
+ sorted_constraints = sorted(
374
+ constraints, key=lambda c: c.weight, reverse=True
375
+ )
376
+
377
+ # Build up constraints
378
+ for i in range(2, min(len(sorted_constraints) + 1, 4)):
379
+ subset = sorted_constraints[:i]
380
+ query = self._generate_combined_query(subset)
381
+ queries.append(query)
382
+
383
+ return queries
384
+
385
def _generate_intersection_query(
    self, constraints: List[Constraint]
) -> Optional[str]:
    """Ask the model for a query targeting what the constraints have in common.

    Returns:
        The stripped query text, or ``None`` when fewer than two
        constraints are given, the reply is empty, or the model answers
        with the NONE sentinel. The sentinel is recognised
        case-insensitively and with surrounding quotes, backticks or a
        trailing period, because the prompt itself shows it in quotes.
    """
    if len(constraints) < 2:
        return None

    prompt = f"""
    Identify the common theme or intersection among these constraints:

    {self._format_constraints_for_query(constraints)}

    Create a search query that targets this common aspect.
    Return only the search query, or 'NONE' if no clear intersection exists.
    """

    response = self.model.invoke(prompt)
    query = remove_think_tags(response.content).strip()

    # Compare against the sentinel without wrapping quotes/punctuation.
    bare = query.strip("'\"`. \t").upper()
    if not bare or bare == "NONE":
        return None

    return query
408
+
409
def _generate_validation_queries(
    self, constraints: List[Constraint]
) -> List[str]:
    """Generate pairwise cross-validation queries for the first constraints.

    Pairs are drawn from the first three constraints in combination
    order. Only the first two pairs are sent to the model, so no model
    call is made for a query that would be discarded.

    Returns:
        At most two queries, one per pair, with think tags removed and
        whitespace stripped.
    """
    max_queries = 2
    queries = []

    # islice stops before the third pair, whose result was never returned.
    pairs = itertools.islice(
        itertools.combinations(constraints[:3], 2), max_queries
    )
    for c1, c2 in pairs:
        prompt = f"""
        Create a validation query that checks if an entity satisfies both:
        - {c1.description}
        - {c2.description}

        Return only the search query.
        """

        response = self.model.invoke(prompt)
        queries.append(remove_think_tags(response.content).strip())

    return queries
430
+
431
def validate_candidate_across_constraints(
    self, candidate: Candidate, constraints: List[Constraint]
) -> Dict[str, float]:
    """Score a candidate against several constraints at once.

    Two passes contribute scores, and each constraint keeps the highest
    score it receives:

    1. Every stored cluster that contains at least one of ``constraints``
       is validated via ``self._validate_with_cluster``; the cluster score
       is applied to each of its members that is also in ``constraints``.
    2. Every pair of ``constraints`` with a recorded ``"complementary"``
       relationship is validated via ``self._validate_pair``. The
       relationship is looked up in both key orders, so the result does
       not depend on the order in which the caller lists the constraints.

    Returns:
        Mapping of constraint ID to score. Constraints touched by neither
        pass are absent from the mapping.
    """
    validation_scores: Dict[str, float] = {}

    def _raise_to(constraint_id: str, score: float) -> None:
        """Keep the larger of the stored score and ``score``."""
        validation_scores[constraint_id] = max(
            validation_scores.get(constraint_id, 0.0), score
        )

    # Pass 1: cluster-level validation.
    relevant_clusters = [
        cluster
        for cluster in self.clusters
        if any(c in cluster.constraints for c in constraints)
    ]
    for cluster in relevant_clusters:
        cluster_score = self._validate_with_cluster(candidate, cluster)
        for constraint in cluster.constraints:
            if constraint in constraints:
                _raise_to(constraint.id, cluster_score)

    # Pass 2: pairwise validation of complementary constraints.
    for c1, c2 in itertools.combinations(constraints, 2):
        rel = self.relationships.get((c1.id, c2.id))
        if rel is None:
            # Keys were stored in analysis order, which may be reversed here.
            rel = self.relationships.get((c2.id, c1.id))
        if rel is None or rel.relationship_type != "complementary":
            continue

        pair_score = self._validate_pair(candidate, c1, c2)
        _raise_to(c1.id, pair_score)
        _raise_to(c2.id, pair_score)

    return validation_scores
470
+
471
def _validate_with_cluster(
    self, candidate: Candidate, cluster: ConstraintCluster
) -> float:
    """Ask the model how well a candidate matches a whole constraint cluster.

    If the cluster has no search queries yet, they are generated with
    ``generate_cross_constraint_queries`` and stored on the cluster. The
    first query is then embedded in a prompt together with the cluster's
    constraints, and the model's ``Score:`` line is parsed.

    Args:
        candidate: The candidate whose ``name`` is placed in the prompt.
        cluster: The cluster providing the query and the constraints.

    Returns:
        A confidence in ``[0.0, 1.0]``. ``0.0`` is returned when no query
        is available for the cluster, when the reply has no ``Score:``
        line, or when the value on that line is not a finite number.
    """
    import math  # Local import so the block does not depend on file-level imports.

    if not cluster.search_queries:
        cluster.search_queries = self.generate_cross_constraint_queries(
            cluster
        )

    # Query generation may come back empty; without a query there is nothing
    # to validate against, so report "no evidence" instead of raising.
    if not cluster.search_queries:
        return 0.0

    # Use the most comprehensive query
    validation_query = cluster.search_queries[0]

    prompt = f"""
    Does "{candidate.name}" satisfy this multi-constraint query:
    Query: {validation_query}

    Constraints being checked:
    {self._format_constraints_for_query(cluster.constraints)}

    Provide a confidence score (0.0-1.0) based on how well the candidate matches.

    Format:
    Score: [0.0-1.0]
    Explanation: [brief explanation]
    """

    response = self.model.invoke(prompt)
    content = remove_think_tags(response.content)

    # Only the first "Score:" line counts. Lines are stripped and matched
    # case-insensitively because replies are not guaranteed to follow the
    # requested layout exactly.
    score = 0.0
    for raw_line in content.strip().split("\n"):
        line = raw_line.strip()
        if not line.lower().startswith("score:"):
            continue
        # Tolerate the value being echoed in the prompt's bracket notation.
        value_text = line.split(":", 1)[1].strip().strip("[]").strip()
        try:
            parsed = float(value_text)
        except ValueError:
            parsed = 0.0
        # float() accepts "nan" and "inf"; neither is a usable confidence.
        if math.isfinite(parsed):
            # Clamp so callers that combine scores always see [0.0, 1.0].
            score = min(max(parsed, 0.0), 1.0)
        break

    return score
511
+
512
def _validate_pair(
    self, candidate: Candidate, c1: Constraint, c2: Constraint
) -> float:
    """Ask the model whether a candidate satisfies two constraints together.

    Args:
        candidate: The candidate whose ``name`` is placed in the prompt.
        c1: First constraint of the pair.
        c2: Second constraint of the pair.

    Returns:
        A confidence in ``[0.0, 1.0]``. ``0.0`` is returned when the reply
        has no ``Score:`` line or when the value on that line is not a
        finite number.
    """
    import math  # Local import so the block does not depend on file-level imports.

    prompt = f"""
    Evaluate if "{candidate.name}" satisfies BOTH constraints:

    1. {c1.description} (Type: {c1.type.value})
    2. {c2.description} (Type: {c2.type.value})

    Consider how these constraints relate to each other and whether the candidate satisfies both.

    Provide a confidence score (0.0-1.0).

    Format:
    Score: [0.0-1.0]
    """

    response = self.model.invoke(prompt)
    content = remove_think_tags(response.content)

    # Only the first "Score:" line counts. Lines are stripped and matched
    # case-insensitively because replies are not guaranteed to follow the
    # requested layout exactly.
    score = 0.0
    for raw_line in content.strip().split("\n"):
        line = raw_line.strip()
        if not line.lower().startswith("score:"):
            continue
        # Tolerate the value being echoed in the prompt's bracket notation.
        value_text = line.split(":", 1)[1].strip().strip("[]").strip()
        try:
            parsed = float(value_text)
        except ValueError:
            parsed = 0.0
        # float() accepts "nan" and "inf"; neither is a usable confidence.
        if math.isfinite(parsed):
            # Clamp so callers that combine scores always see [0.0, 1.0].
            score = min(max(parsed, 0.0), 1.0)
        break

    return score
544
+
545
+ def _calculate_cluster_coherence(
546
+ self, constraints: List[Constraint]
547
+ ) -> float:
548
+ """Calculate coherence score for a constraint cluster."""
549
+ if len(constraints) < 2:
550
+ return 0.0
551
+
552
+ # Calculate based on relationship strengths
553
+ total_strength = 0.0
554
+ pair_count = 0
555
+
556
+ for c1, c2 in itertools.combinations(constraints, 2):
557
+ key = (c1.id, c2.id)
558
+ if key in self.relationships:
559
+ total_strength += self.relationships[key].strength
560
+ pair_count += 1
561
+
562
+ if pair_count == 0:
563
+ return 0.5 # Default coherence
564
+
565
+ average_strength = total_strength / pair_count
566
+
567
+ # Adjust for cluster size (larger clusters with high average strength are better)
568
+ size_factor = min(len(constraints) / 5.0, 1.0)
569
+
570
+ return average_strength * (0.7 + 0.3 * size_factor)
571
+
572
+ def _deduplicate_clusters(
573
+ self, clusters: List[ConstraintCluster]
574
+ ) -> List[ConstraintCluster]:
575
+ """Remove duplicate clusters."""
576
+ unique_clusters = []
577
+ seen_sets = []
578
+
579
+ for cluster in clusters:
580
+ constraint_set = {c.id for c in cluster.constraints}
581
+
582
+ # Check if we've seen this set
583
+ is_duplicate = False
584
+ for seen_set in seen_sets:
585
+ if constraint_set == seen_set:
586
+ is_duplicate = True
587
+ break
588
+
589
+ if not is_duplicate:
590
+ unique_clusters.append(cluster)
591
+ seen_sets.append(constraint_set)
592
+
593
+ return unique_clusters
594
+
595
+ def _format_constraints_for_clustering(
596
+ self, constraints: List[Constraint]
597
+ ) -> str:
598
+ """Format constraints for clustering prompt."""
599
+ formatted = []
600
+ for c in constraints:
601
+ formatted.append(
602
+ f"{c.id}: {c.description} (Type: {c.type.value}, Weight: {c.weight})"
603
+ )
604
+ return "\n".join(formatted)
605
+
606
+ def _format_constraints_for_query(
607
+ self, constraints: List[Constraint]
608
+ ) -> str:
609
+ """Format constraints for query generation."""
610
+ formatted = []
611
+ for c in constraints:
612
+ formatted.append(f"- {c.description} [{c.type.value}]")
613
+ return "\n".join(formatted)
614
+
615
def optimize_search_order(
    self, clusters: List[ConstraintCluster]
) -> List[ConstraintCluster]:
    """Return the clusters ordered from most to least promising to search.

    A cluster's priority is its ``coherence_score`` multiplied by the
    number of constraints it holds. Clusters with equal priority keep
    their relative input order.

    Args:
        clusters: Clusters to order; the list itself is not modified.

    Returns:
        A new list sorted by descending priority.
    """

    def priority(cluster):
        return cluster.coherence_score * len(cluster.constraints)

    ordered = list(clusters)
    ordered.sort(key=priority, reverse=True)
    return ordered