local-deep-research 0.4.4__py3-none-any.whl → 0.5.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (220)
  1. local_deep_research/__init__.py +7 -0
  2. local_deep_research/__version__.py +1 -1
  3. local_deep_research/advanced_search_system/answer_decoding/__init__.py +5 -0
  4. local_deep_research/advanced_search_system/answer_decoding/browsecomp_answer_decoder.py +421 -0
  5. local_deep_research/advanced_search_system/candidate_exploration/README.md +219 -0
  6. local_deep_research/advanced_search_system/candidate_exploration/__init__.py +25 -0
  7. local_deep_research/advanced_search_system/candidate_exploration/adaptive_explorer.py +329 -0
  8. local_deep_research/advanced_search_system/candidate_exploration/base_explorer.py +341 -0
  9. local_deep_research/advanced_search_system/candidate_exploration/constraint_guided_explorer.py +436 -0
  10. local_deep_research/advanced_search_system/candidate_exploration/diversity_explorer.py +457 -0
  11. local_deep_research/advanced_search_system/candidate_exploration/parallel_explorer.py +250 -0
  12. local_deep_research/advanced_search_system/candidate_exploration/progressive_explorer.py +255 -0
  13. local_deep_research/advanced_search_system/candidates/__init__.py +5 -0
  14. local_deep_research/advanced_search_system/candidates/base_candidate.py +59 -0
  15. local_deep_research/advanced_search_system/constraint_checking/README.md +150 -0
  16. local_deep_research/advanced_search_system/constraint_checking/__init__.py +35 -0
  17. local_deep_research/advanced_search_system/constraint_checking/base_constraint_checker.py +122 -0
  18. local_deep_research/advanced_search_system/constraint_checking/constraint_checker.py +223 -0
  19. local_deep_research/advanced_search_system/constraint_checking/constraint_satisfaction_tracker.py +387 -0
  20. local_deep_research/advanced_search_system/constraint_checking/dual_confidence_checker.py +424 -0
  21. local_deep_research/advanced_search_system/constraint_checking/evidence_analyzer.py +174 -0
  22. local_deep_research/advanced_search_system/constraint_checking/intelligent_constraint_relaxer.py +503 -0
  23. local_deep_research/advanced_search_system/constraint_checking/rejection_engine.py +143 -0
  24. local_deep_research/advanced_search_system/constraint_checking/strict_checker.py +259 -0
  25. local_deep_research/advanced_search_system/constraint_checking/threshold_checker.py +213 -0
  26. local_deep_research/advanced_search_system/constraints/__init__.py +6 -0
  27. local_deep_research/advanced_search_system/constraints/base_constraint.py +58 -0
  28. local_deep_research/advanced_search_system/constraints/constraint_analyzer.py +143 -0
  29. local_deep_research/advanced_search_system/evidence/__init__.py +12 -0
  30. local_deep_research/advanced_search_system/evidence/base_evidence.py +57 -0
  31. local_deep_research/advanced_search_system/evidence/evaluator.py +159 -0
  32. local_deep_research/advanced_search_system/evidence/requirements.py +122 -0
  33. local_deep_research/advanced_search_system/filters/base_filter.py +3 -1
  34. local_deep_research/advanced_search_system/filters/cross_engine_filter.py +8 -2
  35. local_deep_research/advanced_search_system/filters/journal_reputation_filter.py +43 -29
  36. local_deep_research/advanced_search_system/findings/repository.py +54 -17
  37. local_deep_research/advanced_search_system/knowledge/standard_knowledge.py +3 -1
  38. local_deep_research/advanced_search_system/query_generation/adaptive_query_generator.py +405 -0
  39. local_deep_research/advanced_search_system/questions/__init__.py +16 -0
  40. local_deep_research/advanced_search_system/questions/atomic_fact_question.py +171 -0
  41. local_deep_research/advanced_search_system/questions/browsecomp_question.py +287 -0
  42. local_deep_research/advanced_search_system/questions/decomposition_question.py +13 -4
  43. local_deep_research/advanced_search_system/questions/entity_aware_question.py +184 -0
  44. local_deep_research/advanced_search_system/questions/standard_question.py +9 -3
  45. local_deep_research/advanced_search_system/search_optimization/cross_constraint_manager.py +624 -0
  46. local_deep_research/advanced_search_system/source_management/diversity_manager.py +613 -0
  47. local_deep_research/advanced_search_system/strategies/__init__.py +42 -0
  48. local_deep_research/advanced_search_system/strategies/adaptive_decomposition_strategy.py +564 -0
  49. local_deep_research/advanced_search_system/strategies/base_strategy.py +4 -4
  50. local_deep_research/advanced_search_system/strategies/browsecomp_entity_strategy.py +1031 -0
  51. local_deep_research/advanced_search_system/strategies/browsecomp_optimized_strategy.py +778 -0
  52. local_deep_research/advanced_search_system/strategies/concurrent_dual_confidence_strategy.py +446 -0
  53. local_deep_research/advanced_search_system/strategies/constrained_search_strategy.py +1348 -0
  54. local_deep_research/advanced_search_system/strategies/constraint_parallel_strategy.py +522 -0
  55. local_deep_research/advanced_search_system/strategies/direct_search_strategy.py +217 -0
  56. local_deep_research/advanced_search_system/strategies/dual_confidence_strategy.py +320 -0
  57. local_deep_research/advanced_search_system/strategies/dual_confidence_with_rejection.py +219 -0
  58. local_deep_research/advanced_search_system/strategies/early_stop_constrained_strategy.py +369 -0
  59. local_deep_research/advanced_search_system/strategies/entity_aware_source_strategy.py +140 -0
  60. local_deep_research/advanced_search_system/strategies/evidence_based_strategy.py +1248 -0
  61. local_deep_research/advanced_search_system/strategies/evidence_based_strategy_v2.py +1337 -0
  62. local_deep_research/advanced_search_system/strategies/focused_iteration_strategy.py +537 -0
  63. local_deep_research/advanced_search_system/strategies/improved_evidence_based_strategy.py +782 -0
  64. local_deep_research/advanced_search_system/strategies/iterative_reasoning_strategy.py +760 -0
  65. local_deep_research/advanced_search_system/strategies/iterdrag_strategy.py +55 -21
  66. local_deep_research/advanced_search_system/strategies/llm_driven_modular_strategy.py +865 -0
  67. local_deep_research/advanced_search_system/strategies/modular_strategy.py +1142 -0
  68. local_deep_research/advanced_search_system/strategies/parallel_constrained_strategy.py +506 -0
  69. local_deep_research/advanced_search_system/strategies/parallel_search_strategy.py +34 -16
  70. local_deep_research/advanced_search_system/strategies/rapid_search_strategy.py +29 -9
  71. local_deep_research/advanced_search_system/strategies/recursive_decomposition_strategy.py +492 -0
  72. local_deep_research/advanced_search_system/strategies/smart_decomposition_strategy.py +284 -0
  73. local_deep_research/advanced_search_system/strategies/smart_query_strategy.py +515 -0
  74. local_deep_research/advanced_search_system/strategies/source_based_strategy.py +48 -24
  75. local_deep_research/advanced_search_system/strategies/standard_strategy.py +34 -14
  76. local_deep_research/advanced_search_system/tools/base_tool.py +7 -2
  77. local_deep_research/api/benchmark_functions.py +6 -2
  78. local_deep_research/api/research_functions.py +10 -4
  79. local_deep_research/benchmarks/__init__.py +9 -7
  80. local_deep_research/benchmarks/benchmark_functions.py +6 -2
  81. local_deep_research/benchmarks/cli/benchmark_commands.py +27 -10
  82. local_deep_research/benchmarks/cli.py +38 -13
  83. local_deep_research/benchmarks/comparison/__init__.py +4 -2
  84. local_deep_research/benchmarks/comparison/evaluator.py +316 -239
  85. local_deep_research/benchmarks/datasets/__init__.py +1 -1
  86. local_deep_research/benchmarks/datasets/base.py +91 -72
  87. local_deep_research/benchmarks/datasets/browsecomp.py +54 -33
  88. local_deep_research/benchmarks/datasets/custom_dataset_template.py +19 -19
  89. local_deep_research/benchmarks/datasets/simpleqa.py +14 -14
  90. local_deep_research/benchmarks/datasets/utils.py +48 -29
  91. local_deep_research/benchmarks/datasets.py +4 -11
  92. local_deep_research/benchmarks/efficiency/__init__.py +8 -4
  93. local_deep_research/benchmarks/efficiency/resource_monitor.py +223 -171
  94. local_deep_research/benchmarks/efficiency/speed_profiler.py +62 -48
  95. local_deep_research/benchmarks/evaluators/browsecomp.py +3 -1
  96. local_deep_research/benchmarks/evaluators/composite.py +6 -2
  97. local_deep_research/benchmarks/evaluators/simpleqa.py +36 -13
  98. local_deep_research/benchmarks/graders.py +32 -10
  99. local_deep_research/benchmarks/metrics/README.md +1 -1
  100. local_deep_research/benchmarks/metrics/calculation.py +25 -10
  101. local_deep_research/benchmarks/metrics/reporting.py +7 -3
  102. local_deep_research/benchmarks/metrics/visualization.py +42 -23
  103. local_deep_research/benchmarks/metrics.py +1 -1
  104. local_deep_research/benchmarks/optimization/__init__.py +3 -1
  105. local_deep_research/benchmarks/optimization/api.py +7 -1
  106. local_deep_research/benchmarks/optimization/optuna_optimizer.py +75 -26
  107. local_deep_research/benchmarks/runners.py +48 -15
  108. local_deep_research/citation_handler.py +65 -92
  109. local_deep_research/citation_handlers/__init__.py +15 -0
  110. local_deep_research/citation_handlers/base_citation_handler.py +70 -0
  111. local_deep_research/citation_handlers/forced_answer_citation_handler.py +179 -0
  112. local_deep_research/citation_handlers/precision_extraction_handler.py +550 -0
  113. local_deep_research/citation_handlers/standard_citation_handler.py +80 -0
  114. local_deep_research/config/llm_config.py +271 -169
  115. local_deep_research/config/search_config.py +14 -5
  116. local_deep_research/defaults/__init__.py +0 -1
  117. local_deep_research/metrics/__init__.py +13 -0
  118. local_deep_research/metrics/database.py +58 -0
  119. local_deep_research/metrics/db_models.py +115 -0
  120. local_deep_research/metrics/migrate_add_provider_to_token_usage.py +148 -0
  121. local_deep_research/metrics/migrate_call_stack_tracking.py +105 -0
  122. local_deep_research/metrics/migrate_enhanced_tracking.py +75 -0
  123. local_deep_research/metrics/migrate_research_ratings.py +31 -0
  124. local_deep_research/metrics/models.py +61 -0
  125. local_deep_research/metrics/pricing/__init__.py +12 -0
  126. local_deep_research/metrics/pricing/cost_calculator.py +237 -0
  127. local_deep_research/metrics/pricing/pricing_cache.py +143 -0
  128. local_deep_research/metrics/pricing/pricing_fetcher.py +240 -0
  129. local_deep_research/metrics/query_utils.py +51 -0
  130. local_deep_research/metrics/search_tracker.py +380 -0
  131. local_deep_research/metrics/token_counter.py +1078 -0
  132. local_deep_research/migrate_db.py +3 -1
  133. local_deep_research/report_generator.py +22 -8
  134. local_deep_research/search_system.py +390 -9
  135. local_deep_research/test_migration.py +15 -5
  136. local_deep_research/utilities/db_utils.py +7 -4
  137. local_deep_research/utilities/es_utils.py +115 -104
  138. local_deep_research/utilities/llm_utils.py +15 -5
  139. local_deep_research/utilities/log_utils.py +151 -0
  140. local_deep_research/utilities/search_cache.py +387 -0
  141. local_deep_research/utilities/search_utilities.py +14 -6
  142. local_deep_research/utilities/threading_utils.py +92 -0
  143. local_deep_research/utilities/url_utils.py +6 -0
  144. local_deep_research/web/api.py +347 -0
  145. local_deep_research/web/app.py +13 -17
  146. local_deep_research/web/app_factory.py +71 -66
  147. local_deep_research/web/database/migrate_to_ldr_db.py +12 -4
  148. local_deep_research/web/database/migrations.py +20 -3
  149. local_deep_research/web/database/models.py +74 -25
  150. local_deep_research/web/database/schema_upgrade.py +49 -29
  151. local_deep_research/web/models/database.py +63 -83
  152. local_deep_research/web/routes/api_routes.py +56 -22
  153. local_deep_research/web/routes/benchmark_routes.py +4 -1
  154. local_deep_research/web/routes/globals.py +22 -0
  155. local_deep_research/web/routes/history_routes.py +71 -46
  156. local_deep_research/web/routes/metrics_routes.py +1155 -0
  157. local_deep_research/web/routes/research_routes.py +192 -54
  158. local_deep_research/web/routes/settings_routes.py +156 -55
  159. local_deep_research/web/services/research_service.py +412 -251
  160. local_deep_research/web/services/resource_service.py +36 -11
  161. local_deep_research/web/services/settings_manager.py +55 -17
  162. local_deep_research/web/services/settings_service.py +12 -4
  163. local_deep_research/web/services/socket_service.py +295 -188
  164. local_deep_research/web/static/css/custom_dropdown.css +180 -0
  165. local_deep_research/web/static/css/styles.css +39 -1
  166. local_deep_research/web/static/js/components/detail.js +633 -267
  167. local_deep_research/web/static/js/components/details.js +751 -0
  168. local_deep_research/web/static/js/components/fallback/formatting.js +11 -11
  169. local_deep_research/web/static/js/components/fallback/ui.js +23 -23
  170. local_deep_research/web/static/js/components/history.js +76 -76
  171. local_deep_research/web/static/js/components/logpanel.js +61 -13
  172. local_deep_research/web/static/js/components/progress.js +13 -2
  173. local_deep_research/web/static/js/components/research.js +99 -12
  174. local_deep_research/web/static/js/components/results.js +239 -106
  175. local_deep_research/web/static/js/main.js +40 -40
  176. local_deep_research/web/static/js/services/audio.js +1 -1
  177. local_deep_research/web/static/js/services/formatting.js +11 -11
  178. local_deep_research/web/static/js/services/keyboard.js +157 -0
  179. local_deep_research/web/static/js/services/pdf.js +80 -80
  180. local_deep_research/web/static/sounds/README.md +1 -1
  181. local_deep_research/web/templates/base.html +1 -0
  182. local_deep_research/web/templates/components/log_panel.html +7 -1
  183. local_deep_research/web/templates/components/mobile_nav.html +1 -1
  184. local_deep_research/web/templates/components/sidebar.html +3 -0
  185. local_deep_research/web/templates/pages/cost_analytics.html +1245 -0
  186. local_deep_research/web/templates/pages/details.html +325 -24
  187. local_deep_research/web/templates/pages/history.html +1 -1
  188. local_deep_research/web/templates/pages/metrics.html +1929 -0
  189. local_deep_research/web/templates/pages/progress.html +2 -2
  190. local_deep_research/web/templates/pages/research.html +53 -17
  191. local_deep_research/web/templates/pages/results.html +12 -1
  192. local_deep_research/web/templates/pages/star_reviews.html +803 -0
  193. local_deep_research/web/utils/formatters.py +9 -3
  194. local_deep_research/web_search_engines/default_search_engines.py +5 -3
  195. local_deep_research/web_search_engines/engines/full_search.py +8 -2
  196. local_deep_research/web_search_engines/engines/meta_search_engine.py +59 -20
  197. local_deep_research/web_search_engines/engines/search_engine_arxiv.py +19 -6
  198. local_deep_research/web_search_engines/engines/search_engine_brave.py +6 -2
  199. local_deep_research/web_search_engines/engines/search_engine_ddg.py +3 -1
  200. local_deep_research/web_search_engines/engines/search_engine_elasticsearch.py +81 -58
  201. local_deep_research/web_search_engines/engines/search_engine_github.py +46 -15
  202. local_deep_research/web_search_engines/engines/search_engine_google_pse.py +16 -6
  203. local_deep_research/web_search_engines/engines/search_engine_guardian.py +39 -15
  204. local_deep_research/web_search_engines/engines/search_engine_local.py +58 -25
  205. local_deep_research/web_search_engines/engines/search_engine_local_all.py +15 -5
  206. local_deep_research/web_search_engines/engines/search_engine_pubmed.py +63 -21
  207. local_deep_research/web_search_engines/engines/search_engine_searxng.py +37 -11
  208. local_deep_research/web_search_engines/engines/search_engine_semantic_scholar.py +27 -9
  209. local_deep_research/web_search_engines/engines/search_engine_serpapi.py +12 -4
  210. local_deep_research/web_search_engines/engines/search_engine_wayback.py +31 -10
  211. local_deep_research/web_search_engines/engines/search_engine_wikipedia.py +12 -3
  212. local_deep_research/web_search_engines/search_engine_base.py +83 -35
  213. local_deep_research/web_search_engines/search_engine_factory.py +25 -8
  214. local_deep_research/web_search_engines/search_engines_config.py +9 -3
  215. {local_deep_research-0.4.4.dist-info → local_deep_research-0.5.2.dist-info}/METADATA +7 -1
  216. local_deep_research-0.5.2.dist-info/RECORD +265 -0
  217. local_deep_research-0.4.4.dist-info/RECORD +0 -177
  218. {local_deep_research-0.4.4.dist-info → local_deep_research-0.5.2.dist-info}/WHEEL +0 -0
  219. {local_deep_research-0.4.4.dist-info → local_deep_research-0.5.2.dist-info}/entry_points.txt +0 -0
  220. {local_deep_research-0.4.4.dist-info → local_deep_research-0.5.2.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,405 @@
1
+ """
2
+ Adaptive query generation system for improved search performance.
3
+ """
4
+
5
+ from collections import defaultdict
6
+ from dataclasses import dataclass
7
+ from typing import Dict, List, Optional, Set, Tuple
8
+
9
+ from langchain_core.language_models import BaseChatModel
10
+
11
+ from ...utilities.search_utilities import remove_think_tags
12
+ from ..constraints.base_constraint import Constraint, ConstraintType
13
+
14
+
15
@dataclass
class QueryPattern:
    """Represents a successful query pattern."""

    # Query template with ``{placeholder}`` fields; filled with ``str.format``.
    template: str
    # Constraint types that must all be present for this pattern to be used.
    constraint_types: List[ConstraintType]
    # Score used to rank competing patterns (the highest one is preferred).
    success_rate: float
    # Concrete queries that followed this template.
    example_queries: List[str]
    # Entities found by queries that followed this template.
    discovered_entities: Set[str]
24
+
25
+
26
class AdaptiveQueryGenerator:
    """
    Generates search queries that adapt based on past performance.

    Features:
    1. Pattern learning from successful queries
    2. Semantic expansion for broader coverage
    3. Constraint combination optimization
    4. Failure recovery strategies
    """

    def __init__(self, model: BaseChatModel):
        """Initialize the adaptive query generator.

        Args:
            model: Chat model; only its ``invoke`` method is called, and the
                ``content`` attribute of the response is read.
        """
        self.model = model
        # Templates to try first; seeded below and extended by update_patterns().
        self.successful_patterns: List[QueryPattern] = []
        # Queries that must not be proposed again.
        self.failed_queries: Set[str] = set()
        # Cache of LLM-provided expansions, keyed by constraint value.
        self.semantic_expansions: Dict[str, List[str]] = {}
        # Success counter per (sorted) tuple of constraint types.
        self.constraint_combinations: Dict[
            Tuple[ConstraintType, ...], float
        ] = defaultdict(float)

        # Initialize default patterns
        self._initialize_default_patterns()

    def _initialize_default_patterns(self):
        """Seed ``successful_patterns`` with the built-in query templates."""
        default_patterns = [
            QueryPattern(
                template='"{entity}" {property} {location}',
                constraint_types=[
                    ConstraintType.NAME_PATTERN,
                    ConstraintType.PROPERTY,
                    ConstraintType.LOCATION,
                ],
                success_rate=0.7,
                example_queries=['"mountain" formed ice age Colorado'],
                discovered_entities=set(),
            ),
            QueryPattern(
                template="{event} {temporal} {statistic}",
                constraint_types=[
                    ConstraintType.EVENT,
                    ConstraintType.TEMPORAL,
                    ConstraintType.STATISTIC,
                ],
                success_rate=0.6,
                example_queries=["accident 2000-2021 statistics"],
                discovered_entities=set(),
            ),
            QueryPattern(
                template='"{name_pattern}" AND {comparison} {value}',
                constraint_types=[
                    ConstraintType.NAME_PATTERN,
                    ConstraintType.COMPARISON,
                ],
                success_rate=0.65,
                example_queries=['"body part" AND "84.5 times" ratio'],
                discovered_entities=set(),
            ),
        ]
        self.successful_patterns.extend(default_patterns)

    def generate_query(
        self, constraints: List[Constraint], context: Optional[Dict] = None
    ) -> str:
        """Generate an adaptive query based on constraints and context.

        Strategies are tried in order: learned/default patterns, semantic
        expansion, then the LLM. A strategy's result is skipped when it is
        empty or already recorded in ``failed_queries``.

        Args:
            constraints: Constraints the query should cover.
            context: Optional dict; only consulted by the LLM fallback
                (keys ``failed_queries`` and ``successful_queries``).

        Returns:
            The generated query string.
        """
        # Try pattern-based generation first
        pattern_query = self._generate_from_patterns(constraints)
        if pattern_query and pattern_query not in self.failed_queries:
            return pattern_query

        # Try semantic expansion
        expanded_query = self._generate_with_expansion(constraints)
        if expanded_query and expanded_query not in self.failed_queries:
            return expanded_query

        # Fall back to LLM-based generation
        return self._generate_with_llm(constraints, context)

    def _generate_from_patterns(
        self, constraints: List[Constraint]
    ) -> Optional[str]:
        """Generate query using learned patterns.

        A pattern is eligible when every one of its constraint types occurs
        among ``constraints``. Eligible patterns are tried from highest to
        lowest ``success_rate`` and the first template that can be filled is
        returned.

        Returns:
            The filled template, or ``None`` when no eligible pattern can be
            filled from the given constraints.
        """
        constraint_types = [c.type for c in constraints]

        matching_patterns = [
            pattern
            for pattern in self.successful_patterns
            if all(t in constraint_types for t in pattern.constraint_types)
        ]
        if not matching_patterns:
            return None

        template_vars = {}

        # Learned templates use "{<type.value>}" placeholders (see
        # _extract_pattern), so expose every constraint under that key.
        for constraint in constraints:
            template_vars[str(constraint.type.value)] = constraint.value

        # Placeholder names used by the built-in templates; these win over
        # the generic keys above.
        aliases = {
            ConstraintType.NAME_PATTERN: ("name_pattern", "entity"),
            ConstraintType.PROPERTY: ("property",),
            ConstraintType.LOCATION: ("location",),
            ConstraintType.EVENT: ("event",),
            ConstraintType.TEMPORAL: ("temporal",),
            ConstraintType.STATISTIC: ("statistic",),
            ConstraintType.COMPARISON: ("value",),
        }
        for constraint in constraints:
            for key in aliases.get(constraint.type, ()):
                template_vars[key] = constraint.value
            if constraint.type == ConstraintType.COMPARISON:
                template_vars["comparison"] = f'"{constraint.value}"'

        # sorted() is stable, so ties keep their insertion order, matching
        # what max() would have picked first.
        ranked = sorted(
            matching_patterns, key=lambda p: p.success_rate, reverse=True
        )
        for pattern in ranked:
            try:
                return pattern.template.format(**template_vars)
            except (KeyError, IndexError, ValueError):
                # Unknown placeholder or malformed braces in a learned
                # template: fall through to the next-best pattern instead of
                # giving up on pattern-based generation entirely.
                continue

        return None

    def _generate_with_expansion(
        self, constraints: List[Constraint]
    ) -> Optional[str]:
        """Generate query with semantic expansion.

        Each constraint value becomes one AND-ed term. When expansions are
        available the term is an OR group of the quoted value and up to two
        expansions; otherwise it is just the quoted value. Expansions are
        cached in ``semantic_expansions`` per constraint value.

        Returns:
            The combined query; an empty string when ``constraints`` is empty.
        """
        expanded_terms = []

        for constraint in constraints:
            value = constraint.value
            if value not in self.semantic_expansions:
                self.semantic_expansions[value] = (
                    self._get_semantic_expansions(value, constraint.type)
                )

            expansions = self.semantic_expansions[value]
            # Quote the original term as well: expansions are already quoted,
            # and an unquoted multi-word value would break the OR grouping.
            quoted_value = f'"{value}"'
            if expansions:
                alternatives = " OR ".join([quoted_value, *expansions[:2]])
                expanded_terms.append(f"({alternatives})")
            else:
                expanded_terms.append(quoted_value)

        return " AND ".join(expanded_terms)

    def _get_semantic_expansions(
        self, term: str, constraint_type: ConstraintType
    ) -> List[str]:
        """Get semantic expansions for a term.

        Asks the model for alternative phrasings and returns at most three
        non-empty response lines, each wrapped in double quotes.
        """
        prompt = f"""
Generate 3 alternative phrases or related terms for "{term}" in the context of {constraint_type.value}.

These should be:
1. Synonyms or near-synonyms
2. Related concepts
3. Alternative phrasings

Return only the terms, one per line.
"""

        response = self.model.invoke(prompt)
        expansions = [
            line.strip()
            for line in remove_think_tags(response.content).strip().split("\n")
            if line.strip()
        ]

        return [f'"{exp}"' for exp in expansions[:3]]

    def _generate_with_llm(
        self, constraints: List[Constraint], context: Optional[Dict] = None
    ) -> str:
        """Generate query using LLM with context awareness.

        Args:
            constraints: Constraints described to the model.
            context: Optional dict. Up to three entries of its
                ``failed_queries`` and ``successful_queries`` values are
                added to the prompt when those keys are present.

        Returns:
            The model response with think tags removed and whitespace
            stripped.
        """
        constraint_desc = self._format_constraints(constraints)

        context_info = ""
        if context:
            # list(...) lets callers pass any iterable (e.g. a set such as
            # self.failed_queries), not only sliceable sequences.
            if "failed_queries" in context:
                context_info += "\nFailed queries to avoid:\n" + "\n".join(
                    list(context["failed_queries"])[:3]
                )
            if "successful_queries" in context:
                context_info += "\nSuccessful query patterns:\n" + "\n".join(
                    list(context["successful_queries"])[:3]
                )

        prompt = f"""
Create an effective search query for these constraints:

{constraint_desc}
{context_info}

Guidelines:
1. Focus on finding specific named entities
2. Use operators (AND, OR, quotes) effectively
3. Combine constraints strategically
4. Make the query neither too broad nor too narrow

Return only the search query.
"""

        response = self.model.invoke(prompt)
        return remove_think_tags(response.content).strip()

    def update_patterns(
        self,
        query: str,
        constraints: List[Constraint],
        success: bool,
        entities_found: List[str],
    ):
        """Update patterns based on query performance.

        A query counts as successful only when ``success`` is true and
        ``entities_found`` is non-empty; any other query is added to
        ``failed_queries``.

        Args:
            query: The query that was executed.
            constraints: Constraints the query was built from.
            success: Whether the caller considers the query successful.
            entities_found: Entities the query surfaced.
        """
        if not (success and entities_found):
            self.failed_queries.add(query)
            return

        # Extract pattern from successful query
        pattern = self._extract_pattern(query, constraints)
        if pattern:
            existing = next(
                (
                    p
                    for p in self.successful_patterns
                    if p.template == pattern.template
                ),
                None,
            )

            if existing:
                # Average the previous rate with a perfect score.
                existing.success_rate = (existing.success_rate + 1.0) / 2
                existing.example_queries.append(query)
                existing.discovered_entities.update(entities_found)
            else:
                # Record the entities on brand-new patterns too.
                pattern.discovered_entities.update(entities_found)
                self.successful_patterns.append(pattern)

        # Update constraint combinations. Sort on the value because plain
        # Enum members do not support ordering comparisons.
        constraint_types = tuple(
            sorted(
                (c.type for c in constraints), key=lambda t: str(t.value)
            )
        )
        self.constraint_combinations[constraint_types] += 1

    def _extract_pattern(
        self, query: str, constraints: List[Constraint]
    ) -> Optional[QueryPattern]:
        """Extract a reusable pattern from a successful query.

        Every occurrence of a constraint value in the query is replaced by a
        ``{<type.value>}`` placeholder. Literal braces in the query are
        escaped so the result is a valid ``str.format`` template.

        Returns:
            A new pattern with ``success_rate`` 1.0, or ``None`` when no
            constraint value occurs in the query.
        """
        # Simple pattern extraction - could be made more sophisticated
        template = query.replace("{", "{{").replace("}", "}}")
        has_placeholder = False

        for constraint in constraints:
            value = constraint.value
            # An empty value matches everywhere and would splice a
            # placeholder between every character.
            if not value:
                continue
            escaped_value = value.replace("{", "{{").replace("}", "}}")
            if escaped_value in template:
                placeholder = f"{{{constraint.type.value}}}"
                template = template.replace(escaped_value, placeholder)
                has_placeholder = True

        # Only create pattern if it has placeholders
        if not has_placeholder:
            return None

        return QueryPattern(
            template=template,
            constraint_types=[c.type for c in constraints],
            success_rate=1.0,
            example_queries=[query],
            discovered_entities=set(),
        )

    def _format_constraints(self, constraints: List[Constraint]) -> str:
        """Format constraints for prompts, one ``- type: description`` line each."""
        return "\n".join(
            f"- {c.type.value}: {c.description} [value: {c.value}]"
            for c in constraints
        )

    def generate_fallback_queries(
        self, original_query: str, constraints: List[Constraint]
    ) -> List[str]:
        """Generate fallback queries when the original fails.

        Candidates are produced in this order: a simplified query built from
        the two highest-weight constraints (only when there are more than
        two), an OR-joined query, single-constraint queries for the first
        three constraints, and alternative phrasings from the model.

        Returns:
            Up to five distinct, non-empty candidates that are not in
            ``failed_queries``.
        """
        fallback_queries = []

        # 1. Simplified query (fewer constraints)
        if len(constraints) > 2:
            priority_constraints = sorted(
                constraints, key=lambda c: c.weight, reverse=True
            )[:2]
            fallback_queries.append(self.generate_query(priority_constraints))

        # 2. Broadened query (with OR instead of AND)
        fallback_queries.append(
            " OR ".join(f'"{c.value}"' for c in constraints)
        )

        # 3. Decomposed queries (one constraint at a time)
        fallback_queries.extend(
            self._generate_single_constraint_query(constraint)
            for constraint in constraints[:3]
        )

        # 4. Alternative phrasing
        alt_prompt = f"""
The query "{original_query}" failed. Create 2 alternative queries with different phrasing.

Constraints to satisfy:
{self._format_constraints(constraints)}

Return only the queries, one per line.
"""

        response = self.model.invoke(alt_prompt)
        fallback_queries.extend(
            line.strip()
            for line in remove_think_tags(response.content).strip().split("\n")
            if line.strip()
        )

        # Remove duplicates and failed queries
        unique_fallbacks = []
        for q in fallback_queries:
            if q and q not in self.failed_queries and q not in unique_fallbacks:
                unique_fallbacks.append(q)

        return unique_fallbacks[:5]

    def _generate_single_constraint_query(self, constraint: Constraint) -> str:
        """Generate a query for a single constraint.

        Uses a type-specific template; types without one fall back to the
        quoted value.
        """
        type_specific_templates = {
            ConstraintType.NAME_PATTERN: '"{value}" names list',
            ConstraintType.LOCATION: '"{value}" places locations',
            ConstraintType.EVENT: '"{value}" incidents accidents',
            ConstraintType.PROPERTY: 'things with "{value}" property',
            ConstraintType.STATISTIC: '"{value}" statistics data',
            ConstraintType.TEMPORAL: "events in {value}",
            ConstraintType.COMPARISON: '"{value}" comparison ratio',
            ConstraintType.EXISTENCE: 'has "{value}" feature',
        }

        template = type_specific_templates.get(constraint.type, '"{value}"')

        return template.format(value=constraint.value)

    def optimize_constraint_combinations(
        self, constraints: List[Constraint]
    ) -> List[List[Constraint]]:
        """Optimize constraint combinations based on past success.

        Combinations are listed in this order: previously successful type
        combinations (highest counter first) that can be fully matched by
        ``constraints``, then each constraint on its own, then every pair.
        Duplicates and empty combinations are dropped.

        Returns:
            At most ten combinations.
        """
        combinations: List[List[Constraint]] = []

        def add(combo: List[Constraint]) -> None:
            if combo and combo not in combinations:
                combinations.append(combo)

        # Sort constraint combinations by success rate
        ranked_combos = sorted(
            self.constraint_combinations.items(),
            key=lambda item: item[1],
            reverse=True,
        )

        # Try successful combinations first
        for combo_types, _ in ranked_combos:
            selected = []
            for ctype in combo_types:
                match = next(
                    (c for c in constraints if c.type == ctype), None
                )
                if match is None:
                    # A required type is missing; skip this combination.
                    break
                selected.append(match)
            else:
                add(selected)

        # Add individual constraints
        for constraint in constraints:
            add([constraint])

        # Add pairs not yet tried
        for index, first in enumerate(constraints):
            for second in constraints[index + 1 :]:
                add([first, second])

        return combinations[:10]  # Limit to top 10
@@ -1 +1,17 @@
1
1
  # Search System Questions Package
2
+
3
+ from .atomic_fact_question import AtomicFactQuestionGenerator
4
+ from .base_question import BaseQuestionGenerator
5
+ from .browsecomp_question import BrowseCompQuestionGenerator
6
+ from .decomposition_question import DecompositionQuestionGenerator
7
+ from .entity_aware_question import EntityAwareQuestionGenerator
8
+ from .standard_question import StandardQuestionGenerator
9
+
10
+ __all__ = [
11
+ "BaseQuestionGenerator",
12
+ "StandardQuestionGenerator",
13
+ "DecompositionQuestionGenerator",
14
+ "AtomicFactQuestionGenerator",
15
+ "EntityAwareQuestionGenerator",
16
+ "BrowseCompQuestionGenerator",
17
+ ]
@@ -0,0 +1,171 @@
1
+ """
2
+ Atomic fact question generator for complex queries.
3
+ Decomposes complex queries into atomic, independently searchable facts.
4
+ """
5
+
6
+ import logging
7
+ from typing import Dict, List
8
+
9
+ from .base_question import BaseQuestionGenerator
10
+
11
+ logger = logging.getLogger(__name__)
12
+
13
+
14
class AtomicFactQuestionGenerator(BaseQuestionGenerator):
    """
    Generates questions by decomposing complex queries into atomic facts.

    This approach prevents the system from searching for documents that match
    ALL criteria at once, instead finding facts independently and then reasoning
    about connections.

    The first call (no previous questions) asks the model to split the query
    into independent facts; later calls ask it for follow-up questions that
    fill gaps or connect what has been found so far.
    """

    # Single-character bullets that may precede a question in a model reply.
    _BULLET_CHARS = "-*•"

    def generate_questions(
        self,
        current_knowledge: str,
        query: str,
        questions_per_iteration: int = 5,
        questions_by_iteration: Dict[int, List[str]] = None,
    ) -> List[str]:
        """
        Generate atomic fact questions from a complex query.

        Args:
            current_knowledge: The accumulated knowledge so far
            query: The original research query
            questions_per_iteration: Number of questions to generate on
                follow-up iterations (the initial decomposition uses its
                own cap, see ``_decompose_to_atomic_facts``)
            questions_by_iteration: Questions generated in previous
                iterations; ``None`` or empty means this is the first one

        Returns:
            List of atomic fact questions
        """
        previous = questions_by_iteration or {}

        # First iteration: nothing has been asked yet, so decompose the query.
        if not previous:
            return self._decompose_to_atomic_facts(query)

        # Later iterations: fill knowledge gaps or explore connections.
        return self._generate_gap_filling_questions(
            query,
            current_knowledge,
            previous,
            questions_per_iteration,
        )

    def _decompose_to_atomic_facts(
        self, query: str, max_facts: int = 5
    ) -> List[str]:
        """Decompose complex query into atomic, searchable facts.

        Args:
            query: The original research query
            max_facts: Maximum number of facts to return (default 5)

        Returns:
            Up to ``max_facts`` questions parsed from the model reply
        """
        prompt = f"""Decompose this complex query into simple, atomic facts that can be searched independently.

Query: {query}

Break this down into individual facts that can be searched separately. Each fact should:
1. Be about ONE thing only
2. Be searchable on its own
3. Not depend on other facts
4. Use general terms (e.g., "body parts" not specific ones)

For example, if the query is about a location with multiple criteria, create separate questions for:
- The geographical/geological aspect
- The naming aspect
- The historical events
- The statistical comparisons

Return ONLY the questions, one per line.
Example format:
What locations were formed by glaciers?
What geographic features are named after body parts?
Where did falls occur between specific dates?
"""

        response = self.model.invoke(prompt)
        questions = self._parse_question_lines(
            self._extract_response_text(response)
        )

        # The log reports how many facts were parsed, before the cap applies.
        logger.info(f"Decomposed query into {len(questions)} atomic facts")
        return questions[:max_facts]

    def _generate_gap_filling_questions(
        self,
        original_query: str,
        current_knowledge: str,
        questions_by_iteration: Dict[int, List[str]],
        questions_per_iteration: int,
    ) -> List[str]:
        """Generate questions to fill knowledge gaps or make connections.

        With three or more recorded iterations the prompt asks the model to
        connect and verify facts; with fewer it asks for further atomic facts.

        Args:
            original_query: The original research query
            current_knowledge: The accumulated knowledge so far
            questions_by_iteration: Questions generated in previous iterations
            questions_per_iteration: Number of questions to request and the
                maximum number returned

        Returns:
            Up to ``questions_per_iteration`` questions parsed from the reply
        """
        # _format_previous_questions is not defined in this class; it is
        # presumably inherited from BaseQuestionGenerator.
        previous_questions = self._format_previous_questions(
            questions_by_iteration
        )

        # Check if we have enough information to start reasoning
        if len(questions_by_iteration) >= 3:
            prompt = f"""Based on the accumulated knowledge, generate questions that help connect the facts or fill remaining gaps.

Original Query: {original_query}

Current Knowledge:
{current_knowledge}

Previous Questions:
{previous_questions}

Generate {questions_per_iteration} questions that:
1. Connect different facts you've found
2. Fill specific gaps in knowledge
3. Search for locations that match multiple criteria
4. Verify specific details

Return ONLY the questions, one per line.
"""
        else:
            # Still gathering basic facts
            prompt = f"""Continue gathering atomic facts for this query.

Original Query: {original_query}

Previous Questions:
{previous_questions}

Current Knowledge:
{current_knowledge}

Generate {questions_per_iteration} more atomic fact questions that help build a complete picture.
Focus on facts not yet explored.

Return ONLY the questions, one per line.
"""

        response = self.model.invoke(prompt)
        questions = self._parse_question_lines(
            self._extract_response_text(response)
        )
        return questions[:questions_per_iteration]

    @staticmethod
    def _extract_response_text(response) -> str:
        """Return the text of a model response as a string.

        Uses ``response.content`` when that attribute exists, otherwise the
        response itself. Non-string payloads are converted with ``str`` so
        that parsing never fails on a missing ``strip`` method.
        """
        content = getattr(response, "content", response)
        return content if isinstance(content, str) else str(content)

    @classmethod
    def _strip_list_markers(cls, line: str) -> str:
        """Remove leading bullets and ``N.`` / ``N)`` enumeration markers.

        Markers are removed repeatedly, so ``- 1. text`` and ``1. - text``
        both yield ``text``. A number followed by a separator and another
        digit (e.g. ``1.5 million``) is treated as content, not a marker.
        """
        while line:
            if line[0] in cls._BULLET_CHARS:
                line = line[1:].lstrip()
                continue

            digit_count = len(line) - len(line.lstrip("0123456789"))
            marker_end = digit_count + 1
            is_enumeration = (
                digit_count > 0
                and line[digit_count:marker_end] in (".", ")")
                and not line[marker_end : marker_end + 1].isdigit()
            )
            if not is_enumeration:
                break
            line = line[marker_end:].lstrip()
        return line

    def _parse_question_lines(self, response_text: str) -> List[str]:
        """Split a model reply into cleaned question strings.

        Blank lines, lines starting with ``#`` and lines of ten characters
        or fewer are ignored. List markers are stripped from the remaining
        lines, and lines that consist only of markers are dropped.
        """
        questions = []
        for raw_line in response_text.strip().split("\n"):
            line = raw_line.strip()
            if not line or line.startswith("#") or len(line) <= 10:
                continue
            cleaned = self._strip_list_markers(line)
            if cleaned:
                questions.append(cleaned)
        return questions