local-deep-research 0.4.4__py3-none-any.whl → 0.5.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (220)
  1. local_deep_research/__init__.py +7 -0
  2. local_deep_research/__version__.py +1 -1
  3. local_deep_research/advanced_search_system/answer_decoding/__init__.py +5 -0
  4. local_deep_research/advanced_search_system/answer_decoding/browsecomp_answer_decoder.py +421 -0
  5. local_deep_research/advanced_search_system/candidate_exploration/README.md +219 -0
  6. local_deep_research/advanced_search_system/candidate_exploration/__init__.py +25 -0
  7. local_deep_research/advanced_search_system/candidate_exploration/adaptive_explorer.py +329 -0
  8. local_deep_research/advanced_search_system/candidate_exploration/base_explorer.py +341 -0
  9. local_deep_research/advanced_search_system/candidate_exploration/constraint_guided_explorer.py +436 -0
  10. local_deep_research/advanced_search_system/candidate_exploration/diversity_explorer.py +457 -0
  11. local_deep_research/advanced_search_system/candidate_exploration/parallel_explorer.py +250 -0
  12. local_deep_research/advanced_search_system/candidate_exploration/progressive_explorer.py +255 -0
  13. local_deep_research/advanced_search_system/candidates/__init__.py +5 -0
  14. local_deep_research/advanced_search_system/candidates/base_candidate.py +59 -0
  15. local_deep_research/advanced_search_system/constraint_checking/README.md +150 -0
  16. local_deep_research/advanced_search_system/constraint_checking/__init__.py +35 -0
  17. local_deep_research/advanced_search_system/constraint_checking/base_constraint_checker.py +122 -0
  18. local_deep_research/advanced_search_system/constraint_checking/constraint_checker.py +223 -0
  19. local_deep_research/advanced_search_system/constraint_checking/constraint_satisfaction_tracker.py +387 -0
  20. local_deep_research/advanced_search_system/constraint_checking/dual_confidence_checker.py +424 -0
  21. local_deep_research/advanced_search_system/constraint_checking/evidence_analyzer.py +174 -0
  22. local_deep_research/advanced_search_system/constraint_checking/intelligent_constraint_relaxer.py +503 -0
  23. local_deep_research/advanced_search_system/constraint_checking/rejection_engine.py +143 -0
  24. local_deep_research/advanced_search_system/constraint_checking/strict_checker.py +259 -0
  25. local_deep_research/advanced_search_system/constraint_checking/threshold_checker.py +213 -0
  26. local_deep_research/advanced_search_system/constraints/__init__.py +6 -0
  27. local_deep_research/advanced_search_system/constraints/base_constraint.py +58 -0
  28. local_deep_research/advanced_search_system/constraints/constraint_analyzer.py +143 -0
  29. local_deep_research/advanced_search_system/evidence/__init__.py +12 -0
  30. local_deep_research/advanced_search_system/evidence/base_evidence.py +57 -0
  31. local_deep_research/advanced_search_system/evidence/evaluator.py +159 -0
  32. local_deep_research/advanced_search_system/evidence/requirements.py +122 -0
  33. local_deep_research/advanced_search_system/filters/base_filter.py +3 -1
  34. local_deep_research/advanced_search_system/filters/cross_engine_filter.py +8 -2
  35. local_deep_research/advanced_search_system/filters/journal_reputation_filter.py +43 -29
  36. local_deep_research/advanced_search_system/findings/repository.py +54 -17
  37. local_deep_research/advanced_search_system/knowledge/standard_knowledge.py +3 -1
  38. local_deep_research/advanced_search_system/query_generation/adaptive_query_generator.py +405 -0
  39. local_deep_research/advanced_search_system/questions/__init__.py +16 -0
  40. local_deep_research/advanced_search_system/questions/atomic_fact_question.py +171 -0
  41. local_deep_research/advanced_search_system/questions/browsecomp_question.py +287 -0
  42. local_deep_research/advanced_search_system/questions/decomposition_question.py +13 -4
  43. local_deep_research/advanced_search_system/questions/entity_aware_question.py +184 -0
  44. local_deep_research/advanced_search_system/questions/standard_question.py +9 -3
  45. local_deep_research/advanced_search_system/search_optimization/cross_constraint_manager.py +624 -0
  46. local_deep_research/advanced_search_system/source_management/diversity_manager.py +613 -0
  47. local_deep_research/advanced_search_system/strategies/__init__.py +42 -0
  48. local_deep_research/advanced_search_system/strategies/adaptive_decomposition_strategy.py +564 -0
  49. local_deep_research/advanced_search_system/strategies/base_strategy.py +4 -4
  50. local_deep_research/advanced_search_system/strategies/browsecomp_entity_strategy.py +1031 -0
  51. local_deep_research/advanced_search_system/strategies/browsecomp_optimized_strategy.py +778 -0
  52. local_deep_research/advanced_search_system/strategies/concurrent_dual_confidence_strategy.py +446 -0
  53. local_deep_research/advanced_search_system/strategies/constrained_search_strategy.py +1348 -0
  54. local_deep_research/advanced_search_system/strategies/constraint_parallel_strategy.py +522 -0
  55. local_deep_research/advanced_search_system/strategies/direct_search_strategy.py +217 -0
  56. local_deep_research/advanced_search_system/strategies/dual_confidence_strategy.py +320 -0
  57. local_deep_research/advanced_search_system/strategies/dual_confidence_with_rejection.py +219 -0
  58. local_deep_research/advanced_search_system/strategies/early_stop_constrained_strategy.py +369 -0
  59. local_deep_research/advanced_search_system/strategies/entity_aware_source_strategy.py +140 -0
  60. local_deep_research/advanced_search_system/strategies/evidence_based_strategy.py +1248 -0
  61. local_deep_research/advanced_search_system/strategies/evidence_based_strategy_v2.py +1337 -0
  62. local_deep_research/advanced_search_system/strategies/focused_iteration_strategy.py +537 -0
  63. local_deep_research/advanced_search_system/strategies/improved_evidence_based_strategy.py +782 -0
  64. local_deep_research/advanced_search_system/strategies/iterative_reasoning_strategy.py +760 -0
  65. local_deep_research/advanced_search_system/strategies/iterdrag_strategy.py +55 -21
  66. local_deep_research/advanced_search_system/strategies/llm_driven_modular_strategy.py +865 -0
  67. local_deep_research/advanced_search_system/strategies/modular_strategy.py +1142 -0
  68. local_deep_research/advanced_search_system/strategies/parallel_constrained_strategy.py +506 -0
  69. local_deep_research/advanced_search_system/strategies/parallel_search_strategy.py +34 -16
  70. local_deep_research/advanced_search_system/strategies/rapid_search_strategy.py +29 -9
  71. local_deep_research/advanced_search_system/strategies/recursive_decomposition_strategy.py +492 -0
  72. local_deep_research/advanced_search_system/strategies/smart_decomposition_strategy.py +284 -0
  73. local_deep_research/advanced_search_system/strategies/smart_query_strategy.py +515 -0
  74. local_deep_research/advanced_search_system/strategies/source_based_strategy.py +48 -24
  75. local_deep_research/advanced_search_system/strategies/standard_strategy.py +34 -14
  76. local_deep_research/advanced_search_system/tools/base_tool.py +7 -2
  77. local_deep_research/api/benchmark_functions.py +6 -2
  78. local_deep_research/api/research_functions.py +10 -4
  79. local_deep_research/benchmarks/__init__.py +9 -7
  80. local_deep_research/benchmarks/benchmark_functions.py +6 -2
  81. local_deep_research/benchmarks/cli/benchmark_commands.py +27 -10
  82. local_deep_research/benchmarks/cli.py +38 -13
  83. local_deep_research/benchmarks/comparison/__init__.py +4 -2
  84. local_deep_research/benchmarks/comparison/evaluator.py +316 -239
  85. local_deep_research/benchmarks/datasets/__init__.py +1 -1
  86. local_deep_research/benchmarks/datasets/base.py +91 -72
  87. local_deep_research/benchmarks/datasets/browsecomp.py +54 -33
  88. local_deep_research/benchmarks/datasets/custom_dataset_template.py +19 -19
  89. local_deep_research/benchmarks/datasets/simpleqa.py +14 -14
  90. local_deep_research/benchmarks/datasets/utils.py +48 -29
  91. local_deep_research/benchmarks/datasets.py +4 -11
  92. local_deep_research/benchmarks/efficiency/__init__.py +8 -4
  93. local_deep_research/benchmarks/efficiency/resource_monitor.py +223 -171
  94. local_deep_research/benchmarks/efficiency/speed_profiler.py +62 -48
  95. local_deep_research/benchmarks/evaluators/browsecomp.py +3 -1
  96. local_deep_research/benchmarks/evaluators/composite.py +6 -2
  97. local_deep_research/benchmarks/evaluators/simpleqa.py +36 -13
  98. local_deep_research/benchmarks/graders.py +32 -10
  99. local_deep_research/benchmarks/metrics/README.md +1 -1
  100. local_deep_research/benchmarks/metrics/calculation.py +25 -10
  101. local_deep_research/benchmarks/metrics/reporting.py +7 -3
  102. local_deep_research/benchmarks/metrics/visualization.py +42 -23
  103. local_deep_research/benchmarks/metrics.py +1 -1
  104. local_deep_research/benchmarks/optimization/__init__.py +3 -1
  105. local_deep_research/benchmarks/optimization/api.py +7 -1
  106. local_deep_research/benchmarks/optimization/optuna_optimizer.py +75 -26
  107. local_deep_research/benchmarks/runners.py +48 -15
  108. local_deep_research/citation_handler.py +65 -92
  109. local_deep_research/citation_handlers/__init__.py +15 -0
  110. local_deep_research/citation_handlers/base_citation_handler.py +70 -0
  111. local_deep_research/citation_handlers/forced_answer_citation_handler.py +179 -0
  112. local_deep_research/citation_handlers/precision_extraction_handler.py +550 -0
  113. local_deep_research/citation_handlers/standard_citation_handler.py +80 -0
  114. local_deep_research/config/llm_config.py +271 -169
  115. local_deep_research/config/search_config.py +14 -5
  116. local_deep_research/defaults/__init__.py +0 -1
  117. local_deep_research/metrics/__init__.py +13 -0
  118. local_deep_research/metrics/database.py +58 -0
  119. local_deep_research/metrics/db_models.py +115 -0
  120. local_deep_research/metrics/migrate_add_provider_to_token_usage.py +148 -0
  121. local_deep_research/metrics/migrate_call_stack_tracking.py +105 -0
  122. local_deep_research/metrics/migrate_enhanced_tracking.py +75 -0
  123. local_deep_research/metrics/migrate_research_ratings.py +31 -0
  124. local_deep_research/metrics/models.py +61 -0
  125. local_deep_research/metrics/pricing/__init__.py +12 -0
  126. local_deep_research/metrics/pricing/cost_calculator.py +237 -0
  127. local_deep_research/metrics/pricing/pricing_cache.py +143 -0
  128. local_deep_research/metrics/pricing/pricing_fetcher.py +240 -0
  129. local_deep_research/metrics/query_utils.py +51 -0
  130. local_deep_research/metrics/search_tracker.py +380 -0
  131. local_deep_research/metrics/token_counter.py +1078 -0
  132. local_deep_research/migrate_db.py +3 -1
  133. local_deep_research/report_generator.py +22 -8
  134. local_deep_research/search_system.py +390 -9
  135. local_deep_research/test_migration.py +15 -5
  136. local_deep_research/utilities/db_utils.py +7 -4
  137. local_deep_research/utilities/es_utils.py +115 -104
  138. local_deep_research/utilities/llm_utils.py +15 -5
  139. local_deep_research/utilities/log_utils.py +151 -0
  140. local_deep_research/utilities/search_cache.py +387 -0
  141. local_deep_research/utilities/search_utilities.py +14 -6
  142. local_deep_research/utilities/threading_utils.py +92 -0
  143. local_deep_research/utilities/url_utils.py +6 -0
  144. local_deep_research/web/api.py +347 -0
  145. local_deep_research/web/app.py +13 -17
  146. local_deep_research/web/app_factory.py +71 -66
  147. local_deep_research/web/database/migrate_to_ldr_db.py +12 -4
  148. local_deep_research/web/database/migrations.py +20 -3
  149. local_deep_research/web/database/models.py +74 -25
  150. local_deep_research/web/database/schema_upgrade.py +49 -29
  151. local_deep_research/web/models/database.py +63 -83
  152. local_deep_research/web/routes/api_routes.py +56 -22
  153. local_deep_research/web/routes/benchmark_routes.py +4 -1
  154. local_deep_research/web/routes/globals.py +22 -0
  155. local_deep_research/web/routes/history_routes.py +71 -46
  156. local_deep_research/web/routes/metrics_routes.py +1155 -0
  157. local_deep_research/web/routes/research_routes.py +192 -54
  158. local_deep_research/web/routes/settings_routes.py +156 -55
  159. local_deep_research/web/services/research_service.py +412 -251
  160. local_deep_research/web/services/resource_service.py +36 -11
  161. local_deep_research/web/services/settings_manager.py +55 -17
  162. local_deep_research/web/services/settings_service.py +12 -4
  163. local_deep_research/web/services/socket_service.py +295 -188
  164. local_deep_research/web/static/css/custom_dropdown.css +180 -0
  165. local_deep_research/web/static/css/styles.css +39 -1
  166. local_deep_research/web/static/js/components/detail.js +633 -267
  167. local_deep_research/web/static/js/components/details.js +751 -0
  168. local_deep_research/web/static/js/components/fallback/formatting.js +11 -11
  169. local_deep_research/web/static/js/components/fallback/ui.js +23 -23
  170. local_deep_research/web/static/js/components/history.js +76 -76
  171. local_deep_research/web/static/js/components/logpanel.js +61 -13
  172. local_deep_research/web/static/js/components/progress.js +13 -2
  173. local_deep_research/web/static/js/components/research.js +99 -12
  174. local_deep_research/web/static/js/components/results.js +239 -106
  175. local_deep_research/web/static/js/main.js +40 -40
  176. local_deep_research/web/static/js/services/audio.js +1 -1
  177. local_deep_research/web/static/js/services/formatting.js +11 -11
  178. local_deep_research/web/static/js/services/keyboard.js +157 -0
  179. local_deep_research/web/static/js/services/pdf.js +80 -80
  180. local_deep_research/web/static/sounds/README.md +1 -1
  181. local_deep_research/web/templates/base.html +1 -0
  182. local_deep_research/web/templates/components/log_panel.html +7 -1
  183. local_deep_research/web/templates/components/mobile_nav.html +1 -1
  184. local_deep_research/web/templates/components/sidebar.html +3 -0
  185. local_deep_research/web/templates/pages/cost_analytics.html +1245 -0
  186. local_deep_research/web/templates/pages/details.html +325 -24
  187. local_deep_research/web/templates/pages/history.html +1 -1
  188. local_deep_research/web/templates/pages/metrics.html +1929 -0
  189. local_deep_research/web/templates/pages/progress.html +2 -2
  190. local_deep_research/web/templates/pages/research.html +53 -17
  191. local_deep_research/web/templates/pages/results.html +12 -1
  192. local_deep_research/web/templates/pages/star_reviews.html +803 -0
  193. local_deep_research/web/utils/formatters.py +9 -3
  194. local_deep_research/web_search_engines/default_search_engines.py +5 -3
  195. local_deep_research/web_search_engines/engines/full_search.py +8 -2
  196. local_deep_research/web_search_engines/engines/meta_search_engine.py +59 -20
  197. local_deep_research/web_search_engines/engines/search_engine_arxiv.py +19 -6
  198. local_deep_research/web_search_engines/engines/search_engine_brave.py +6 -2
  199. local_deep_research/web_search_engines/engines/search_engine_ddg.py +3 -1
  200. local_deep_research/web_search_engines/engines/search_engine_elasticsearch.py +81 -58
  201. local_deep_research/web_search_engines/engines/search_engine_github.py +46 -15
  202. local_deep_research/web_search_engines/engines/search_engine_google_pse.py +16 -6
  203. local_deep_research/web_search_engines/engines/search_engine_guardian.py +39 -15
  204. local_deep_research/web_search_engines/engines/search_engine_local.py +58 -25
  205. local_deep_research/web_search_engines/engines/search_engine_local_all.py +15 -5
  206. local_deep_research/web_search_engines/engines/search_engine_pubmed.py +63 -21
  207. local_deep_research/web_search_engines/engines/search_engine_searxng.py +37 -11
  208. local_deep_research/web_search_engines/engines/search_engine_semantic_scholar.py +27 -9
  209. local_deep_research/web_search_engines/engines/search_engine_serpapi.py +12 -4
  210. local_deep_research/web_search_engines/engines/search_engine_wayback.py +31 -10
  211. local_deep_research/web_search_engines/engines/search_engine_wikipedia.py +12 -3
  212. local_deep_research/web_search_engines/search_engine_base.py +83 -35
  213. local_deep_research/web_search_engines/search_engine_factory.py +25 -8
  214. local_deep_research/web_search_engines/search_engines_config.py +9 -3
  215. {local_deep_research-0.4.4.dist-info → local_deep_research-0.5.2.dist-info}/METADATA +7 -1
  216. local_deep_research-0.5.2.dist-info/RECORD +265 -0
  217. local_deep_research-0.4.4.dist-info/RECORD +0 -177
  218. {local_deep_research-0.4.4.dist-info → local_deep_research-0.5.2.dist-info}/WHEEL +0 -0
  219. {local_deep_research-0.4.4.dist-info → local_deep_research-0.5.2.dist-info}/entry_points.txt +0 -0
  220. {local_deep_research-0.4.4.dist-info → local_deep_research-0.5.2.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,865 @@
1
+ """
2
+ LLM-Driven Modular Strategy with intelligent constraint processing and early rejection.
3
+ """
4
+
5
+ import asyncio
6
+ import json
7
+ from dataclasses import dataclass
8
+ from typing import Dict, List, Optional, Tuple
9
+
10
+ from loguru import logger
11
+
12
+ from ..candidate_exploration import AdaptiveExplorer
13
+ from ..constraint_checking import DualConfidenceChecker
14
+ from ..constraints import ConstraintAnalyzer
15
+ from ..questions import StandardQuestionGenerator
16
+ from .base_strategy import BaseSearchStrategy
17
+
18
+
19
@dataclass
class CandidateConfidence:
    """Track candidate confidence levels for early rejection"""

    # The candidate being evaluated (project candidate object; typed loosely).
    candidate: object
    # Confidence (0.0-1.0) that the candidate matches the constraints.
    positive_confidence: float
    # Confidence (0.0-1.0) that the candidate does NOT match the constraints.
    negative_confidence: float
    # Human-readable reason recorded when the candidate is rejected early.
    rejection_reason: Optional[str] = None
    # Set to False once further evaluation of this candidate should stop.
    should_continue: bool = True
28
+
29
+
30
class LLMConstraintProcessor:
    """LLM-driven intelligent constraint processing.

    Turns a list of constraint objects (each exposing a ``description``
    attribute) into search queries in four stages: decomposition into
    atomic elements, combination into targeted queries, creative-angle
    generation, and final optimization. Each ``_parse_*`` helper extracts
    JSON from the raw LLM reply and returns a hard-coded fallback when no
    valid JSON can be found.
    """

    def __init__(self, model):
        # Chat model exposing an async ``ainvoke(prompt)`` method
        # (LangChain-style interface, presumably) — confirm against callers.
        self.model = model

    async def decompose_constraints_intelligently(self, constraints):
        """Let LLM intelligently break down constraints into searchable elements"""
        constraint_text = "\n".join(f"- {c.description}" for c in constraints)

        prompt = f"""
I have these constraints from a search query:
{constraint_text}

Please intelligently decompose these constraints into atomic, searchable elements that can be combined in different ways.

For each constraint, provide:
1. **Atomic elements** - Break it into smallest meaningful parts
2. **Variations** - Different ways to express the same concept
3. **Granular specifics** - Specific values, years, numbers, etc.

Example for "TV show aired between 1960s and 1980s":
- Atomic elements: ["TV show", "television", "series", "program"]
- Time variations: ["1960s", "1970s", "1980s", "60s", "70s", "80s"]
- Granular years: ["1960", "1961", "1962", "1963", "1964", "1965", "1966", "1967", "1968", "1969", "1970", "1971", "1972", "1973", "1974", "1975", "1976", "1977", "1978", "1979", "1980", "1981", "1982", "1983", "1984", "1985", "1986", "1987", "1988", "1989"]

Example for "fewer than 50 episodes":
- Atomic elements: ["episodes", "installments", "parts"]
- Quantity variations: ["under 50", "less than 50", "limited run", "short series"]
- Granular numbers: ["13 episodes", "26 episodes", "39 episodes", "single season"]

Return as valid JSON format:
{{
"constraint_1": {{
"atomic_elements": [...],
"variations": [...],
"granular_specifics": [...]
}},
"constraint_2": {{
"atomic_elements": [...],
"variations": [...],
"granular_specifics": [...]
}}
}}
"""

        response = await self.model.ainvoke(prompt)
        return self._parse_decomposition(response.content)

    async def generate_intelligent_combinations(self, decomposed_constraints):
        """LLM generates smart combinations of atomic elements"""

        # Flatten all elements for the LLM to see
        all_elements = {}
        for constraint_id, elements in decomposed_constraints.items():
            all_elements[constraint_id] = elements

        prompt = f"""
I have decomposed constraints into these atomic elements:
{json.dumps(all_elements, indent=2)}

Now intelligently combine these elements to create targeted search queries. Be creative and systematic:

1. **Year-by-year combinations**: Take specific years and combine with other specifics
Example: "1960 TV show 13 episodes", "1961 television 26 episodes", etc.

2. **Cross-constraint combinations**: Mix elements from different constraints
Example: "humor ascetic 1970s", "fourth wall short series vintage"

3. **Granular progression**: Create systematic progressions
Example: "1960 comedy", "1961 comedy", "1962 comedy"...

4. **Semantic variations**: Same meaning, different words
Example: "brief TV run 1970s" vs "short television series seventies"

5. **Contextual combinations**: Add implied context
Example: "monk-trained character 1978 television"

Generate 60-80 diverse search combinations that would maximize finding the target.
Focus on being comprehensive yet targeted.

Return as a valid JSON list of search queries:
["query1", "query2", "query3"]
"""

        response = await self.model.ainvoke(prompt)
        return self._parse_combinations(response.content)

    async def generate_creative_search_angles(
        self, original_query, decomposed_constraints
    ):
        """LLM generates completely creative search approaches"""
        # NOTE(review): decomposed_constraints is currently unused here;
        # kept for interface compatibility — confirm whether it should
        # feed into the prompt.

        prompt = f"""
Original query: "{original_query}"

Now think like a detective - what are ALL the different ways someone might search for this character?
Be extremely creative and think outside the box:

1. **Character name guessing**: What names might this character have?
2. **Show title guessing**: What might the TV show be called?
3. **Cultural context**: What was happening in those decades?
4. **Genre searches**: What genre/category would this fit?
5. **Indirect searches**: What related topics might lead to this?
6. **Reverse searches**: Start from known similar characters
7. **Archetype searches**: What type of character is this?
8. **Creator/studio searches**: Who might have made this?

Generate 30-40 creative search angles that approach this from completely different directions.

Examples of creative thinking:
- "1970s cartoon characters who talk to camera"
- "superhero trained by monks television"
- "vintage comedy shows cancelled after one season"
- "fourth wall breaking animation 70s"
- "spiritual mentor origin story TV characters"
- "Plastic Man TV show episodes"
- "elastic superhero television series"

Return as valid JSON list of creative searches:
["creative_query1", "creative_query2"]
"""

        response = await self.model.ainvoke(prompt)
        return self._parse_creative_searches(response.content)

    async def optimize_search_combinations(self, all_combinations):
        """LLM optimizes the search list for maximum effectiveness"""

        prompt = f"""
I have generated {len(all_combinations)} search combinations. Here are the first 20:
{json.dumps(all_combinations[:20], indent=2)}

Please optimize this search strategy by organizing searches by priority and effectiveness:

1. **Remove redundant searches** that are too similar
2. **Prioritize high-value searches** likely to find results
3. **Balance specificity vs breadth**
4. **Add missing search angles** you notice
5. **Organize by search strategy type**

Return optimized searches organized by category as valid JSON:
{{
"high_priority": ["most likely to succeed - top 15 searches"],
"systematic_granular": ["year-by-year, episode-by-episode combinations - 20 searches"],
"creative_angles": ["outside-the-box approaches - 15 searches"],
"contextual_searches": ["time period + cultural context - 15 searches"],
"fallback_broad": ["broader searches if specifics fail - 10 searches"]
}}
"""

        response = await self.model.ainvoke(prompt)
        return self._parse_optimized_searches(response.content)

    def _parse_decomposition(self, content):
        """Parse LLM decomposition response.

        Bug fix: the original compared ``rfind("}") + 1`` against -1,
        which is never true, so a reply containing "{" but no "}" was
        sliced to an empty string and raised a spurious JSONDecodeError.
        Both delimiters are now validated before slicing.
        """
        try:
            # Extract the outermost JSON object from the response
            start = content.find("{")
            end = content.rfind("}")
            if start != -1 and end > start:
                return json.loads(content[start : end + 1])
        except Exception as e:
            logger.error(f"Failed to parse decomposition: {e}")

        # Fallback to simple structure
        return {
            "time_constraint": {
                "atomic_elements": ["TV show", "television", "series"],
                "variations": ["1960s", "1970s", "1980s"],
                "granular_specifics": [str(year) for year in range(1960, 1990)],
            }
        }

    def _parse_combinations(self, content):
        """Parse LLM combinations response (JSON array; delimiter check fixed)."""
        try:
            start = content.find("[")
            end = content.rfind("]")
            if start != -1 and end > start:
                return json.loads(content[start : end + 1])
        except Exception as e:
            logger.error(f"Failed to parse combinations: {e}")

        # Fallback
        return [
            "fictional character humor",
            "TV show 1970s",
            "fourth wall breaking",
        ]

    def _parse_creative_searches(self, content):
        """Parse LLM creative searches response (JSON array; delimiter check fixed)."""
        try:
            start = content.find("[")
            end = content.rfind("]")
            if start != -1 and end > start:
                return json.loads(content[start : end + 1])
        except Exception as e:
            logger.error(f"Failed to parse creative searches: {e}")

        # Fallback
        return [
            "vintage cartoon character",
            "superhero TV show 1970s",
            "comedy series short run",
        ]

    def _parse_optimized_searches(self, content):
        """Parse LLM optimized searches response (JSON object; delimiter check fixed)."""
        try:
            start = content.find("{")
            end = content.rfind("}")
            if start != -1 and end > start:
                return json.loads(content[start : end + 1])
        except Exception as e:
            logger.error(f"Failed to parse optimized searches: {e}")

        # Fallback
        return {
            "high_priority": [
                "fictional character fourth wall humor",
                "1970s TV show limited episodes",
            ],
            "systematic_granular": [
                "1970 TV show",
                "1971 TV show",
                "1972 TV show",
            ],
            "creative_angles": [
                "superhero comedy television",
                "cartoon character talks to audience",
            ],
            "contextual_searches": [
                "vintage TV comedy",
                "classic television humor",
            ],
            "fallback_broad": ["fictional character", "TV show character"],
        }
273
+
274
+
275
class EarlyRejectionManager:
    """Manages early rejection and confidence tracking"""

    def __init__(self, model, positive_threshold=0.6, negative_threshold=0.3):
        # Chat model exposing an async ``ainvoke(prompt)`` method.
        self.model = model
        # NOTE(review): these thresholds are stored but should_reject_early()
        # currently uses hard-coded cutoffs (0.7 / 0.1) — confirm intent.
        self.positive_threshold = positive_threshold
        self.negative_threshold = negative_threshold
        # Candidates already rejected (populated by callers).
        self.rejected_candidates = set()

    async def quick_confidence_check(self, candidate, constraints):
        """Quick confidence assessment for early rejection.

        Returns a dict with ``positive_confidence``, ``negative_confidence``
        and ``reasoning`` keys; a neutral fallback is returned on any failure.
        """

        prompt = f"""
Quickly assess if this candidate matches the search criteria:

Candidate: {candidate.name}
Available info: {getattr(candidate, "metadata", {})}

Constraints to match:
{[c.description for c in constraints]}

Provide:
1. **Positive confidence** (0.0-1.0): How likely this candidate matches
2. **Negative confidence** (0.0-1.0): How likely this candidate does NOT match
3. **Quick reasoning**: Brief explanation

Return as JSON:
{{
"positive_confidence": 0.X,
"negative_confidence": 0.X,
"reasoning": "brief explanation"
}}
"""

        try:
            response = await self.model.ainvoke(prompt)
            return self._parse_confidence(response.content)
        except Exception as e:
            logger.error(f"Quick confidence check failed: {e}")
            return {
                "positive_confidence": 0.5,
                "negative_confidence": 0.3,
                "reasoning": "fallback",
            }

    def should_reject_early(self, confidence_result):
        """Determine if candidate should be rejected early.

        Returns ``(True, reason)`` to reject, else ``(False, None)``.
        """
        positive = confidence_result.get("positive_confidence", 0.5)
        negative = confidence_result.get("negative_confidence", 0.3)

        # Reject if high negative confidence or very low positive confidence
        if negative > 0.7 or positive < 0.1:
            return (
                True,
                f"High negative confidence ({negative:.2f}) or low positive ({positive:.2f})",
            )

        return False, None

    def should_continue_search(self, all_candidates, high_confidence_count):
        """Determine if we should continue searching.

        Returns ``(True, None)`` to keep going, else ``(False, reason)``.
        """
        # Stop if we have enough high-confidence candidates
        if high_confidence_count >= 5:
            return False, "Found sufficient high-confidence candidates"

        # Stop if we have many candidates but low quality
        if len(all_candidates) > 50 and high_confidence_count == 0:
            return False, "Too many low-quality candidates"

        return True, None

    def _parse_confidence(self, content):
        """Parse confidence assessment.

        Bug fix: the original compared ``rfind("}") + 1`` against -1,
        which is never true, so a reply containing "{" but no "}" was
        sliced to an empty string and raised a spurious JSONDecodeError.
        Both delimiters are now validated before slicing.
        """
        try:
            start = content.find("{")
            end = content.rfind("}")
            if start != -1 and end > start:
                return json.loads(content[start : end + 1])
        except Exception as e:
            logger.error(f"Failed to parse confidence: {e}")

        return {
            "positive_confidence": 0.5,
            "negative_confidence": 0.3,
            "reasoning": "parse_error",
        }
362
+
363
+
364
+ class LLMDrivenModularStrategy(BaseSearchStrategy):
365
+ """
366
+ LLM-driven modular strategy with intelligent constraint processing and early rejection.
367
+ """
368
+
369
    def __init__(
        self,
        model,
        search,
        all_links_of_system=None,
        constraint_checker_type: str = "dual_confidence",
        exploration_strategy: str = "adaptive",
        early_rejection: bool = True,
        **kwargs,
    ):
        """Wire up the LLM processor, constraint checker, explorer and question generator.

        Args:
            model: Chat model shared by every LLM-driven component.
            search: Search engine (or aggregate of engines) used for exploration.
            all_links_of_system: Shared link accumulator forwarded to the base strategy.
            constraint_checker_type: Stored for reporting; the checker built here
                is always a DualConfidenceChecker regardless of this value —
                NOTE(review): confirm whether other types were intended.
            exploration_strategy: Stored for reporting; the explorer built here
                is always an AdaptiveExplorer regardless of this value.
            early_rejection: When True, build an EarlyRejectionManager for cheap
                candidate pre-filtering; otherwise it is set to None.
            **kwargs: Accepted for interface compatibility; unused here.
        """
        super().__init__(all_links_of_system=all_links_of_system)

        self.model = model
        self.search_engine = search
        # Aggregate engines may expose sub-engines; default to [] otherwise.
        self.search_engines = getattr(search, "search_engines", [])

        # Initialize components
        self.constraint_analyzer = ConstraintAnalyzer(self.model)
        self.llm_processor = LLMConstraintProcessor(self.model)
        self.early_rejection_manager = (
            EarlyRejectionManager(self.model) if early_rejection else None
        )

        # Initialize constraint checker.
        # Evidence gathering is delegated to a method defined elsewhere in
        # this class; thresholds/weights are fixed here, not configurable.
        self.constraint_checker = DualConfidenceChecker(
            model=self.model,
            evidence_gatherer=self._gather_evidence_for_constraint,
            negative_threshold=0.25,
            positive_threshold=0.4,
            uncertainty_penalty=0.2,
            negative_weight=2.0,
        )

        # Initialize candidate explorer
        self.candidate_explorer = AdaptiveExplorer(
            search_engine=self.search_engine,
            model=self.model,
            learning_rate=0.1,
            max_search_time=45.0,  # Reduced since we have more searches
            max_candidates=30,  # Increased since we filter early
        )

        # Initialize question generator
        self.question_generator = StandardQuestionGenerator(model=self.model)

        # Strategy configuration (recorded for logging/reporting)
        self.constraint_checker_type = constraint_checker_type
        self.exploration_strategy = exploration_strategy
        self.early_rejection = early_rejection

        logger.info(
            f"Initialized LLMDrivenModularStrategy with {constraint_checker_type} checker, "
            f"{exploration_strategy} explorer, early_rejection={early_rejection}"
        )
423
+
424
def analyze_topic(self, query: str) -> Dict:
    """Main entry point - sync wrapper for async search.

    Drives the async ``search`` coroutine from synchronous code and
    normalizes its ``(answer, metadata)`` result into the findings dict
    expected by callers. On any failure it returns an error-shaped dict
    instead of raising.
    """
    try:
        import asyncio

        # Use get_running_loop() rather than the deprecated
        # get_event_loop() probe: it raises RuntimeError exactly when no
        # loop is running, which is the case we branch on.
        try:
            asyncio.get_running_loop()
        except RuntimeError:
            # No loop is running in this thread: drive the coroutine directly.
            answer, metadata = asyncio.run(self.search(query))
        else:
            # Already inside an event loop — run the search in a worker
            # thread with its own fresh loop to avoid "loop already running".
            import concurrent.futures

            with concurrent.futures.ThreadPoolExecutor() as executor:
                future = executor.submit(
                    lambda: asyncio.run(self.search(query))
                )
                answer, metadata = future.result()

        return {
            "findings": [{"content": answer}],
            "iterations": 1,
            "final_answer": answer,
            "metadata": metadata,
            "links": getattr(self, "all_links_of_system", []),
            "questions_by_iteration": getattr(
                self, "questions_by_iteration", []
            ),
        }

    except Exception as e:
        logger.error(f"Error in analyze_topic: {e}")
        import traceback

        logger.error(f"Traceback: {traceback.format_exc()}")
        # Error-shaped result so the caller's contract is preserved.
        return {
            "findings": [],
            "iterations": 0,
            "final_answer": f"Analysis failed: {str(e)}",
            "metadata": {"error": str(e)},
            "links": [],
            "questions_by_iteration": [],
        }
474
+
475
async def search(
    self,
    query: str,
    search_engines: List[str] = None,
    progress_callback=None,
    **kwargs,
) -> Tuple[str, Dict]:
    """Execute the LLM-driven modular search strategy.

    Pipeline: (1) extract constraints from the query, (2-5) let the LLM
    decompose them, generate combined + creative search queries and
    optimize them into priority categories, (6) run the searches in
    parallel batches with optional early rejection of weak candidates,
    (7) score the survivors with the constraint checker and synthesize a
    final answer for the best one.

    Args:
        query: The user's research question.
        search_engines: Unused here; accepted for interface compatibility.
        progress_callback: Optional callable receiving phase/progress dicts.

    Returns:
        (answer_text, metadata_dict). On failure returns an error string
        plus {"error": ...} instead of raising.
    """
    try:
        logger.info(f"Starting LLM-driven modular search for: {query}")

        # Phase 1: Extract base constraints
        if progress_callback:
            progress_callback(
                {
                    "phase": "constraint_analysis",
                    "progress": 5,
                    "message": "Analyzing query constraints",
                }
            )

        base_constraints = self.constraint_analyzer.extract_constraints(
            query
        )
        logger.info(f"Extracted {len(base_constraints)} base constraints")

        # Phase 2: LLM intelligent decomposition
        if progress_callback:
            progress_callback(
                {
                    "phase": "llm_decomposition",
                    "progress": 15,
                    "message": "LLM decomposing constraints intelligently",
                }
            )

        decomposed = (
            await self.llm_processor.decompose_constraints_intelligently(
                base_constraints
            )
        )
        logger.info(
            f"LLM decomposed constraints into {len(decomposed)} groups"
        )

        # Phase 3: LLM intelligent combinations
        if progress_callback:
            progress_callback(
                {
                    "phase": "llm_combinations",
                    "progress": 25,
                    "message": "LLM generating intelligent search combinations",
                }
            )

        intelligent_combinations = (
            await self.llm_processor.generate_intelligent_combinations(
                decomposed
            )
        )
        logger.info(
            f"LLM generated {len(intelligent_combinations)} intelligent combinations"
        )

        # Phase 4: LLM creative search angles
        if progress_callback:
            progress_callback(
                {
                    "phase": "llm_creative",
                    "progress": 35,
                    "message": "LLM generating creative search angles",
                }
            )

        creative_searches = (
            await self.llm_processor.generate_creative_search_angles(
                query, decomposed
            )
        )
        logger.info(
            f"LLM generated {len(creative_searches)} creative searches"
        )

        # Phase 5: LLM optimization — merges both query pools and buckets
        # them into priority categories (dict of category -> [queries]).
        if progress_callback:
            progress_callback(
                {
                    "phase": "llm_optimization",
                    "progress": 45,
                    "message": "LLM optimizing search strategy",
                }
            )

        all_searches = intelligent_combinations + creative_searches
        optimized_searches = (
            await self.llm_processor.optimize_search_combinations(
                all_searches
            )
        )
        total_searches = sum(
            len(searches) for searches in optimized_searches.values()
        )
        logger.info(
            f"LLM optimized to {total_searches} total searches across categories"
        )

        # Phase 6: Execute searches by priority with early rejection.
        # NOTE(review): uses module-level `asyncio` below — assumed imported
        # at the top of the file; confirm (analyze_topic imports it locally).
        all_candidates = []
        high_confidence_count = 0
        search_progress = 50  # progress bar starts at 50% after planning

        for category, searches in optimized_searches.items():
            if not searches:
                continue

            logger.info(
                f"Executing {category} searches: {len(searches)} queries"
            )

            if progress_callback:
                progress_callback(
                    {
                        "phase": f"search_{category}",
                        "progress": search_progress,
                        "message": f"Searching with {category} strategy",
                    }
                )

            # Execute in parallel batches; high-priority queries get a
            # smaller batch so results (and early stopping) arrive sooner.
            batch_size = 3 if category == "high_priority" else 5
            category_candidates = []

            for i in range(0, len(searches), batch_size):
                batch = searches[i : i + batch_size]

                # Execute batch searches in parallel
                batch_tasks = []
                for search_query in batch:
                    task = self.candidate_explorer._execute_search(
                        search_query
                    )
                    batch_tasks.append(task)

                # Wait for batch completion; exceptions are returned as
                # values so one failed query doesn't kill the batch.
                batch_results = await asyncio.gather(
                    *batch_tasks, return_exceptions=True
                )

                # Process batch results
                for j, result in enumerate(batch_results):
                    if isinstance(result, Exception):
                        logger.error(
                            f"Search failed: {batch[j]} - {result}"
                        )
                        continue

                    # NOTE(review): entity type is hard-coded here —
                    # presumably tuned for BrowseComp-style puzzles; verify.
                    candidates = self.candidate_explorer._extract_candidates_from_results(
                        result, entity_type="fictional character"
                    )

                    # Early rejection if enabled: a quick LLM confidence
                    # check drops obvious non-matches before full scoring.
                    if self.early_rejection_manager:
                        for candidate in candidates:
                            confidence = await self.early_rejection_manager.quick_confidence_check(
                                candidate, base_constraints
                            )

                            should_reject, reason = (
                                self.early_rejection_manager.should_reject_early(
                                    confidence
                                )
                            )
                            if should_reject:
                                logger.debug(
                                    f"Early rejected {candidate.name}: {reason}"
                                )
                                continue

                            if (
                                confidence.get("positive_confidence", 0)
                                > 0.6
                            ):
                                high_confidence_count += 1

                            category_candidates.append(candidate)
                    else:
                        category_candidates.extend(candidates)

                logger.info(
                    f"{category} batch {i // batch_size + 1}: found {len(category_candidates)} candidates"
                )

                # Early stopping check (per batch, within this category)
                if self.early_rejection_manager:
                    should_continue, stop_reason = (
                        self.early_rejection_manager.should_continue_search(
                            all_candidates + category_candidates,
                            high_confidence_count,
                        )
                    )
                    if not should_continue:
                        logger.info(f"Early stopping: {stop_reason}")
                        break

            all_candidates.extend(category_candidates)
            search_progress += 8  # Distribute remaining progress

            # Stop if we have enough high-confidence candidates
            if high_confidence_count >= 5:
                logger.info(
                    "Found sufficient high-confidence candidates, stopping search"
                )
                break

        logger.info(
            f"Search completed: {len(all_candidates)} total candidates, {high_confidence_count} high-confidence"
        )

        # Phase 7: Constraint checking on remaining candidates
        if progress_callback:
            progress_callback(
                {
                    "phase": "constraint_evaluation",
                    "progress": 85,
                    "message": f"Evaluating {len(all_candidates)} candidates",
                }
            )

        if not all_candidates:
            return "No valid candidates found", {
                "strategy": "llm_driven_modular",
                "total_searches": total_searches,
                "candidates_found": 0,
                "high_confidence_count": 0,
            }

        # Evaluate top candidates (limit to avoid long processing)
        candidates_to_evaluate = all_candidates[:20]  # Top 20 candidates
        evaluated_candidates = []

        for i, candidate in enumerate(candidates_to_evaluate):
            try:
                result = self.constraint_checker.check_candidate(
                    candidate, base_constraints
                )

                # Annotate the candidate in place with its scores.
                candidate.evaluation_results = result.detailed_results
                candidate.score = result.total_score
                candidate.should_reject = result.should_reject

                if not result.should_reject:
                    evaluated_candidates.append(candidate)

            except Exception as e:
                # Best-effort: a failed evaluation drops the candidate
                # rather than aborting the whole run.
                logger.error(
                    f"Error evaluating candidate {candidate.name}: {e}"
                )
                continue

        if not evaluated_candidates:
            return "No valid candidates passed constraint evaluation", {
                "strategy": "llm_driven_modular",
                "total_searches": total_searches,
                "candidates_found": len(all_candidates),
                "candidates_evaluated": len(candidates_to_evaluate),
                "high_confidence_count": high_confidence_count,
            }

        # Select best candidate (highest total constraint score)
        evaluated_candidates.sort(key=lambda x: x.score, reverse=True)
        best_candidate = evaluated_candidates[0]

        logger.info(
            f"Best candidate: {best_candidate.name} with score {best_candidate.score:.2%}"
        )

        # Generate final answer
        if progress_callback:
            progress_callback(
                {
                    "phase": "final_answer",
                    "progress": 95,
                    "message": "Generating final answer",
                }
            )

        answer = await self._generate_final_answer(
            query, best_candidate, base_constraints
        )

        metadata = {
            "strategy": "llm_driven_modular",
            "constraint_checker": self.constraint_checker_type,
            "exploration_strategy": self.exploration_strategy,
            "early_rejection_enabled": self.early_rejection,
            "total_searches_generated": total_searches,
            "candidates_found": len(all_candidates),
            "candidates_evaluated": len(candidates_to_evaluate),
            "candidates_valid": len(evaluated_candidates),
            "high_confidence_count": high_confidence_count,
            "best_candidate": best_candidate.name,
            "best_score": best_candidate.score,
        }

        return answer, metadata

    except Exception as e:
        logger.error(f"Error in LLM-driven search: {e}")
        import traceback

        logger.error(f"Traceback: {traceback.format_exc()}")
        return f"Search failed: {str(e)}", {"error": str(e)}
787
+
788
+ async def _generate_final_answer(self, query, best_candidate, constraints):
789
+ """Generate comprehensive final answer"""
790
+ constraint_info = "\n".join([f"- {c.description}" for c in constraints])
791
+
792
+ evaluation_info = ""
793
+ if hasattr(best_candidate, "evaluation_results"):
794
+ evaluation_info = "\n".join(
795
+ [
796
+ f"- {result.get('constraint', 'Unknown')}: {result.get('score', 0):.0%}"
797
+ for result in best_candidate.evaluation_results
798
+ ]
799
+ )
800
+
801
+ prompt = f"""Based on the search results, provide a comprehensive answer to: {query}
802
+
803
+ Best candidate found: {best_candidate.name}
804
+ Score: {best_candidate.score:.0%}
805
+
806
+ Constraints analyzed:
807
+ {constraint_info}
808
+
809
+ Constraint evaluation results:
810
+ {evaluation_info}
811
+
812
+ Evidence summary: {getattr(best_candidate, "summary", "No summary available")}
813
+
814
+ Provide a clear, factual answer that addresses the original question and explains how the candidate satisfies the constraints."""
815
+
816
+ response = await self.model.ainvoke(prompt)
817
+ return response.content
818
+
819
+ def _gather_evidence_for_constraint(self, candidate, constraint):
820
+ """Gather evidence for a constraint using actual search"""
821
+ try:
822
+ # Create a focused search query
823
+ query = f"{candidate.name} {constraint.description}"
824
+
825
+ # Use the search engine properly
826
+ if hasattr(self.search_engine, "run"):
827
+ results = self.search_engine.run(query)
828
+ else:
829
+ logger.warning("Search engine doesn't have run method")
830
+ return []
831
+
832
+ # Handle different result formats
833
+ if isinstance(results, list):
834
+ result_list = results[:3] # Top 3 results
835
+ elif isinstance(results, dict):
836
+ result_list = results.get("results", [])[:3] # Top 3 results
837
+ else:
838
+ logger.warning(f"Unknown search result format: {type(results)}")
839
+ return []
840
+
841
+ # Extract evidence from search results
842
+ evidence = []
843
+ for result in result_list:
844
+ evidence.append(
845
+ {
846
+ "text": result.get("snippet", "")
847
+ or result.get("content", ""),
848
+ "source": result.get("url", "search_result"),
849
+ "confidence": 0.7,
850
+ "title": result.get("title", ""),
851
+ }
852
+ )
853
+
854
+ return evidence
855
+
856
+ except Exception as e:
857
+ logger.error(f"Error gathering evidence: {e}")
858
+ # Fallback to mock evidence
859
+ return [
860
+ {
861
+ "text": f"Evidence about {candidate.name} regarding {constraint.description}",
862
+ "source": "mock_result",
863
+ "confidence": 0.5,
864
+ }
865
+ ]