local-deep-research 0.4.4__py3-none-any.whl → 0.5.0__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package, as published to one of the supported registries. It is provided for informational purposes only and reflects the changes between the versions exactly as they appear in the public registry.
Files changed (220)
  1. local_deep_research/__init__.py +7 -0
  2. local_deep_research/__version__.py +1 -1
  3. local_deep_research/advanced_search_system/answer_decoding/__init__.py +5 -0
  4. local_deep_research/advanced_search_system/answer_decoding/browsecomp_answer_decoder.py +421 -0
  5. local_deep_research/advanced_search_system/candidate_exploration/README.md +219 -0
  6. local_deep_research/advanced_search_system/candidate_exploration/__init__.py +25 -0
  7. local_deep_research/advanced_search_system/candidate_exploration/adaptive_explorer.py +329 -0
  8. local_deep_research/advanced_search_system/candidate_exploration/base_explorer.py +341 -0
  9. local_deep_research/advanced_search_system/candidate_exploration/constraint_guided_explorer.py +436 -0
  10. local_deep_research/advanced_search_system/candidate_exploration/diversity_explorer.py +457 -0
  11. local_deep_research/advanced_search_system/candidate_exploration/parallel_explorer.py +250 -0
  12. local_deep_research/advanced_search_system/candidate_exploration/progressive_explorer.py +255 -0
  13. local_deep_research/advanced_search_system/candidates/__init__.py +5 -0
  14. local_deep_research/advanced_search_system/candidates/base_candidate.py +59 -0
  15. local_deep_research/advanced_search_system/constraint_checking/README.md +150 -0
  16. local_deep_research/advanced_search_system/constraint_checking/__init__.py +35 -0
  17. local_deep_research/advanced_search_system/constraint_checking/base_constraint_checker.py +122 -0
  18. local_deep_research/advanced_search_system/constraint_checking/constraint_checker.py +223 -0
  19. local_deep_research/advanced_search_system/constraint_checking/constraint_satisfaction_tracker.py +387 -0
  20. local_deep_research/advanced_search_system/constraint_checking/dual_confidence_checker.py +424 -0
  21. local_deep_research/advanced_search_system/constraint_checking/evidence_analyzer.py +174 -0
  22. local_deep_research/advanced_search_system/constraint_checking/intelligent_constraint_relaxer.py +503 -0
  23. local_deep_research/advanced_search_system/constraint_checking/rejection_engine.py +143 -0
  24. local_deep_research/advanced_search_system/constraint_checking/strict_checker.py +259 -0
  25. local_deep_research/advanced_search_system/constraint_checking/threshold_checker.py +213 -0
  26. local_deep_research/advanced_search_system/constraints/__init__.py +6 -0
  27. local_deep_research/advanced_search_system/constraints/base_constraint.py +58 -0
  28. local_deep_research/advanced_search_system/constraints/constraint_analyzer.py +143 -0
  29. local_deep_research/advanced_search_system/evidence/__init__.py +12 -0
  30. local_deep_research/advanced_search_system/evidence/base_evidence.py +57 -0
  31. local_deep_research/advanced_search_system/evidence/evaluator.py +159 -0
  32. local_deep_research/advanced_search_system/evidence/requirements.py +122 -0
  33. local_deep_research/advanced_search_system/filters/base_filter.py +3 -1
  34. local_deep_research/advanced_search_system/filters/cross_engine_filter.py +8 -2
  35. local_deep_research/advanced_search_system/filters/journal_reputation_filter.py +43 -29
  36. local_deep_research/advanced_search_system/findings/repository.py +54 -17
  37. local_deep_research/advanced_search_system/knowledge/standard_knowledge.py +3 -1
  38. local_deep_research/advanced_search_system/query_generation/adaptive_query_generator.py +405 -0
  39. local_deep_research/advanced_search_system/questions/__init__.py +16 -0
  40. local_deep_research/advanced_search_system/questions/atomic_fact_question.py +171 -0
  41. local_deep_research/advanced_search_system/questions/browsecomp_question.py +287 -0
  42. local_deep_research/advanced_search_system/questions/decomposition_question.py +13 -4
  43. local_deep_research/advanced_search_system/questions/entity_aware_question.py +184 -0
  44. local_deep_research/advanced_search_system/questions/standard_question.py +9 -3
  45. local_deep_research/advanced_search_system/search_optimization/cross_constraint_manager.py +624 -0
  46. local_deep_research/advanced_search_system/source_management/diversity_manager.py +613 -0
  47. local_deep_research/advanced_search_system/strategies/__init__.py +42 -0
  48. local_deep_research/advanced_search_system/strategies/adaptive_decomposition_strategy.py +564 -0
  49. local_deep_research/advanced_search_system/strategies/base_strategy.py +4 -4
  50. local_deep_research/advanced_search_system/strategies/browsecomp_entity_strategy.py +1031 -0
  51. local_deep_research/advanced_search_system/strategies/browsecomp_optimized_strategy.py +778 -0
  52. local_deep_research/advanced_search_system/strategies/concurrent_dual_confidence_strategy.py +446 -0
  53. local_deep_research/advanced_search_system/strategies/constrained_search_strategy.py +1348 -0
  54. local_deep_research/advanced_search_system/strategies/constraint_parallel_strategy.py +522 -0
  55. local_deep_research/advanced_search_system/strategies/direct_search_strategy.py +217 -0
  56. local_deep_research/advanced_search_system/strategies/dual_confidence_strategy.py +320 -0
  57. local_deep_research/advanced_search_system/strategies/dual_confidence_with_rejection.py +219 -0
  58. local_deep_research/advanced_search_system/strategies/early_stop_constrained_strategy.py +369 -0
  59. local_deep_research/advanced_search_system/strategies/entity_aware_source_strategy.py +140 -0
  60. local_deep_research/advanced_search_system/strategies/evidence_based_strategy.py +1248 -0
  61. local_deep_research/advanced_search_system/strategies/evidence_based_strategy_v2.py +1337 -0
  62. local_deep_research/advanced_search_system/strategies/focused_iteration_strategy.py +537 -0
  63. local_deep_research/advanced_search_system/strategies/improved_evidence_based_strategy.py +782 -0
  64. local_deep_research/advanced_search_system/strategies/iterative_reasoning_strategy.py +760 -0
  65. local_deep_research/advanced_search_system/strategies/iterdrag_strategy.py +55 -21
  66. local_deep_research/advanced_search_system/strategies/llm_driven_modular_strategy.py +865 -0
  67. local_deep_research/advanced_search_system/strategies/modular_strategy.py +1142 -0
  68. local_deep_research/advanced_search_system/strategies/parallel_constrained_strategy.py +506 -0
  69. local_deep_research/advanced_search_system/strategies/parallel_search_strategy.py +34 -16
  70. local_deep_research/advanced_search_system/strategies/rapid_search_strategy.py +29 -9
  71. local_deep_research/advanced_search_system/strategies/recursive_decomposition_strategy.py +492 -0
  72. local_deep_research/advanced_search_system/strategies/smart_decomposition_strategy.py +284 -0
  73. local_deep_research/advanced_search_system/strategies/smart_query_strategy.py +515 -0
  74. local_deep_research/advanced_search_system/strategies/source_based_strategy.py +48 -24
  75. local_deep_research/advanced_search_system/strategies/standard_strategy.py +34 -14
  76. local_deep_research/advanced_search_system/tools/base_tool.py +7 -2
  77. local_deep_research/api/benchmark_functions.py +6 -2
  78. local_deep_research/api/research_functions.py +10 -4
  79. local_deep_research/benchmarks/__init__.py +9 -7
  80. local_deep_research/benchmarks/benchmark_functions.py +6 -2
  81. local_deep_research/benchmarks/cli/benchmark_commands.py +27 -10
  82. local_deep_research/benchmarks/cli.py +38 -13
  83. local_deep_research/benchmarks/comparison/__init__.py +4 -2
  84. local_deep_research/benchmarks/comparison/evaluator.py +316 -239
  85. local_deep_research/benchmarks/datasets/__init__.py +1 -1
  86. local_deep_research/benchmarks/datasets/base.py +91 -72
  87. local_deep_research/benchmarks/datasets/browsecomp.py +54 -33
  88. local_deep_research/benchmarks/datasets/custom_dataset_template.py +19 -19
  89. local_deep_research/benchmarks/datasets/simpleqa.py +14 -14
  90. local_deep_research/benchmarks/datasets/utils.py +48 -29
  91. local_deep_research/benchmarks/datasets.py +4 -11
  92. local_deep_research/benchmarks/efficiency/__init__.py +8 -4
  93. local_deep_research/benchmarks/efficiency/resource_monitor.py +223 -171
  94. local_deep_research/benchmarks/efficiency/speed_profiler.py +62 -48
  95. local_deep_research/benchmarks/evaluators/browsecomp.py +3 -1
  96. local_deep_research/benchmarks/evaluators/composite.py +6 -2
  97. local_deep_research/benchmarks/evaluators/simpleqa.py +36 -13
  98. local_deep_research/benchmarks/graders.py +32 -10
  99. local_deep_research/benchmarks/metrics/README.md +1 -1
  100. local_deep_research/benchmarks/metrics/calculation.py +25 -10
  101. local_deep_research/benchmarks/metrics/reporting.py +7 -3
  102. local_deep_research/benchmarks/metrics/visualization.py +42 -23
  103. local_deep_research/benchmarks/metrics.py +1 -1
  104. local_deep_research/benchmarks/optimization/__init__.py +3 -1
  105. local_deep_research/benchmarks/optimization/api.py +7 -1
  106. local_deep_research/benchmarks/optimization/optuna_optimizer.py +75 -26
  107. local_deep_research/benchmarks/runners.py +48 -15
  108. local_deep_research/citation_handler.py +65 -92
  109. local_deep_research/citation_handlers/__init__.py +15 -0
  110. local_deep_research/citation_handlers/base_citation_handler.py +70 -0
  111. local_deep_research/citation_handlers/forced_answer_citation_handler.py +179 -0
  112. local_deep_research/citation_handlers/precision_extraction_handler.py +550 -0
  113. local_deep_research/citation_handlers/standard_citation_handler.py +80 -0
  114. local_deep_research/config/llm_config.py +271 -169
  115. local_deep_research/config/search_config.py +14 -5
  116. local_deep_research/defaults/__init__.py +0 -1
  117. local_deep_research/metrics/__init__.py +13 -0
  118. local_deep_research/metrics/database.py +58 -0
  119. local_deep_research/metrics/db_models.py +115 -0
  120. local_deep_research/metrics/migrate_add_provider_to_token_usage.py +148 -0
  121. local_deep_research/metrics/migrate_call_stack_tracking.py +105 -0
  122. local_deep_research/metrics/migrate_enhanced_tracking.py +75 -0
  123. local_deep_research/metrics/migrate_research_ratings.py +31 -0
  124. local_deep_research/metrics/models.py +61 -0
  125. local_deep_research/metrics/pricing/__init__.py +12 -0
  126. local_deep_research/metrics/pricing/cost_calculator.py +237 -0
  127. local_deep_research/metrics/pricing/pricing_cache.py +143 -0
  128. local_deep_research/metrics/pricing/pricing_fetcher.py +240 -0
  129. local_deep_research/metrics/query_utils.py +51 -0
  130. local_deep_research/metrics/search_tracker.py +380 -0
  131. local_deep_research/metrics/token_counter.py +1078 -0
  132. local_deep_research/migrate_db.py +3 -1
  133. local_deep_research/report_generator.py +22 -8
  134. local_deep_research/search_system.py +390 -9
  135. local_deep_research/test_migration.py +15 -5
  136. local_deep_research/utilities/db_utils.py +7 -4
  137. local_deep_research/utilities/es_utils.py +115 -104
  138. local_deep_research/utilities/llm_utils.py +15 -5
  139. local_deep_research/utilities/log_utils.py +151 -0
  140. local_deep_research/utilities/search_cache.py +387 -0
  141. local_deep_research/utilities/search_utilities.py +14 -6
  142. local_deep_research/utilities/threading_utils.py +92 -0
  143. local_deep_research/utilities/url_utils.py +6 -0
  144. local_deep_research/web/api.py +347 -0
  145. local_deep_research/web/app.py +13 -17
  146. local_deep_research/web/app_factory.py +71 -66
  147. local_deep_research/web/database/migrate_to_ldr_db.py +12 -4
  148. local_deep_research/web/database/migrations.py +5 -3
  149. local_deep_research/web/database/models.py +51 -2
  150. local_deep_research/web/database/schema_upgrade.py +49 -29
  151. local_deep_research/web/models/database.py +51 -61
  152. local_deep_research/web/routes/api_routes.py +56 -22
  153. local_deep_research/web/routes/benchmark_routes.py +4 -1
  154. local_deep_research/web/routes/globals.py +22 -0
  155. local_deep_research/web/routes/history_routes.py +71 -46
  156. local_deep_research/web/routes/metrics_routes.py +1155 -0
  157. local_deep_research/web/routes/research_routes.py +227 -41
  158. local_deep_research/web/routes/settings_routes.py +156 -55
  159. local_deep_research/web/services/research_service.py +310 -103
  160. local_deep_research/web/services/resource_service.py +36 -11
  161. local_deep_research/web/services/settings_manager.py +55 -17
  162. local_deep_research/web/services/settings_service.py +12 -4
  163. local_deep_research/web/services/socket_service.py +295 -188
  164. local_deep_research/web/static/css/custom_dropdown.css +180 -0
  165. local_deep_research/web/static/css/styles.css +39 -1
  166. local_deep_research/web/static/js/components/detail.js +633 -267
  167. local_deep_research/web/static/js/components/details.js +751 -0
  168. local_deep_research/web/static/js/components/fallback/formatting.js +11 -11
  169. local_deep_research/web/static/js/components/fallback/ui.js +23 -23
  170. local_deep_research/web/static/js/components/history.js +76 -76
  171. local_deep_research/web/static/js/components/logpanel.js +61 -13
  172. local_deep_research/web/static/js/components/progress.js +13 -2
  173. local_deep_research/web/static/js/components/research.js +99 -12
  174. local_deep_research/web/static/js/components/results.js +239 -106
  175. local_deep_research/web/static/js/main.js +40 -40
  176. local_deep_research/web/static/js/services/audio.js +1 -1
  177. local_deep_research/web/static/js/services/formatting.js +11 -11
  178. local_deep_research/web/static/js/services/keyboard.js +157 -0
  179. local_deep_research/web/static/js/services/pdf.js +80 -80
  180. local_deep_research/web/static/sounds/README.md +1 -1
  181. local_deep_research/web/templates/base.html +1 -0
  182. local_deep_research/web/templates/components/log_panel.html +7 -1
  183. local_deep_research/web/templates/components/mobile_nav.html +1 -1
  184. local_deep_research/web/templates/components/sidebar.html +3 -0
  185. local_deep_research/web/templates/pages/cost_analytics.html +1245 -0
  186. local_deep_research/web/templates/pages/details.html +325 -24
  187. local_deep_research/web/templates/pages/history.html +1 -1
  188. local_deep_research/web/templates/pages/metrics.html +1929 -0
  189. local_deep_research/web/templates/pages/progress.html +2 -2
  190. local_deep_research/web/templates/pages/research.html +53 -17
  191. local_deep_research/web/templates/pages/results.html +12 -1
  192. local_deep_research/web/templates/pages/star_reviews.html +803 -0
  193. local_deep_research/web/utils/formatters.py +9 -3
  194. local_deep_research/web_search_engines/default_search_engines.py +5 -3
  195. local_deep_research/web_search_engines/engines/full_search.py +8 -2
  196. local_deep_research/web_search_engines/engines/meta_search_engine.py +59 -20
  197. local_deep_research/web_search_engines/engines/search_engine_arxiv.py +19 -6
  198. local_deep_research/web_search_engines/engines/search_engine_brave.py +6 -2
  199. local_deep_research/web_search_engines/engines/search_engine_ddg.py +3 -1
  200. local_deep_research/web_search_engines/engines/search_engine_elasticsearch.py +81 -58
  201. local_deep_research/web_search_engines/engines/search_engine_github.py +46 -15
  202. local_deep_research/web_search_engines/engines/search_engine_google_pse.py +16 -6
  203. local_deep_research/web_search_engines/engines/search_engine_guardian.py +39 -15
  204. local_deep_research/web_search_engines/engines/search_engine_local.py +58 -25
  205. local_deep_research/web_search_engines/engines/search_engine_local_all.py +15 -5
  206. local_deep_research/web_search_engines/engines/search_engine_pubmed.py +63 -21
  207. local_deep_research/web_search_engines/engines/search_engine_searxng.py +37 -11
  208. local_deep_research/web_search_engines/engines/search_engine_semantic_scholar.py +27 -9
  209. local_deep_research/web_search_engines/engines/search_engine_serpapi.py +12 -4
  210. local_deep_research/web_search_engines/engines/search_engine_wayback.py +31 -10
  211. local_deep_research/web_search_engines/engines/search_engine_wikipedia.py +12 -3
  212. local_deep_research/web_search_engines/search_engine_base.py +83 -35
  213. local_deep_research/web_search_engines/search_engine_factory.py +25 -8
  214. local_deep_research/web_search_engines/search_engines_config.py +9 -3
  215. {local_deep_research-0.4.4.dist-info → local_deep_research-0.5.0.dist-info}/METADATA +7 -1
  216. local_deep_research-0.5.0.dist-info/RECORD +265 -0
  217. local_deep_research-0.4.4.dist-info/RECORD +0 -177
  218. {local_deep_research-0.4.4.dist-info → local_deep_research-0.5.0.dist-info}/WHEEL +0 -0
  219. {local_deep_research-0.4.4.dist-info → local_deep_research-0.5.0.dist-info}/entry_points.txt +0 -0
  220. {local_deep_research-0.4.4.dist-info → local_deep_research-0.5.0.dist-info}/licenses/LICENSE +0 -0
local_deep_research/advanced_search_system/evidence/base_evidence.py (new file)
@@ -0,0 +1,57 @@
+"""
+Base evidence classes for the advanced search system.
+"""
+
+from dataclasses import dataclass, field
+from datetime import datetime
+from enum import Enum
+from typing import Any, Dict, Optional
+
+
+class EvidenceType(Enum):
+    """Types of evidence with inherent reliability scores."""
+
+    DIRECT_STATEMENT = "direct_statement"
+    OFFICIAL_RECORD = "official_record"
+    RESEARCH_FINDING = "research_finding"
+    NEWS_REPORT = "news_report"
+    STATISTICAL_DATA = "statistical_data"
+    INFERENCE = "inference"
+    CORRELATION = "correlation"
+    SPECULATION = "speculation"
+
+    @property
+    def base_confidence(self) -> float:
+        """Get base confidence for this evidence type."""
+        confidence_map = {
+            EvidenceType.DIRECT_STATEMENT: 0.95,
+            EvidenceType.OFFICIAL_RECORD: 0.90,
+            EvidenceType.RESEARCH_FINDING: 0.85,
+            EvidenceType.STATISTICAL_DATA: 0.85,
+            EvidenceType.NEWS_REPORT: 0.75,
+            EvidenceType.INFERENCE: 0.50,
+            EvidenceType.CORRELATION: 0.30,
+            EvidenceType.SPECULATION: 0.10,
+        }
+        return confidence_map.get(self, 0.5)
+
+
+@dataclass
+class Evidence:
+    """Evidence supporting or refuting a claim."""
+
+    claim: str
+    type: EvidenceType
+    source: str
+    confidence: float = 0.0
+    reasoning: Optional[str] = None
+    raw_text: Optional[str] = None
+    timestamp: str = field(
+        default_factory=lambda: datetime.utcnow().isoformat()
+    )
+    metadata: Dict[str, Any] = field(default_factory=dict)
+
+    def __post_init__(self):
+        """Calculate initial confidence if not provided."""
+        if self.confidence == 0.0:
+            self.confidence = self.type.base_confidence
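A minimal usage sketch for the new Evidence dataclass (illustrative only, not part of the released diff): when no explicit confidence is passed, __post_init__ falls back to the evidence type's base_confidence.

# Illustrative sketch; the import path follows the file list above.
from local_deep_research.advanced_search_system.evidence.base_evidence import (
    Evidence,
    EvidenceType,
)

e = Evidence(
    claim="The journal appears in an official registry",
    type=EvidenceType.OFFICIAL_RECORD,
    source="registry lookup",
)
assert e.confidence == 0.90  # defaulted from EvidenceType.OFFICIAL_RECORD

e2 = Evidence(
    claim="A forum post guesses at the answer",
    type=EvidenceType.SPECULATION,
    source="forum",
    confidence=0.42,
)
assert e2.confidence == 0.42  # an explicit value is kept as-is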
local_deep_research/advanced_search_system/evidence/evaluator.py (new file)
@@ -0,0 +1,159 @@
+"""
+Evidence evaluator for assessing evidence quality and relevance.
+"""
+
+from typing import Dict
+
+from langchain_core.language_models import BaseChatModel
+from loguru import logger
+
+from ...utilities.search_utilities import remove_think_tags
+from ..constraints.base_constraint import Constraint
+from .base_evidence import Evidence, EvidenceType
+
+
+class EvidenceEvaluator:
+    """Evaluates evidence quality and relevance."""
+
+    def __init__(self, model: BaseChatModel):
+        """Initialize the evidence evaluator."""
+        self.model = model
+        self.source_reliability = {
+            "official": 1.0,
+            "research": 0.95,
+            "news": 0.8,
+            "community": 0.6,
+            "inference": 0.5,
+            "speculation": 0.3,
+        }
+
+    def extract_evidence(
+        self, search_result: str, candidate: str, constraint: Constraint
+    ) -> Evidence:
+        """Extract evidence from search results for a specific constraint."""
+        prompt = f"""
+Extract evidence regarding whether "{candidate}" satisfies this constraint:
+
+Constraint: {constraint.description}
+Constraint Type: {constraint.type.value}
+Required Value: {constraint.value}
+
+Search Results:
+{search_result[:3000]}
+
+Provide:
+1. CLAIM: What the evidence claims about the constraint
+2. TYPE: direct_statement, official_record, research_finding, news_report, statistical_data, inference, correlation, or speculation
+3. SOURCE: Where this evidence comes from
+4. CONFIDENCE: How confident you are this evidence is accurate (0.0-1.0)
+5. REASONING: Why this evidence supports or refutes the constraint
+6. QUOTE: Relevant quote from the search results (if any)
+
+Format:
+CLAIM: [specific claim]
+TYPE: [evidence type]
+SOURCE: [source description]
+CONFIDENCE: [0.0-1.0]
+REASONING: [explanation]
+QUOTE: [relevant text]
+"""
+
+        response = self.model.invoke(prompt)
+        content = remove_think_tags(response.content)
+
+        # Parse response
+        parsed = self._parse_evidence_response(content)
+
+        # Create evidence object
+        # Safely parse confidence value, handling potential errors
+        confidence_str = parsed.get("confidence", "0.5")
+        try:
+            confidence = float(confidence_str)
+            # Ensure confidence is between 0 and 1
+            confidence = max(0.0, min(1.0, confidence))
+        except ValueError:
+            logger.warning(
+                f"Failed to parse confidence value: {confidence_str}"
+            )
+            confidence = 0.5
+
+        evidence = Evidence(
+            claim=parsed.get("claim", "No clear claim"),
+            type=self._parse_evidence_type(parsed.get("type", "speculation")),
+            source=parsed.get("source", "Unknown"),
+            confidence=confidence,
+            reasoning=parsed.get("reasoning", ""),
+            raw_text=parsed.get("quote", ""),
+            metadata={
+                "candidate": candidate,
+                "constraint_id": constraint.id,
+                "constraint_type": constraint.type.value,
+            },
+        )
+
+        # Adjust confidence based on how well it matches the constraint
+        evidence.confidence *= self._assess_match_quality(evidence, constraint)
+
+        return evidence
+
+    def _parse_evidence_response(self, content: str) -> Dict[str, str]:
+        """Parse the LLM response into evidence components."""
+        import re
+
+        parsed = {}
+
+        for line in content.strip().split("\n"):
+            if ":" in line:
+                key, value = line.split(":", 1)
+                key = key.strip().lower()
+                value = value.strip()
+
+                if key in [
+                    "claim",
+                    "type",
+                    "source",
+                    "confidence",
+                    "reasoning",
+                    "quote",
+                ]:
+                    # Special handling for confidence to extract just the float value
+                    if key == "confidence":
+                        # Extract the first float from the value string
+                        match = re.search(r"(\d*\.?\d+)", value)
+                        if match:
+                            parsed[key] = match.group(1)
+                        else:
+                            parsed[key] = value
+                    else:
+                        parsed[key] = value
+
+        return parsed
+
+    def _parse_evidence_type(self, type_str: str) -> EvidenceType:
+        """Parse evidence type from string."""
+        type_map = {
+            "direct_statement": EvidenceType.DIRECT_STATEMENT,
+            "official_record": EvidenceType.OFFICIAL_RECORD,
+            "research_finding": EvidenceType.RESEARCH_FINDING,
+            "news_report": EvidenceType.NEWS_REPORT,
+            "statistical_data": EvidenceType.STATISTICAL_DATA,
+            "inference": EvidenceType.INFERENCE,
+            "correlation": EvidenceType.CORRELATION,
+            "speculation": EvidenceType.SPECULATION,
+        }
+        return type_map.get(type_str.lower(), EvidenceType.SPECULATION)
+
+    def _assess_match_quality(
+        self, evidence: Evidence, constraint: Constraint
+    ) -> float:
+        """Assess how well the evidence matches the constraint."""
+        # This is a simplified version - could be made more sophisticated
+        if constraint.value.lower() in evidence.claim.lower():
+            return 1.0
+        elif any(
+            word in evidence.claim.lower()
+            for word in constraint.value.lower().split()
+        ):
+            return 0.8
+        else:
+            return 0.6  # Partial match at best
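The confidence field is the fragile part of this parser: the model may wrap the number in prose. A self-contained sketch of the two steps the evaluator applies (regex extraction in _parse_evidence_response, then conversion and clamping in extract_evidence); only the standard library is assumed.

import re

def parse_confidence(value: str, default: float = 0.5) -> float:
    # First float in the string, converted and clamped to [0.0, 1.0].
    match = re.search(r"(\d*\.?\d+)", value)
    if match is None:
        return default  # mirrors the evaluator's 0.5 fallback
    return max(0.0, min(1.0, float(match.group(1))))

print(parse_confidence("0.8"))                    # 0.8
print(parse_confidence("roughly 0.75, I think"))  # 0.75
print(parse_confidence("9/10"))                   # 1.0 (the 9 is clamped)
print(parse_confidence("high"))                   # 0.5 (no number found)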
local_deep_research/advanced_search_system/evidence/requirements.py (new file)
@@ -0,0 +1,122 @@
+"""
+Evidence requirements for different constraint types.
+"""
+
+from typing import Dict, List
+
+from ..constraints.base_constraint import ConstraintType
+
+
+class EvidenceRequirements:
+    """Define evidence requirements for different constraint types."""
+
+    @staticmethod
+    def get_requirements(
+        constraint_type: ConstraintType,
+    ) -> Dict[str, List[str]]:
+        """Get evidence requirements for a constraint type.
+
+        Args:
+            constraint_type: The type of constraint
+
+        Returns:
+            Dictionary of evidence types and their sources
+        """
+        requirements = {
+            ConstraintType.PROPERTY: {
+                "preferred": ["direct_statement", "official_record"],
+                "acceptable": ["research_finding", "inference"],
+                "sources": [
+                    "scientific papers",
+                    "official documents",
+                    "encyclopedias",
+                ],
+            },
+            ConstraintType.NAME_PATTERN: {
+                "preferred": ["direct_statement", "linguistic_analysis"],
+                "acceptable": ["correlation", "inference"],
+                "sources": [
+                    "etymology sources",
+                    "naming databases",
+                    "historical records",
+                ],
+            },
+            ConstraintType.EVENT: {
+                "preferred": ["news_report", "official_record"],
+                "acceptable": ["testimonial", "correlation"],
+                "sources": [
+                    "news archives",
+                    "government reports",
+                    "witness accounts",
+                ],
+            },
+            ConstraintType.STATISTIC: {
+                "preferred": ["statistical_data", "official_record"],
+                "acceptable": ["research_finding"],
+                "sources": [
+                    "government databases",
+                    "research papers",
+                    "official reports",
+                ],
+            },
+            ConstraintType.TEMPORAL: {
+                "preferred": ["official_record", "news_report"],
+                "acceptable": ["historical_record", "inference"],
+                "sources": ["archives", "newspapers", "official timelines"],
+            },
+            ConstraintType.LOCATION: {
+                "preferred": ["geographical_data", "official_record"],
+                "acceptable": ["mapping_data", "inference"],
+                "sources": [
+                    "geographical surveys",
+                    "maps",
+                    "location databases",
+                ],
+            },
+            ConstraintType.COMPARISON: {
+                "preferred": ["statistical_comparison", "research_finding"],
+                "acceptable": ["inference", "correlation"],
+                "sources": [
+                    "comparative studies",
+                    "statistical analyses",
+                    "research papers",
+                ],
+            },
+            ConstraintType.EXISTENCE: {
+                "preferred": ["direct_statement", "official_record"],
+                "acceptable": ["news_report", "inference"],
+                "sources": [
+                    "official registries",
+                    "databases",
+                    "authoritative sources",
+                ],
+            },
+        }
+
+        return requirements.get(
+            constraint_type,
+            {
+                "preferred": ["direct_statement"],
+                "acceptable": ["inference"],
+                "sources": ["general sources"],
+            },
+        )
+
+    @staticmethod
+    def get_minimum_confidence(constraint_type: ConstraintType) -> float:
+        """Get minimum confidence required for constraint type.
+
+        Args:
+            constraint_type: The type of constraint
+
+        Returns:
+            Minimum confidence threshold
+        """
+        thresholds = {
+            ConstraintType.STATISTIC: 0.8,  # High accuracy needed
+            ConstraintType.EVENT: 0.7,  # Moderate accuracy
+            ConstraintType.PROPERTY: 0.6,  # Some flexibility
+            ConstraintType.NAME_PATTERN: 0.5,  # More interpretive
+        }
+
+        return thresholds.get(constraint_type, 0.6)
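A sketch of how a caller might combine the two lookups (hypothetical call site; is_acceptable and its policy are not part of the package):

from local_deep_research.advanced_search_system.constraints.base_constraint import (
    ConstraintType,
)
from local_deep_research.advanced_search_system.evidence.requirements import (
    EvidenceRequirements,
)

reqs = EvidenceRequirements.get_requirements(ConstraintType.STATISTIC)
threshold = EvidenceRequirements.get_minimum_confidence(ConstraintType.STATISTIC)

def is_acceptable(evidence_type: str, confidence: float) -> bool:
    # Hypothetical policy: listed evidence types must clear the type's threshold.
    listed = evidence_type in reqs["preferred"] or evidence_type in reqs["acceptable"]
    return listed and confidence >= threshold

print(threshold)                                # 0.8 for STATISTIC
print(is_acceptable("statistical_data", 0.85))  # True: preferred and above threshold
print(is_acceptable("speculation", 0.95))       # False: not listed for STATISTIC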
local_deep_research/advanced_search_system/filters/base_filter.py
@@ -22,7 +22,9 @@ class BaseFilter(ABC):
         self.model = model
 
     @abstractmethod
-    def filter_results(self, results: List[Dict], query: str, **kwargs) -> List[Dict]:
+    def filter_results(
+        self, results: List[Dict], query: str, **kwargs
+    ) -> List[Dict]:
         """
         Filter search results by relevance to the query.
 
local_deep_research/advanced_search_system/filters/cross_engine_filter.py
@@ -16,7 +16,11 @@ class CrossEngineFilter(BaseFilter):
     """Filter that ranks and filters results from multiple search engines."""
 
     def __init__(
-        self, model, max_results=None, default_reorder=True, default_reindex=True
+        self,
+        model,
+        max_results=None,
+        default_reorder=True,
+        default_reindex=True,
     ):
         """
         Initialize the cross-engine filter.
@@ -30,7 +34,9 @@ class CrossEngineFilter(BaseFilter):
         super().__init__(model)
         # Get max_results from database settings if not provided
         if max_results is None:
-            max_results = int(get_db_setting("search.cross_engine_max_results", 100))
+            max_results = int(
+                get_db_setting("search.cross_engine_max_results", 100)
+            )
         self.max_results = max_results
         self.default_reorder = default_reorder
         self.default_reindex = default_reindex
local_deep_research/advanced_search_system/filters/journal_reputation_filter.py
@@ -69,13 +69,17 @@ class JournalReputationFilter(BaseFilter):
         self.__exclude_non_published = exclude_non_published
         if self.__exclude_non_published is None:
             self.__exclude_non_published = bool(
-                get_db_setting("search.journal_reputation.exclude_non_published", False)
+                get_db_setting(
+                    "search.journal_reputation.exclude_non_published", False
+                )
             )
         self.__quality_reanalysis_period = quality_reanalysis_period
         if self.__quality_reanalysis_period is None:
             self.__quality_reanalysis_period = timedelta(
                 days=int(
-                    get_db_setting("search.journal_reputation.reanalysis_period", 365)
+                    get_db_setting(
+                        "search.journal_reputation.reanalysis_period", 365
+                    )
                 )
             )
 
@@ -85,8 +89,6 @@ class JournalReputationFilter(BaseFilter):
         if self.__engine is None:
             raise JournalFilterError("SearXNG initialization failed.")
 
-        self.__db_session = get_db_session()
-
     @classmethod
     def create_default(
         cls, model: BaseChatModel | None = None, *, engine_name: str
@@ -159,7 +161,9 @@ class JournalReputationFilter(BaseFilter):
             f"ranking and peer review status. Be sure to specify the journal "
             f"name in any generated questions."
         )
-        journal_info = "\n".join([f["content"] for f in journal_info["findings"]])
+        journal_info = "\n".join(
+            [f["content"] for f in journal_info["findings"]]
+        )
         logger.debug(f"Received raw info about journal: {journal_info}")
 
         # Have the LLM assess the reliability based on this information.
@@ -190,7 +194,9 @@ class JournalReputationFilter(BaseFilter):
             reputation_score = int(response.strip())
         except ValueError:
             logger.error("Failed to parse reputation score from LLM response.")
-            raise ValueError("Failed to parse reputation score from LLM response.")
+            raise ValueError(
+                "Failed to parse reputation score from LLM response."
+            )
 
         return max(min(reputation_score, 10), 1)
 
@@ -203,21 +209,22 @@ class JournalReputationFilter(BaseFilter):
             quality: The quality assessment for the journal.
 
         """
-        journal = self.__db_session.query(Journal).filter_by(name=name).first()
-        if journal is not None:
-            journal.quality = quality
-            journal.quality_model = self.model.name
-            journal.quality_analysis_time = int(time.time())
-        else:
-            journal = Journal(
-                name=name,
-                quality=quality,
-                quality_model=self.model.name,
-                quality_analysis_time=int(time.time()),
-            )
-            self.__db_session.add(journal)
+        with get_db_session() as db_session:
+            journal = db_session.query(Journal).filter_by(name=name).first()
+            if journal is not None:
+                journal.quality = quality
+                journal.quality_model = self.model.name
+                journal.quality_analysis_time = int(time.time())
+            else:
+                journal = Journal(
+                    name=name,
+                    quality=quality,
+                    quality_model=self.model.name,
+                    quality_analysis_time=int(time.time()),
+                )
+                db_session.add(journal)
 
-        self.__db_session.commit()
+            db_session.commit()
 
     def __clean_journal_name(self, journal_name: str) -> str:
         """
@@ -268,14 +275,19 @@ class JournalReputationFilter(BaseFilter):
         journal_name = self.__clean_journal_name(journal_name)
 
         # Check the database first.
-        journal = self.__db_session.query(Journal).filter_by(name=journal_name).first()
-        if (
-            journal is not None
-            and (time.time() - journal.quality_analysis_time)
-            < self.__quality_reanalysis_period.total_seconds()
-        ):
-            logger.debug(f"Found existing reputation for {journal_name} in database.")
-            return journal.quality >= self.__threshold
+        with get_db_session() as session:
+            journal = (
+                session.query(Journal).filter_by(name=journal_name).first()
+            )
+            if (
+                journal is not None
+                and (time.time() - journal.quality_analysis_time)
+                < self.__quality_reanalysis_period.total_seconds()
+            ):
+                logger.debug(
+                    f"Found existing reputation for {journal_name} in database."
+                )
+                return journal.quality >= self.__threshold
 
         # Evaluate reputation.
         try:
@@ -288,7 +300,9 @@ class JournalReputationFilter(BaseFilter):
             # okay.
             return True
 
-    def filter_results(self, results: List[Dict], query: str, **kwargs) -> List[Dict]:
+    def filter_results(
+        self, results: List[Dict], query: str, **kwargs
+    ) -> List[Dict]:
         try:
             return list(filter(self.__check_result, results))
         except Exception as e:
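The thread running through the deletions above is session lifetime: the filter no longer holds one self.__db_session from __init__, and instead opens a short-lived session per operation via with get_db_session() as ...:. A minimal sketch of that pattern, assuming get_db_session wraps a SQLAlchemy sessionmaker in a context manager (the package's real helper may differ):

from contextlib import contextmanager

from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker

engine = create_engine("sqlite:///ldr.db")  # hypothetical database URL
SessionLocal = sessionmaker(bind=engine)

@contextmanager
def get_db_session():
    # Hypothetical stand-in for the project's get_db_session helper.
    session = SessionLocal()
    try:
        yield session
    finally:
        session.close()  # released even if the caller raises

# Call sites now hold a session for one read or write only, so a failed
# request cannot pin a connection for the filter's whole lifetime:
with get_db_session() as session:
    pass  # query or update against `session` here, committing if needed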
local_deep_research/advanced_search_system/findings/repository.py
@@ -63,7 +63,10 @@ class FindingsRepository(BaseFindingsRepository):
 
         # Store raw synthesized content if it's the final synthesis
         # Only check for phase if it's a dictionary
-        if isinstance(finding, dict) and finding.get("phase") == "Final synthesis":
+        if (
+            isinstance(finding, dict)
+            and finding.get("phase") == "Final synthesis"
+        ):
             self.findings[query + "_synthesis"] = [
                 {
                     "phase": "Synthesis",
@@ -117,7 +120,9 @@
             questions_by_iteration: Dictionary mapping iteration numbers to lists of questions
         """
         self.questions_by_iteration = questions_by_iteration.copy()
-        logger.info(f"Set questions for {len(questions_by_iteration)} iterations")
+        logger.info(
+            f"Set questions for {len(questions_by_iteration)} iterations"
+        )
 
     def format_findings_to_text(
         self, findings_list: List[Dict], synthesized_content: str
@@ -142,7 +147,9 @@
             f"Questions by iteration keys: {list(self.questions_by_iteration.keys())}"
         )
         if findings_list:
-            logger.debug(f"First finding item keys: {list(findings_list[0].keys())}")
+            logger.debug(
+                f"First finding item keys: {list(findings_list[0].keys())}"
+            )
 
         try:
             # Pass the detailed findings list, the synthesized content (as current_knowledge), and the stored questions
@@ -211,14 +218,18 @@
             )
         elif isinstance(findings[0], str):
             logger.info(f"first finding string length: {len(findings[0])}")
-            logger.info(f"first finding string preview: {findings[0][:100]}...")
+            logger.info(
+                f"first finding string preview: {findings[0][:100]}..."
+            )
 
         if old_formatting:
             # Convert findings list if it contains strings instead of dictionaries
             findings_list = []
             for i, item in enumerate(findings):
                 if isinstance(item, str):
-                    findings_list.append({"phase": f"Finding {i + 1}", "content": item})
+                    findings_list.append(
+                        {"phase": f"Finding {i + 1}", "content": item}
+                    )
                 elif isinstance(item, dict):
                     findings_list.append(item)
 
@@ -237,12 +248,16 @@
                     finding_texts.append(item)
 
            # Use finding_texts for the prompt
-            current_knowledge = "\n\n".join(finding_texts) if finding_texts else ""
+            current_knowledge = (
+                "\n\n".join(finding_texts) if finding_texts else ""
+            )
 
            # Check if knowledge exceeds a reasonable token limit (rough estimate based on characters)
            # 1 token ≈ 4 characters in English
             estimated_tokens = len(current_knowledge) / 4
-            max_safe_tokens = 12000  # Adjust based on your model's context window
+            max_safe_tokens = (
+                12000  # Adjust based on your model's context window
+            )
 
             if estimated_tokens > max_safe_tokens:
                 logger.warning(
@@ -251,10 +266,16 @@
                 # Truncate if needed (keeping the beginning and end which are often most important)
                 # This is a simple approach - a more sophisticated chunking might be better
                 if len(current_knowledge) > 24000:  # ~6000 tokens
-                    first_part = current_knowledge[:12000]  # ~3000 tokens from start
-                    last_part = current_knowledge[-12000:]  # ~3000 tokens from end
+                    first_part = current_knowledge[
+                        :12000
+                    ]  # ~3000 tokens from start
+                    last_part = current_knowledge[
+                        -12000:
+                    ]  # ~3000 tokens from end
                     current_knowledge = f"{first_part}\n\n[...content truncated due to length...]\n\n{last_part}"
-                logger.info("Knowledge truncated to fit within token limits")
+                logger.info(
+                    "Knowledge truncated to fit within token limits"
+                )
 
         prompt = f"""Use IEEE style citations [1], [2], etc. Never make up your own citations. Synthesize the following accumulated knowledge into a comprehensive answer for the original query.
Format the response with clear sections, citations, and a concise summary.
@@ -280,7 +301,9 @@ Use IEEE style citations [1], [2], etc. Never make up your own citations.
             f"Synthesizing final answer. Query: '{query}'. Knowledge length: {len(current_knowledge)}. Prompt length: {len(prompt)}"
         )
         # Log first 500 chars of prompt for debugging context length issues
-        logger.debug(f"Synthesis prompt (first 500 chars): {prompt[:500]}...")
+        logger.debug(
+            f"Synthesis prompt (first 500 chars): {prompt[:500]}..."
+        )
 
         try:
             # Add timeout handling
@@ -300,7 +323,9 @@ Use IEEE style citations [1], [2], etc. Never make up your own citations.
                 timer.daemon = True
                 return timer
 
-            def invoke_with_timeout(timeout_seconds, func, *args, **kwargs):
+            def invoke_with_timeout(
+                timeout_seconds, func, *args, **kwargs
+            ):
                 """
                 Function for implementing timeouts on Windows
                 """
@@ -338,7 +363,9 @@ Use IEEE style citations [1], [2], etc. Never make up your own citations.
                     logger.info(
                         "Using Windows-compatible timeout for LLM invocation"
                     )
-                    response = invoke_with_timeout(120, self.model.invoke, prompt)
+                    response = invoke_with_timeout(
+                        120, self.model.invoke, prompt
+                    )
 
                     # Handle different response types (string or object with content attribute)
                     if hasattr(response, "content"):
@@ -376,7 +403,9 @@ Use IEEE style citations [1], [2], etc. Never make up your own citations.
 
                 # Try with a timeout (adjust seconds as needed)
                 try:
-                    with timeout(120, "LLM invocation timed out after 120 seconds"):
+                    with timeout(
+                        120, "LLM invocation timed out after 120 seconds"
+                    ):
                         response = self.model.invoke(prompt)
 
                         # Handle different response types (string or object with content attribute)
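The hunks above reflow a cross-platform timeout around self.model.invoke: a thread-based invoke_with_timeout on Windows, where signal.SIGALRM is unavailable, and a timeout context manager elsewhere. The helper bodies fall mostly outside this excerpt; one common thread-based shape for such a wrapper, sketched under that assumption, is:

import threading

def invoke_with_timeout(timeout_seconds, func, *args, **kwargs):
    # Run func in a daemon worker thread; raise if it outlives the budget.
    result, error = [], []

    def worker():
        try:
            result.append(func(*args, **kwargs))
        except Exception as e:  # surface worker exceptions to the caller
            error.append(e)

    thread = threading.Thread(target=worker, daemon=True)
    thread.start()
    thread.join(timeout_seconds)
    if thread.is_alive():
        raise TimeoutError(f"Call timed out after {timeout_seconds} seconds")
    if error:
        raise error[0]
    return result[0]

# response = invoke_with_timeout(120, model.invoke, prompt)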
@@ -417,11 +446,19 @@ Use IEEE style citations [1], [2], etc. Never make up your own citations.
                 or "token limit" in error_message
             ):
                 error_type = "token_limit"
-            elif "rate limit" in error_message or "rate_limit" in error_message:
+            elif (
+                "rate limit" in error_message
+                or "rate_limit" in error_message
+            ):
                 error_type = "rate_limit"
-            elif "connection" in error_message or "network" in error_message:
+            elif (
+                "connection" in error_message or "network" in error_message
+            ):
                 error_type = "connection"
-            elif "api key" in error_message or "authentication" in error_message:
+            elif (
+                "api key" in error_message
+                or "authentication" in error_message
+            ):
                 error_type = "authentication"
 
             # Return more detailed error message based on type
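The reflowed elif chain above is substring-based triage of LLM failures. An equivalent table-driven sketch (the token_limit branch's first condition falls outside this excerpt, so only the visible markers appear):

def classify_llm_error(error_message: str) -> str:
    # Map an exception message to a coarse error type via substring checks.
    error_message = error_message.lower()
    markers = {
        "token_limit": ["token limit"],  # the branch's other markers are not shown above
        "rate_limit": ["rate limit", "rate_limit"],
        "connection": ["connection", "network"],
        "authentication": ["api key", "authentication"],
    }
    for error_type, needles in markers.items():
        if any(needle in error_message for needle in needles):
            return error_type
    return "unknown"

print(classify_llm_error("429: rate limit exceeded"))  # rate_limit
print(classify_llm_error("invalid api key"))           # authentication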
local_deep_research/advanced_search_system/knowledge/standard_knowledge.py
@@ -132,7 +132,9 @@ Compressed Knowledge:"""
         try:
             response = self.model.invoke(prompt)
             compressed_knowledge = response.content
-            logger.info(f"Compressed knowledge length: {len(compressed_knowledge)}")
+            logger.info(
+                f"Compressed knowledge length: {len(compressed_knowledge)}"
+            )
             return compressed_knowledge
         except Exception as e:
             logger.error(f"Error compressing knowledge: {str(e)}")