local-deep-research 0.1.26__py3-none-any.whl → 0.2.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (140)
  1. local_deep_research/__init__.py +23 -22
  2. local_deep_research/__main__.py +16 -0
  3. local_deep_research/advanced_search_system/__init__.py +7 -0
  4. local_deep_research/advanced_search_system/filters/__init__.py +8 -0
  5. local_deep_research/advanced_search_system/filters/base_filter.py +38 -0
  6. local_deep_research/advanced_search_system/filters/cross_engine_filter.py +200 -0
  7. local_deep_research/advanced_search_system/findings/base_findings.py +81 -0
  8. local_deep_research/advanced_search_system/findings/repository.py +452 -0
  9. local_deep_research/advanced_search_system/knowledge/__init__.py +1 -0
  10. local_deep_research/advanced_search_system/knowledge/base_knowledge.py +151 -0
  11. local_deep_research/advanced_search_system/knowledge/standard_knowledge.py +159 -0
  12. local_deep_research/advanced_search_system/questions/__init__.py +1 -0
  13. local_deep_research/advanced_search_system/questions/base_question.py +64 -0
  14. local_deep_research/advanced_search_system/questions/decomposition_question.py +445 -0
  15. local_deep_research/advanced_search_system/questions/standard_question.py +119 -0
  16. local_deep_research/advanced_search_system/repositories/__init__.py +7 -0
  17. local_deep_research/advanced_search_system/strategies/__init__.py +1 -0
  18. local_deep_research/advanced_search_system/strategies/base_strategy.py +118 -0
  19. local_deep_research/advanced_search_system/strategies/iterdrag_strategy.py +450 -0
  20. local_deep_research/advanced_search_system/strategies/parallel_search_strategy.py +312 -0
  21. local_deep_research/advanced_search_system/strategies/rapid_search_strategy.py +270 -0
  22. local_deep_research/advanced_search_system/strategies/standard_strategy.py +300 -0
  23. local_deep_research/advanced_search_system/tools/__init__.py +1 -0
  24. local_deep_research/advanced_search_system/tools/base_tool.py +100 -0
  25. local_deep_research/advanced_search_system/tools/knowledge_tools/__init__.py +1 -0
  26. local_deep_research/advanced_search_system/tools/question_tools/__init__.py +1 -0
  27. local_deep_research/advanced_search_system/tools/search_tools/__init__.py +1 -0
  28. local_deep_research/api/__init__.py +5 -5
  29. local_deep_research/api/research_functions.py +154 -160
  30. local_deep_research/app.py +8 -0
  31. local_deep_research/citation_handler.py +25 -16
  32. local_deep_research/{config.py → config/config_files.py} +102 -110
  33. local_deep_research/config/llm_config.py +472 -0
  34. local_deep_research/config/search_config.py +77 -0
  35. local_deep_research/defaults/__init__.py +10 -5
  36. local_deep_research/defaults/main.toml +2 -2
  37. local_deep_research/defaults/search_engines.toml +60 -34
  38. local_deep_research/main.py +121 -19
  39. local_deep_research/migrate_db.py +147 -0
  40. local_deep_research/report_generator.py +87 -45
  41. local_deep_research/search_system.py +153 -283
  42. local_deep_research/setup_data_dir.py +35 -0
  43. local_deep_research/test_migration.py +178 -0
  44. local_deep_research/utilities/__init__.py +0 -0
  45. local_deep_research/utilities/db_utils.py +49 -0
  46. local_deep_research/{utilties → utilities}/enums.py +2 -2
  47. local_deep_research/{utilties → utilities}/llm_utils.py +63 -29
  48. local_deep_research/utilities/search_utilities.py +242 -0
  49. local_deep_research/{utilties → utilities}/setup_utils.py +4 -2
  50. local_deep_research/web/__init__.py +0 -1
  51. local_deep_research/web/app.py +86 -1709
  52. local_deep_research/web/app_factory.py +289 -0
  53. local_deep_research/web/database/README.md +70 -0
  54. local_deep_research/web/database/migrate_to_ldr_db.py +289 -0
  55. local_deep_research/web/database/migrations.py +447 -0
  56. local_deep_research/web/database/models.py +117 -0
  57. local_deep_research/web/database/schema_upgrade.py +107 -0
  58. local_deep_research/web/models/database.py +294 -0
  59. local_deep_research/web/models/settings.py +94 -0
  60. local_deep_research/web/routes/api_routes.py +559 -0
  61. local_deep_research/web/routes/history_routes.py +354 -0
  62. local_deep_research/web/routes/research_routes.py +715 -0
  63. local_deep_research/web/routes/settings_routes.py +1583 -0
  64. local_deep_research/web/services/research_service.py +947 -0
  65. local_deep_research/web/services/resource_service.py +149 -0
  66. local_deep_research/web/services/settings_manager.py +669 -0
  67. local_deep_research/web/services/settings_service.py +187 -0
  68. local_deep_research/web/services/socket_service.py +210 -0
  69. local_deep_research/web/static/css/custom_dropdown.css +277 -0
  70. local_deep_research/web/static/css/settings.css +1223 -0
  71. local_deep_research/web/static/css/styles.css +525 -48
  72. local_deep_research/web/static/js/components/custom_dropdown.js +428 -0
  73. local_deep_research/web/static/js/components/detail.js +348 -0
  74. local_deep_research/web/static/js/components/fallback/formatting.js +122 -0
  75. local_deep_research/web/static/js/components/fallback/ui.js +215 -0
  76. local_deep_research/web/static/js/components/history.js +487 -0
  77. local_deep_research/web/static/js/components/logpanel.js +949 -0
  78. local_deep_research/web/static/js/components/progress.js +1107 -0
  79. local_deep_research/web/static/js/components/research.js +1865 -0
  80. local_deep_research/web/static/js/components/results.js +766 -0
  81. local_deep_research/web/static/js/components/settings.js +3981 -0
  82. local_deep_research/web/static/js/components/settings_sync.js +106 -0
  83. local_deep_research/web/static/js/main.js +226 -0
  84. local_deep_research/web/static/js/services/api.js +253 -0
  85. local_deep_research/web/static/js/services/audio.js +31 -0
  86. local_deep_research/web/static/js/services/formatting.js +119 -0
  87. local_deep_research/web/static/js/services/pdf.js +622 -0
  88. local_deep_research/web/static/js/services/socket.js +882 -0
  89. local_deep_research/web/static/js/services/ui.js +546 -0
  90. local_deep_research/web/templates/base.html +72 -0
  91. local_deep_research/web/templates/components/custom_dropdown.html +47 -0
  92. local_deep_research/web/templates/components/log_panel.html +32 -0
  93. local_deep_research/web/templates/components/mobile_nav.html +22 -0
  94. local_deep_research/web/templates/components/settings_form.html +299 -0
  95. local_deep_research/web/templates/components/sidebar.html +21 -0
  96. local_deep_research/web/templates/pages/details.html +73 -0
  97. local_deep_research/web/templates/pages/history.html +51 -0
  98. local_deep_research/web/templates/pages/progress.html +57 -0
  99. local_deep_research/web/templates/pages/research.html +139 -0
  100. local_deep_research/web/templates/pages/results.html +59 -0
  101. local_deep_research/web/templates/settings_dashboard.html +78 -192
  102. local_deep_research/web/utils/__init__.py +0 -0
  103. local_deep_research/web/utils/formatters.py +76 -0
  104. local_deep_research/web_search_engines/engines/full_search.py +18 -16
  105. local_deep_research/web_search_engines/engines/meta_search_engine.py +182 -131
  106. local_deep_research/web_search_engines/engines/search_engine_arxiv.py +224 -139
  107. local_deep_research/web_search_engines/engines/search_engine_brave.py +88 -71
  108. local_deep_research/web_search_engines/engines/search_engine_ddg.py +48 -39
  109. local_deep_research/web_search_engines/engines/search_engine_github.py +415 -204
  110. local_deep_research/web_search_engines/engines/search_engine_google_pse.py +123 -90
  111. local_deep_research/web_search_engines/engines/search_engine_guardian.py +210 -157
  112. local_deep_research/web_search_engines/engines/search_engine_local.py +532 -369
  113. local_deep_research/web_search_engines/engines/search_engine_local_all.py +42 -36
  114. local_deep_research/web_search_engines/engines/search_engine_pubmed.py +358 -266
  115. local_deep_research/web_search_engines/engines/search_engine_searxng.py +212 -160
  116. local_deep_research/web_search_engines/engines/search_engine_semantic_scholar.py +213 -170
  117. local_deep_research/web_search_engines/engines/search_engine_serpapi.py +84 -68
  118. local_deep_research/web_search_engines/engines/search_engine_wayback.py +186 -154
  119. local_deep_research/web_search_engines/engines/search_engine_wikipedia.py +115 -77
  120. local_deep_research/web_search_engines/search_engine_base.py +174 -99
  121. local_deep_research/web_search_engines/search_engine_factory.py +192 -102
  122. local_deep_research/web_search_engines/search_engines_config.py +22 -15
  123. {local_deep_research-0.1.26.dist-info → local_deep_research-0.2.2.dist-info}/METADATA +177 -97
  124. local_deep_research-0.2.2.dist-info/RECORD +135 -0
  125. {local_deep_research-0.1.26.dist-info → local_deep_research-0.2.2.dist-info}/WHEEL +1 -2
  126. {local_deep_research-0.1.26.dist-info → local_deep_research-0.2.2.dist-info}/entry_points.txt +3 -0
  127. local_deep_research/defaults/llm_config.py +0 -338
  128. local_deep_research/utilties/search_utilities.py +0 -114
  129. local_deep_research/web/static/js/app.js +0 -3763
  130. local_deep_research/web/templates/api_keys_config.html +0 -82
  131. local_deep_research/web/templates/collections_config.html +0 -90
  132. local_deep_research/web/templates/index.html +0 -348
  133. local_deep_research/web/templates/llm_config.html +0 -120
  134. local_deep_research/web/templates/main_config.html +0 -89
  135. local_deep_research/web/templates/search_engines_config.html +0 -154
  136. local_deep_research/web/templates/settings.html +0 -519
  137. local_deep_research-0.1.26.dist-info/RECORD +0 -61
  138. local_deep_research-0.1.26.dist-info/top_level.txt +0 -1
  139. /local_deep_research/{utilties → config}/__init__.py +0 -0
  140. {local_deep_research-0.1.26.dist-info → local_deep_research-0.2.2.dist-info}/licenses/LICENSE +0 -0
local_deep_research/test_migration.py
@@ -0,0 +1,178 @@
+#!/usr/bin/env python
+"""
+Migration test script for Local Deep Research.
+This script checks the contents of both the legacy and new databases to diagnose migration issues.
+"""
+
+import os
+import sqlite3
+import sys
+import time
+
+
+def check_db_content(db_path, description):
+    """Check what tables and how many rows are in a database."""
+    if not os.path.exists(db_path):
+        print(f"❌ {description} database not found at: {db_path}")
+        return False
+
+    print(f"📊 Examining {description} database at: {db_path}")
+    try:
+        conn = sqlite3.connect(db_path)
+        cursor = conn.cursor()
+
+        # Get list of tables
+        cursor.execute("SELECT name FROM sqlite_master WHERE type='table'")
+        tables = [
+            row[0] for row in cursor.fetchall() if not row[0].startswith("sqlite_")
+        ]
+
+        if not tables:
+            print(" ℹ️ No user tables found in database")
+            conn.close()
+            return False
+
+        print(f" 📋 Tables found: {', '.join(tables)}")
+
+        # For each table, count rows
+        for table in tables:
+            cursor.execute(f"SELECT COUNT(*) FROM {table}")
+            count = cursor.fetchone()[0]
+            print(f" 📝 Table '{table}' has {count} rows")
+
+            # If table has rows, show sample
+            if count > 0:
+                cursor.execute(f"SELECT * FROM {table} LIMIT 1")
+                columns = [description[0] for description in cursor.description]
+                print(f" Columns: {', '.join(columns)}")
+
+                # For specific tables, get key columns
+                if table in [
+                    "research_history",
+                    "research_logs",
+                    "research",
+                    "settings",
+                ]:
+                    key_cols = (
+                        "id, query, status"
+                        if table == "research_history"
+                        else "id, key, value" if table == "settings" else "id, message"
+                    )
+                    cursor.execute(f"SELECT {key_cols} FROM {table} LIMIT 3")
+                    sample = cursor.fetchall()
+                    for row in sample:
+                        print(f" Sample data: {row}")
+
+        conn.close()
+        return True
+    except Exception as e:
+        print(f"❌ Error examining database: {e}")
+        return False
+
+
+def main():
+    """Main function to test the migration."""
+    # Import necessary constants
+    try:
+        # Set up paths
+        current_dir = os.path.dirname(os.path.abspath(__file__))
+        project_root = os.path.abspath(os.path.join(current_dir, "..", ".."))
+
+        # Determine paths
+        data_dir = os.path.join(project_root, "data")
+        new_db_path = os.path.join(data_dir, "ldr.db")
+
+        legacy_research_history_db = os.path.join(
+            project_root, "src", "local_deep_research", "research_history.db"
+        )
+        legacy_deep_research_db = os.path.join(data_dir, "deep_research.db")
+
+        # Print paths for verification
+        print("=" * 60)
+        print("DATABASE PATHS")
+        print("=" * 60)
+        print(f"New database path: {new_db_path}")
+        print(f"Legacy research history DB: {legacy_research_history_db}")
+        print(f"Legacy deep research DB: {legacy_deep_research_db}")
+        print("=" * 60)
+
+        # Check all databases
+        check_db_content(legacy_research_history_db, "Legacy research_history")
+        check_db_content(legacy_deep_research_db, "Legacy deep_research")
+
+        # Now check for the new database or create it if needed
+        if os.path.exists(new_db_path):
+            check_db_content(new_db_path, "New ldr")
+        else:
+            print(f"ℹ️ New database doesn't exist yet at: {new_db_path}")
+            print("Would you like to run a test migration? (y/n)")
+            choice = input("> ").lower()
+            if choice == "y":
+                # Run the migration script directly
+                try:
+                    from src.local_deep_research.setup_data_dir import setup_data_dir
+                except ImportError:
+                    # If that fails, try with the direct import
+                    sys.path.append(
+                        os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
+                    )
+                    from local_deep_research.setup_data_dir import setup_data_dir
+
+                setup_data_dir()
+
+                # Import migration function
+                try:
+                    from src.local_deep_research.web.database.migrate_to_ldr_db import (
+                        migrate_to_ldr_db,
+                    )
+                except ImportError:
+                    # If that fails, try with the direct import
+                    from local_deep_research.web.database.migrate_to_ldr_db import (
+                        migrate_to_ldr_db,
+                    )
+
+                print("Running migration...")
+                success = migrate_to_ldr_db()
+
+                # Wait briefly to ensure file system has time to update
+                time.sleep(1)
+
+                if success:
+                    print("\n✅ Migration completed. Checking new database:")
+                    check_db_content(new_db_path, "New ldr")
+                else:
+                    print("❌ Migration failed")
+
+        # Get the paths from the migration script to verify
+        try:
+            try:
+                from src.local_deep_research.web.models.database import (
+                    DB_PATH,
+                    LEGACY_DEEP_RESEARCH_DB,
+                    LEGACY_RESEARCH_HISTORY_DB,
+                )
+            except ImportError:
+                from local_deep_research.web.models.database import (
+                    DB_PATH,
+                    LEGACY_DEEP_RESEARCH_DB,
+                    LEGACY_RESEARCH_HISTORY_DB,
+                )
+
+            print("\n" + "=" * 60)
+            print("PATHS FROM DATABASE MODULE")
+            print("=" * 60)
+            print(f"DB_PATH: {DB_PATH}")
+            print(f"LEGACY_RESEARCH_HISTORY_DB: {LEGACY_RESEARCH_HISTORY_DB}")
+            print(f"LEGACY_DEEP_RESEARCH_DB: {LEGACY_DEEP_RESEARCH_DB}")
+        except ImportError as e:
+            print(f"Could not import paths from database module: {e}")
+
+    except Exception as e:
+        print(f"Error in test script: {e}")
+        return 1
+
+    return 0
+
+
+if __name__ == "__main__":
+    sys.exit(main())
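A minimal sketch (not part of the diff) of calling the new check helper directly; the module path comes from the file list above and the database path is an example only:

# Sketch: inspect one SQLite database with the helper added above.
from local_deep_research.test_migration import check_db_content

check_db_content("data/ldr.db", "New ldr")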
local_deep_research/utilities/__init__.py: File without changes
local_deep_research/utilities/db_utils.py
@@ -0,0 +1,49 @@
+import logging
+import os
+from functools import cache
+
+from sqlalchemy import create_engine
+from sqlalchemy.orm import Session, sessionmaker
+
+from ..web.services.settings_manager import SettingsManager
+
+logger = logging.getLogger(__name__)
+
+
+# Database path.
+DATA_DIR = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "..", "data"))
+DB_PATH = os.path.join(DATA_DIR, "ldr.db")
+
+
+@cache
+def get_db_session() -> Session:
+    """
+    Returns:
+        The singleton DB session.
+    """
+    engine = create_engine(f"sqlite:///{DB_PATH}")
+    session_class = sessionmaker(bind=engine)
+    return session_class()
+
+
+@cache
+def get_settings_manager() -> SettingsManager:
+    """
+    Returns:
+        The singleton settings manager.
+
+    """
+    return SettingsManager(db_session=get_db_session())
+
+
+def get_db_setting(key, default_value=None):
+    """Get a setting from the database with fallback to default value"""
+    try:
+        # Get settings manager which handles database access
+        value = get_settings_manager().get_setting(key)
+
+        if value is not None:
+            return value
+    except Exception as e:
+        logger.error(f"Error getting setting {key} from database: {e}")
+    return default_value
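A minimal sketch (not part of the diff) of reading a setting through the new helper; "search.iterations" is a hypothetical key used only for illustration:

# Sketch: read a setting with a fallback default via the new db_utils module.
from local_deep_research.utilities.db_utils import get_db_setting

iterations = get_db_setting("search.iterations", default_value=2)
print(f"Configured iterations: {iterations}")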
local_deep_research/{utilties → utilities}/enums.py
@@ -1,9 +1,9 @@
 # config/enums.py
-from enum import Enum, auto
+from enum import Enum
+
 
 class KnowledgeAccumulationApproach(Enum):
     QUESTION = "QUESTION"
     ITERATION = "ITERATION"
     NO_KNOWLEDGE = "NO_KNOWLEDGE"
     MAX_NR_OF_CHARACTERS = "MAX_NR_OF_CHARACTERS"
-
local_deep_research/{utilties → utilities}/llm_utils.py
@@ -1,4 +1,4 @@
-# utilties/llm_utils.py
+# utilities/llm_utils.py
 """
 LLM utilities for Local Deep Research.
 
@@ -6,64 +6,75 @@ This module provides utility functions for working with language models
 when the user's llm_config.py is missing or incomplete.
 """
 
-import os
 import logging
-from typing import Dict, Any, Optional
+import os
+from typing import Any, Optional
 
 # Setup logging
 logger = logging.getLogger(__name__)
 
+
 def get_model(
     model_name: Optional[str] = None,
     model_type: Optional[str] = None,
     temperature: Optional[float] = None,
-    **kwargs
+    **kwargs,
 ) -> Any:
     """
     Get a language model instance as fallback when llm_config.get_llm is not available.
-
+
     Args:
         model_name: Name of the model to use
         model_type: Type of the model provider
         temperature: Model temperature
         **kwargs: Additional parameters
-
+
     Returns:
         LangChain language model instance
     """
     # Get default values from kwargs or use reasonable defaults
-    model_name = model_name or kwargs.get('DEFAULT_MODEL', 'mistral')
-    model_type = model_type or kwargs.get('DEFAULT_MODEL_TYPE', 'ollama')
-    temperature = temperature or kwargs.get('DEFAULT_TEMPERATURE', 0.7)
-    max_tokens = kwargs.get('max_tokens', kwargs.get('MAX_TOKENS', 30000))
-
+    model_name = model_name or kwargs.get("DEFAULT_MODEL", "mistral")
+    model_type = model_type or kwargs.get("DEFAULT_MODEL_TYPE", "ollama")
+    temperature = temperature or kwargs.get("DEFAULT_TEMPERATURE", 0.7)
+    max_tokens = kwargs.get("max_tokens", kwargs.get("MAX_TOKENS", 30000))
+
     # Common parameters
     common_params = {
         "temperature": temperature,
         "max_tokens": max_tokens,
     }
-
+
     # Add additional kwargs
     for key, value in kwargs.items():
-        if key not in ['DEFAULT_MODEL', 'DEFAULT_MODEL_TYPE', 'DEFAULT_TEMPERATURE', 'MAX_TOKENS']:
+        if key not in [
+            "DEFAULT_MODEL",
+            "DEFAULT_MODEL_TYPE",
+            "DEFAULT_TEMPERATURE",
+            "MAX_TOKENS",
+        ]:
             common_params[key] = value
-
+
     # Try to load the model based on type
     if model_type == "ollama":
         try:
             from langchain_ollama import ChatOllama
+
             return ChatOllama(model=model_name, **common_params)
         except ImportError:
             try:
                 from langchain_community.llms import Ollama
+
                 return Ollama(model=model_name, **common_params)
             except ImportError:
-                logger.error("Neither langchain_ollama nor langchain_community.llms.Ollama available")
+                logger.error(
+                    "Neither langchain_ollama nor langchain_community.llms.Ollama available"
+                )
                 raise
-
+
     elif model_type == "openai":
         try:
             from langchain_openai import ChatOpenAI
+
             api_key = os.getenv("OPENAI_API_KEY")
             if not api_key:
                 raise ValueError("OPENAI_API_KEY environment variable not set")
@@ -71,46 +82,69 @@ def get_model(
         except ImportError:
             logger.error("langchain_openai not available")
             raise
-
+
     elif model_type == "anthropic":
         try:
             from langchain_anthropic import ChatAnthropic
+
             api_key = os.getenv("ANTHROPIC_API_KEY")
             if not api_key:
                 raise ValueError("ANTHROPIC_API_KEY environment variable not set")
-            return ChatAnthropic(model=model_name, anthropic_api_key=api_key, **common_params)
+            return ChatAnthropic(
+                model=model_name, anthropic_api_key=api_key, **common_params
+            )
         except ImportError:
             logger.error("langchain_anthropic not available")
             raise
-
+
     elif model_type == "openai_endpoint":
         try:
             from langchain_openai import ChatOpenAI
+
             api_key = os.getenv("OPENAI_ENDPOINT_API_KEY")
             if not api_key:
                 raise ValueError("OPENAI_ENDPOINT_API_KEY environment variable not set")
-
-            endpoint_url = kwargs.get("OPENAI_ENDPOINT_URL", "https://openrouter.ai/api/v1")
-
-            if model_name is None and not kwargs.get("OPENAI_ENDPOINT_REQUIRES_MODEL", True):
-                return ChatOpenAI(api_key=api_key, openai_api_base=endpoint_url, **common_params)
+
+            endpoint_url = kwargs.get(
+                "OPENAI_ENDPOINT_URL", "https://openrouter.ai/api/v1"
+            )
+
+            if model_name is None and not kwargs.get(
+                "OPENAI_ENDPOINT_REQUIRES_MODEL", True
+            ):
+                return ChatOpenAI(
+                    api_key=api_key, openai_api_base=endpoint_url, **common_params
+                )
             else:
-                return ChatOpenAI(model=model_name, api_key=api_key, openai_api_base=endpoint_url, **common_params)
+                return ChatOpenAI(
+                    model=model_name,
+                    api_key=api_key,
+                    openai_api_base=endpoint_url,
+                    **common_params,
+                )
         except ImportError:
             logger.error("langchain_openai not available")
             raise
-
+
     # Default fallback
     try:
         from langchain_ollama import ChatOllama
+
         logger.warning(f"Unknown model type '{model_type}', defaulting to Ollama")
         return ChatOllama(model=model_name, **common_params)
     except (ImportError, Exception) as e:
         logger.error(f"Failed to load any model: {e}")
-
+
         # Last resort: create a dummy model
         try:
             from langchain_community.llms.fake import FakeListLLM
-            return FakeListLLM(responses=["No language models are available. Please install Ollama or set up API keys."])
+
+            return FakeListLLM(
+                responses=[
+                    "No language models are available. Please install Ollama or set up API keys."
+                ]
+            )
         except ImportError:
-            raise ValueError("No language models available and could not create dummy model")
+            raise ValueError(
+                "No language models available and could not create dummy model"
+            )
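A minimal sketch (not part of the diff) of the fallback get_model helper shown above; it assumes a running local Ollama server and the langchain_ollama package, and the model name simply mirrors the helper's own default:

# Sketch: obtain a chat model via the fallback helper and run one prompt.
# Requires an Ollama server to be reachable; otherwise the helper raises/falls back.
from local_deep_research.utilities.llm_utils import get_model

llm = get_model(model_name="mistral", model_type="ollama", temperature=0.7)
print(llm.invoke("Say hello in one short sentence."))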
local_deep_research/utilities/search_utilities.py
@@ -0,0 +1,242 @@
+import logging
+import re
+from typing import Dict, List
+
+logger = logging.getLogger(__name__)
+
+
+def remove_think_tags(text: str) -> str:
+    text = re.sub(r"<think>.*?</think>", "", text, flags=re.DOTALL).strip()
+    return text
+
+
+def extract_links_from_search_results(search_results: List[Dict]) -> List[Dict]:
+    """
+    Extracts links and titles from a list of search result dictionaries.
+
+    Each dictionary is expected to have at least the keys "title" and "link".
+
+    Returns a list of dictionaries with 'title' and 'url' keys.
+    """
+    links = []
+    if not search_results:
+        return links
+
+    for result in search_results:
+        try:
+            # Ensure we handle None values safely before calling strip()
+            title = result.get("title", "")
+            url = result.get("link", "")
+            index = result.get("index", "")
+
+            # Apply strip() only if the values are not None
+            title = title.strip() if title is not None else ""
+            url = url.strip() if url is not None else ""
+            index = index.strip() if index is not None else ""
+
+            if title and url:
+                links.append({"title": title, "url": url, "index": index})
+        except Exception as e:
+            # Log the specific error for debugging
+            logger.error(f"Error extracting link from result: {str(e)}")
+            continue
+    return links
+
+
+def format_links(links: List[Dict]) -> str:
+    formatted_links = format_links_to_markdown(links)
+    return formatted_links
+
+
+def format_links_to_markdown(all_links: List[Dict]) -> str:
+    formatted_text = ""
+    if all_links:
+
+        # Group links by URL and collect all their indices
+        url_to_indices = {}
+        for link in all_links:
+            url = link.get("url")
+            index = link.get("index", "")
+            if url:
+                if url not in url_to_indices:
+                    url_to_indices[url] = []
+                url_to_indices[url].append(index)
+
+        # Format each unique URL with all its indices
+        seen_urls = set()  # Initialize the set here
+        for link in all_links:
+            url = link.get("url")
+            title = link.get("title", "Untitled")
+            if url and url not in seen_urls:
+                # Get all indices for this URL
+                indices = set(url_to_indices[url])
+                # Format as [1, 3, 5] if multiple indices, or just [1] if single
+                indices_str = f"[{', '.join(map(str, indices))}]"
+                formatted_text += f"{indices_str} {title}\n URL: {url}\n\n"
+                seen_urls.add(url)
+
+        formatted_text += "\n"
+
+    return formatted_text
+
+
+def format_findings(
+    findings_list: List[Dict],
+    synthesized_content: str,
+    questions_by_iteration: Dict[int, List[str]],
+) -> str:
+    """Format findings into a detailed text output.
+
+    Args:
+        findings_list: List of finding dictionaries
+        synthesized_content: The synthesized content from the LLM.
+        questions_by_iteration: Dictionary mapping iteration numbers to lists of questions
+
+    Returns:
+        str: Formatted text output
+    """
+    logger.info(
+        f"Inside format_findings utility. Findings count: {len(findings_list)}, Questions iterations: {len(questions_by_iteration)}"
+    )
+    formatted_text = ""
+
+    # Extract all sources from findings
+    all_links = []
+    for finding in findings_list:
+        search_results = finding.get("search_results", [])
+        if search_results:
+            try:
+                links = extract_links_from_search_results(search_results)
+                all_links.extend(links)
+            except Exception as link_err:
+                logger.error(f"Error processing search results/links: {link_err}")
+
+    # Start with the synthesized content (passed as synthesized_content)
+    formatted_text += f"{synthesized_content}\n\n"
+
+    # Add sources section after synthesized content if sources exist
+    formatted_text += format_links_to_markdown(all_links)
+
+    formatted_text += "\n\n"  # Separator after synthesized content
+
+    # Add Search Questions by Iteration section
+    if questions_by_iteration:
+        formatted_text += "## SEARCH QUESTIONS BY ITERATION\n"
+        formatted_text += "\n"
+        for iter_num, questions in questions_by_iteration.items():
+            formatted_text += f"\n #### Iteration {iter_num}:\n"
+            for i, q in enumerate(questions, 1):
+                formatted_text += f"{i}. {q}\n"
+        formatted_text += "\n" + "\n\n"
+    else:
+        logger.warning("No questions by iteration found to format.")
+
+    # Add Detailed Findings section
+    if findings_list:
+        formatted_text += "## DETAILED FINDINGS\n\n"
+        logger.info(f"Formatting {len(findings_list)} detailed finding items.")
+
+        for idx, finding in enumerate(findings_list):
+            logger.debug(f"Formatting finding item {idx}. Keys: {list(finding.keys())}")
+            # Use .get() for safety
+            phase = finding.get("phase", "Unknown Phase")
+            content = finding.get("content", "No content available.")
+            search_results = finding.get("search_results", [])
+
+            # Phase header
+            formatted_text += "\n"
+            formatted_text += f"### {phase}\n"
+            formatted_text += "\n\n"
+
+            question_displayed = False
+            # If this is a follow-up phase, try to show the corresponding question
+            if isinstance(phase, str) and phase.startswith("Follow-up"):
+                try:
+                    parts = phase.replace("Follow-up Iteration ", "").split(".")
+                    if len(parts) == 2:
+                        iteration = int(parts[0])
+                        question_index = int(parts[1]) - 1
+                        if (
+                            iteration in questions_by_iteration
+                            and 0
+                            <= question_index
+                            < len(questions_by_iteration[iteration])
+                        ):
+                            formatted_text += f"#### {questions_by_iteration[iteration][question_index]}\n\n"
+                            question_displayed = True
+                        else:
+                            logger.warning(
+                                f"Could not find matching question for phase: {phase}"
+                            )
+                    else:
+                        logger.warning(
+                            f"Could not parse iteration/index from phase: {phase}"
+                        )
+                except ValueError:
+                    logger.warning(
+                        f"Could not parse iteration/index from phase: {phase}"
+                    )
+            # Handle Sub-query phases from IterDRAG strategy
+            elif isinstance(phase, str) and phase.startswith("Sub-query"):
+                try:
+                    # Extract the index number from "Sub-query X"
+                    query_index = int(phase.replace("Sub-query ", "")) - 1
+                    # In IterDRAG, sub-queries are stored in iteration 0
+                    if 0 in questions_by_iteration and query_index < len(
+                        questions_by_iteration[0]
+                    ):
+                        formatted_text += (
+                            f"#### {questions_by_iteration[0][query_index]}\n\n"
+                        )
+                        question_displayed = True
+                    else:
+                        logger.warning(
+                            f"Could not find matching question for phase: {phase}"
+                        )
+                except ValueError:
+                    logger.warning(
+                        f"Could not parse question index from phase: {phase}"
+                    )
+
+            # If the question is in the finding itself, display it
+            if not question_displayed and "question" in finding and finding["question"]:
+                formatted_text += f"### SEARCH QUESTION:\n{finding['question']}\n\n"
+
+            # Content
+            formatted_text += f"\n\n{content}\n\n"
+
+            # Search results if they exist
+            if search_results:
+                try:
+                    links = extract_links_from_search_results(search_results)
+                    if links:
+                        formatted_text += "### SOURCES USED IN THIS SECTION:\n"
+                        formatted_text += format_links(links) + "\n\n"
+                except Exception as link_err:
+                    logger.error(
+                        f"Error processing search results/links for finding {idx}: {link_err}"
+                    )
+            else:
+                logger.debug(f"No search_results found for finding item {idx}.")
+
+            formatted_text += f"{'_' * 80}\n\n"
+    else:
+        logger.warning("No detailed findings found to format.")
+
+    # Add summary of all sources at the end
+    if all_links:
+        formatted_text += "## ALL SOURCES:\n"
+        formatted_text += format_links_to_markdown(all_links)
+    else:
+        logger.info("No unique sources found across all findings to list.")
+
+    logger.info("Finished format_findings utility.")
+    return formatted_text
+
+
+def print_search_results(search_results):
+    formatted_text = ""
+    links = extract_links_from_search_results(search_results)
+    if links:
+        formatted_text = format_links(links=links)
+    logger.info(formatted_text)
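A minimal sketch (not part of the diff) of the new link utilities, using made-up search results purely for illustration:

# Sketch: extract links from search results and render them as a source list.
from local_deep_research.utilities.search_utilities import (
    extract_links_from_search_results,
    format_links_to_markdown,
)

results = [
    {"title": "Example A", "link": "https://example.org/a", "index": "1"},
    {"title": "Example B", "link": "https://example.org/b", "index": "2"},
]
links = extract_links_from_search_results(results)
print(format_links_to_markdown(links))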
local_deep_research/{utilties → utilities}/setup_utils.py
@@ -1,6 +1,8 @@
 """Setup utilities (legacy wrapper)."""
 
+
 def setup_user_directories():
     """Set up directories and ensure config files exist."""
-    from local_deep_research.config import init_config_files
-    init_config_files()
+    from ..config.config_files import init_config_files
+
+    init_config_files()
local_deep_research/web/__init__.py
@@ -1,2 +1 @@
 """Web interface for Local Deep Research"""
-from . import app