local-deep-research 0.1.26__py3-none-any.whl → 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (140) hide show
  1. local_deep_research/__init__.py +23 -22
  2. local_deep_research/__main__.py +16 -0
  3. local_deep_research/advanced_search_system/__init__.py +7 -0
  4. local_deep_research/advanced_search_system/filters/__init__.py +8 -0
  5. local_deep_research/advanced_search_system/filters/base_filter.py +38 -0
  6. local_deep_research/advanced_search_system/filters/cross_engine_filter.py +200 -0
  7. local_deep_research/advanced_search_system/findings/base_findings.py +81 -0
  8. local_deep_research/advanced_search_system/findings/repository.py +452 -0
  9. local_deep_research/advanced_search_system/knowledge/__init__.py +1 -0
  10. local_deep_research/advanced_search_system/knowledge/base_knowledge.py +151 -0
  11. local_deep_research/advanced_search_system/knowledge/standard_knowledge.py +159 -0
  12. local_deep_research/advanced_search_system/questions/__init__.py +1 -0
  13. local_deep_research/advanced_search_system/questions/base_question.py +64 -0
  14. local_deep_research/advanced_search_system/questions/decomposition_question.py +445 -0
  15. local_deep_research/advanced_search_system/questions/standard_question.py +119 -0
  16. local_deep_research/advanced_search_system/repositories/__init__.py +7 -0
  17. local_deep_research/advanced_search_system/strategies/__init__.py +1 -0
  18. local_deep_research/advanced_search_system/strategies/base_strategy.py +118 -0
  19. local_deep_research/advanced_search_system/strategies/iterdrag_strategy.py +450 -0
  20. local_deep_research/advanced_search_system/strategies/parallel_search_strategy.py +312 -0
  21. local_deep_research/advanced_search_system/strategies/rapid_search_strategy.py +270 -0
  22. local_deep_research/advanced_search_system/strategies/standard_strategy.py +300 -0
  23. local_deep_research/advanced_search_system/tools/__init__.py +1 -0
  24. local_deep_research/advanced_search_system/tools/base_tool.py +100 -0
  25. local_deep_research/advanced_search_system/tools/knowledge_tools/__init__.py +1 -0
  26. local_deep_research/advanced_search_system/tools/question_tools/__init__.py +1 -0
  27. local_deep_research/advanced_search_system/tools/search_tools/__init__.py +1 -0
  28. local_deep_research/api/__init__.py +5 -5
  29. local_deep_research/api/research_functions.py +96 -84
  30. local_deep_research/app.py +8 -0
  31. local_deep_research/citation_handler.py +25 -16
  32. local_deep_research/{config.py → config/config_files.py} +102 -110
  33. local_deep_research/config/llm_config.py +472 -0
  34. local_deep_research/config/search_config.py +77 -0
  35. local_deep_research/defaults/__init__.py +10 -5
  36. local_deep_research/defaults/main.toml +2 -2
  37. local_deep_research/defaults/search_engines.toml +60 -34
  38. local_deep_research/main.py +121 -19
  39. local_deep_research/migrate_db.py +147 -0
  40. local_deep_research/report_generator.py +72 -44
  41. local_deep_research/search_system.py +147 -283
  42. local_deep_research/setup_data_dir.py +35 -0
  43. local_deep_research/test_migration.py +178 -0
  44. local_deep_research/utilities/__init__.py +0 -0
  45. local_deep_research/utilities/db_utils.py +49 -0
  46. local_deep_research/{utilties → utilities}/enums.py +2 -2
  47. local_deep_research/{utilties → utilities}/llm_utils.py +63 -29
  48. local_deep_research/utilities/search_utilities.py +242 -0
  49. local_deep_research/{utilties → utilities}/setup_utils.py +4 -2
  50. local_deep_research/web/__init__.py +0 -1
  51. local_deep_research/web/app.py +86 -1709
  52. local_deep_research/web/app_factory.py +289 -0
  53. local_deep_research/web/database/README.md +70 -0
  54. local_deep_research/web/database/migrate_to_ldr_db.py +289 -0
  55. local_deep_research/web/database/migrations.py +447 -0
  56. local_deep_research/web/database/models.py +117 -0
  57. local_deep_research/web/database/schema_upgrade.py +107 -0
  58. local_deep_research/web/models/database.py +294 -0
  59. local_deep_research/web/models/settings.py +94 -0
  60. local_deep_research/web/routes/api_routes.py +559 -0
  61. local_deep_research/web/routes/history_routes.py +354 -0
  62. local_deep_research/web/routes/research_routes.py +715 -0
  63. local_deep_research/web/routes/settings_routes.py +1592 -0
  64. local_deep_research/web/services/research_service.py +947 -0
  65. local_deep_research/web/services/resource_service.py +149 -0
  66. local_deep_research/web/services/settings_manager.py +669 -0
  67. local_deep_research/web/services/settings_service.py +187 -0
  68. local_deep_research/web/services/socket_service.py +210 -0
  69. local_deep_research/web/static/css/custom_dropdown.css +277 -0
  70. local_deep_research/web/static/css/settings.css +1223 -0
  71. local_deep_research/web/static/css/styles.css +525 -48
  72. local_deep_research/web/static/js/components/custom_dropdown.js +428 -0
  73. local_deep_research/web/static/js/components/detail.js +348 -0
  74. local_deep_research/web/static/js/components/fallback/formatting.js +122 -0
  75. local_deep_research/web/static/js/components/fallback/ui.js +215 -0
  76. local_deep_research/web/static/js/components/history.js +487 -0
  77. local_deep_research/web/static/js/components/logpanel.js +949 -0
  78. local_deep_research/web/static/js/components/progress.js +1107 -0
  79. local_deep_research/web/static/js/components/research.js +1865 -0
  80. local_deep_research/web/static/js/components/results.js +766 -0
  81. local_deep_research/web/static/js/components/settings.js +3981 -0
  82. local_deep_research/web/static/js/components/settings_sync.js +106 -0
  83. local_deep_research/web/static/js/main.js +226 -0
  84. local_deep_research/web/static/js/services/api.js +253 -0
  85. local_deep_research/web/static/js/services/audio.js +31 -0
  86. local_deep_research/web/static/js/services/formatting.js +119 -0
  87. local_deep_research/web/static/js/services/pdf.js +622 -0
  88. local_deep_research/web/static/js/services/socket.js +882 -0
  89. local_deep_research/web/static/js/services/ui.js +546 -0
  90. local_deep_research/web/templates/base.html +72 -0
  91. local_deep_research/web/templates/components/custom_dropdown.html +47 -0
  92. local_deep_research/web/templates/components/log_panel.html +32 -0
  93. local_deep_research/web/templates/components/mobile_nav.html +22 -0
  94. local_deep_research/web/templates/components/settings_form.html +299 -0
  95. local_deep_research/web/templates/components/sidebar.html +21 -0
  96. local_deep_research/web/templates/pages/details.html +73 -0
  97. local_deep_research/web/templates/pages/history.html +51 -0
  98. local_deep_research/web/templates/pages/progress.html +57 -0
  99. local_deep_research/web/templates/pages/research.html +139 -0
  100. local_deep_research/web/templates/pages/results.html +59 -0
  101. local_deep_research/web/templates/settings_dashboard.html +78 -192
  102. local_deep_research/web/utils/__init__.py +0 -0
  103. local_deep_research/web/utils/formatters.py +76 -0
  104. local_deep_research/web_search_engines/engines/full_search.py +18 -16
  105. local_deep_research/web_search_engines/engines/meta_search_engine.py +182 -131
  106. local_deep_research/web_search_engines/engines/search_engine_arxiv.py +224 -139
  107. local_deep_research/web_search_engines/engines/search_engine_brave.py +88 -71
  108. local_deep_research/web_search_engines/engines/search_engine_ddg.py +48 -39
  109. local_deep_research/web_search_engines/engines/search_engine_github.py +415 -204
  110. local_deep_research/web_search_engines/engines/search_engine_google_pse.py +123 -90
  111. local_deep_research/web_search_engines/engines/search_engine_guardian.py +210 -157
  112. local_deep_research/web_search_engines/engines/search_engine_local.py +532 -369
  113. local_deep_research/web_search_engines/engines/search_engine_local_all.py +42 -36
  114. local_deep_research/web_search_engines/engines/search_engine_pubmed.py +358 -266
  115. local_deep_research/web_search_engines/engines/search_engine_searxng.py +211 -159
  116. local_deep_research/web_search_engines/engines/search_engine_semantic_scholar.py +213 -170
  117. local_deep_research/web_search_engines/engines/search_engine_serpapi.py +84 -68
  118. local_deep_research/web_search_engines/engines/search_engine_wayback.py +186 -154
  119. local_deep_research/web_search_engines/engines/search_engine_wikipedia.py +115 -77
  120. local_deep_research/web_search_engines/search_engine_base.py +174 -99
  121. local_deep_research/web_search_engines/search_engine_factory.py +192 -102
  122. local_deep_research/web_search_engines/search_engines_config.py +22 -15
  123. {local_deep_research-0.1.26.dist-info → local_deep_research-0.2.0.dist-info}/METADATA +177 -97
  124. local_deep_research-0.2.0.dist-info/RECORD +135 -0
  125. {local_deep_research-0.1.26.dist-info → local_deep_research-0.2.0.dist-info}/WHEEL +1 -2
  126. {local_deep_research-0.1.26.dist-info → local_deep_research-0.2.0.dist-info}/entry_points.txt +3 -0
  127. local_deep_research/defaults/llm_config.py +0 -338
  128. local_deep_research/utilties/search_utilities.py +0 -114
  129. local_deep_research/web/static/js/app.js +0 -3763
  130. local_deep_research/web/templates/api_keys_config.html +0 -82
  131. local_deep_research/web/templates/collections_config.html +0 -90
  132. local_deep_research/web/templates/index.html +0 -348
  133. local_deep_research/web/templates/llm_config.html +0 -120
  134. local_deep_research/web/templates/main_config.html +0 -89
  135. local_deep_research/web/templates/search_engines_config.html +0 -154
  136. local_deep_research/web/templates/settings.html +0 -519
  137. local_deep_research-0.1.26.dist-info/RECORD +0 -61
  138. local_deep_research-0.1.26.dist-info/top_level.txt +0 -1
  139. /local_deep_research/{utilties → config}/__init__.py +0 -0
  140. {local_deep_research-0.1.26.dist-info → local_deep_research-0.2.0.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,289 @@
1
+ import logging
2
+ import os
3
+ from importlib import resources as importlib_resources
4
+
5
+ from flask import (
6
+ Flask,
7
+ jsonify,
8
+ make_response,
9
+ redirect,
10
+ request,
11
+ send_from_directory,
12
+ url_for,
13
+ )
14
+ from flask_socketio import SocketIO
15
+ from flask_wtf.csrf import CSRFProtect
16
+
17
+ from .models.database import DB_PATH, init_db
18
+
19
+ # Initialize logger
20
+ logger = logging.getLogger(__name__)
21
+
22
+
23
def create_app():
    """
    Create and configure the Flask application.

    The static and template folders are resolved from the installed
    ``local_deep_research`` package; if that fails for any reason, the
    ``static`` and ``templates`` directories relative to the current working
    directory are used instead. The function then wires up CSRF protection,
    the SQLite database URI, Socket.IO, the database itself, middleware,
    blueprints, error handlers and socket event handlers.

    Returns:
        tuple: (app, socketio) - The configured Flask app and SocketIO instance
    """
    # Configure logging
    logging.basicConfig(level=logging.INFO)

    # Set Werkzeug logger to WARNING level to suppress Socket.IO polling logs
    logging.getLogger("werkzeug").setLevel(logging.WARNING)

    try:
        # Get directories based on package installation
        PACKAGE_DIR = importlib_resources.files("local_deep_research") / "web"
        with importlib_resources.as_file(PACKAGE_DIR) as package_dir:
            STATIC_DIR = (package_dir / "static").as_posix()
            TEMPLATE_DIR = (package_dir / "templates").as_posix()

        # Initialize Flask app with package directories
        app = Flask(__name__, static_folder=STATIC_DIR, template_folder=TEMPLATE_DIR)
        # Report through the module logger (not print) so these messages obey
        # the logging configuration like the rest of this module.
        logger.info(f"Using package static path: {STATIC_DIR}")
        logger.info(f"Using package template path: {TEMPLATE_DIR}")
    except Exception as e:
        # Fallback for development
        logger.warning(f"Package directories not found, using fallback paths: {str(e)}")
        app = Flask(
            __name__,
            static_folder=os.path.abspath("static"),
            template_folder=os.path.abspath("templates"),
        )

    # App configuration
    # NOTE: the built-in default is public (it ships with the package), so it
    # offers no real protection for sessions/CSRF tokens. Allow deployments to
    # supply their own key via the environment; the default is kept only so
    # existing setups continue to work unchanged.
    app.config["SECRET_KEY"] = os.environ.get(
        "LDR_SECRET_KEY", "deep-research-secret-key"
    )

    # Initialize CSRF protection
    csrf = CSRFProtect(app)
    # Exempt Socket.IO from CSRF protection
    csrf.exempt("research.socket_io")

    # Database configuration - Use unified ldr.db from the database module
    db_path = DB_PATH
    app.config["SQLALCHEMY_DATABASE_URI"] = f"sqlite:///{db_path}"
    logger.info(f"Using database at {db_path}")
    app.config["SQLALCHEMY_TRACK_MODIFICATIONS"] = False
    app.config["SQLALCHEMY_ECHO"] = False

    # Initialize extensions
    socketio = SocketIO(
        app,
        cors_allowed_origins="*",
        async_mode="threading",
        path="/research/socket.io",
        logger=False,
        engineio_logger=False,
        ping_timeout=20,
        ping_interval=5,
    )

    # Initialize the database
    create_database(app)
    init_db()

    # Register socket service (imported here, as in the rest of this module,
    # rather than at module import time)
    from .services.socket_service import set_socketio

    set_socketio(socketio)

    # Apply middleware
    apply_middleware(app)

    # Register blueprints
    register_blueprints(app)

    # Register error handlers
    register_error_handlers(app)

    # Register socket event handlers
    register_socket_events(socketio)

    return app, socketio
105
+
106
+
107
def apply_middleware(app):
    """
    Apply middleware to the Flask app.

    Registers two hooks on ``app``:

    * an ``after_request`` hook that sets Content-Security-Policy headers on
      every response and CORS headers on responses for paths under ``/api/``;
    * a ``before_request`` hook that inspects Socket.IO requests.

    Args:
        app: The Flask application to attach the hooks to.
    """

    # Add Content Security Policy headers to allow Socket.IO to function
    @app.after_request
    def add_security_headers(response):
        # Define a permissive CSP for development that allows Socket.IO to function
        csp = (
            "default-src 'self'; "
            "connect-src 'self' ws: wss: http: https:; "
            "script-src 'self' 'unsafe-inline' 'unsafe-eval' cdnjs.cloudflare.com cdn.jsdelivr.net unpkg.com; "
            "style-src 'self' 'unsafe-inline' cdnjs.cloudflare.com; "
            "font-src 'self' cdnjs.cloudflare.com; "
            "img-src 'self' data:; "
            "worker-src blob:; "
            "frame-src 'self';"
        )

        response.headers["Content-Security-Policy"] = csp
        response.headers["X-Content-Security-Policy"] = csp

        # Add CORS headers for API requests
        if request.path.startswith("/api/"):
            response.headers["Access-Control-Allow-Origin"] = "*"
            response.headers["Access-Control-Allow-Methods"] = (
                "GET, POST, DELETE, OPTIONS"
            )
            response.headers["Access-Control-Allow-Headers"] = "Content-Type"

        return response

    # Add a middleware layer to handle abrupt disconnections
    @app.before_request
    def handle_websocket_requests():
        # Returning None lets Flask continue with normal request handling;
        # only the error path below short-circuits the request.
        if request.path.startswith("/research/socket.io"):
            try:
                if not request.environ.get("werkzeug.socket"):
                    return
            except Exception as e:
                # Use the module logger instead of print so the failure is
                # recorded with the rest of the application's log output.
                logger.error(f"WebSocket preprocessing error: {e}")
                # Return empty response to prevent further processing
                return "", 200
149
+
150
+
151
def register_blueprints(app):
    """
    Register the web blueprints and the app-level convenience routes.

    Steps, in order:

    1. Register the research, history, settings and API blueprints (history
       and API are mounted under ``/research/api``).
    2. Hand the configuration file locations to the settings routes; any
       failure here is logged and otherwise ignored.
    3. Add the ``/`` redirect, ``/favicon.ico`` and ``/static/<path>`` routes.

    Args:
        app: The Flask application to register everything on.
    """
    # Blueprints are imported lazily, inside the function.
    from .routes.api_routes import api_bp
    from .routes.history_routes import history_bp
    from .routes.research_routes import research_bp
    from .routes.settings_routes import settings_bp

    api_prefix = "/research/api"
    app.register_blueprint(research_bp)
    app.register_blueprint(history_bp, url_prefix=api_prefix)
    app.register_blueprint(settings_bp)
    app.register_blueprint(api_bp, url_prefix=api_prefix)

    # Tell the settings routes where the config files live. This runs after
    # all blueprints are registered; the config imports stay local to avoid
    # circular dependencies.
    try:
        from ..config.config_files import SEARCH_ENGINES_FILE, get_config_dir
        from .routes.settings_routes import set_config_paths

        config_dir = get_config_dir() / "config"
        set_config_paths(
            config_dir,
            SEARCH_ENGINES_FILE,
            config_dir / "settings.toml",
            config_dir / "local_collections.toml",
        )
    except Exception as e:
        logger.error(f"Error configuring settings routes: {e}")

    @app.route("/")
    def root_index():
        # The site root simply forwards to the research landing page.
        target = url_for("research.index")
        return redirect(target)

    @app.route("/favicon.ico")
    def favicon():
        return send_from_directory(
            app.static_folder,
            "favicon.ico",
            mimetype="image/x-icon",
        )

    # App-level static route, kept for compatibility.
    @app.route("/static/<path:path>")
    def app_serve_static(path):
        static_root = app.static_folder
        return send_from_directory(static_root, path)
207
+
208
+
209
def register_error_handlers(app):
    """
    Attach JSON error handlers for HTTP 404 and 500 to the Flask app.

    Both handlers answer with a JSON object of the form ``{"error": <text>}``
    and the matching status code.

    Args:
        app: The Flask application to register the handlers on.
    """

    def _json_error(message, status):
        # Shared builder for the two handlers below.
        payload = jsonify({"error": message})
        return make_response(payload, status)

    @app.errorhandler(404)
    def not_found(error):
        return _json_error("Not found", 404)

    @app.errorhandler(500)
    def server_error(error):
        return _json_error("Server error", 500)
219
+
220
+
221
def register_socket_events(socketio):
    """
    Register Socket.IO event handlers.

    Wires the ``connect``, ``disconnect`` and ``subscribe_to_research`` events,
    plus the namespace-level and default error handlers, to the functions
    provided by the socket service.

    Args:
        socketio: The SocketIO instance to register the handlers on.
    """

    from .routes.research_routes import get_globals
    from .services.socket_service import (
        handle_connect,
        handle_default_error,
        handle_disconnect,
        handle_socket_error,
        handle_subscribe,
    )

    @socketio.on("connect")
    def on_connect():
        handle_connect(request)

    @socketio.on("disconnect")
    def on_disconnect():
        handle_disconnect(request)

    @socketio.on("subscribe_to_research")
    def on_subscribe(data):
        # Fall back to an empty mapping when no research is being tracked.
        globals_dict = get_globals()
        active_research = globals_dict.get("active_research", {})
        handle_subscribe(data, request, active_research)

    # on_error is a decorator *factory* (it takes an optional namespace), so
    # it must be called. Used bare, the handler function is passed as the
    # namespace argument and is never registered.
    @socketio.on_error()
    def on_error(e):
        return handle_socket_error(e)

    # on_error_default, in contrast, is a plain decorator and takes the
    # handler directly.
    @socketio.on_error_default
    def on_default_error(e):
        return handle_default_error(e)
254
+
255
+
256
def create_database(app):
    """
    Create the database engine, tables and session for the application.

    The engine is built from ``app.config["SQLALCHEMY_DATABASE_URI"]`` and is
    stored on the app as ``app.engine``; a scoped session is stored as
    ``app.db_session``. After the model tables are created, migrations and the
    predefined settings setup are run, and a teardown hook is installed that
    removes the scoped session when the app context ends.

    Args:
        app: The Flask application whose config supplies the database URI.
    """
    from sqlalchemy import create_engine
    from sqlalchemy.orm import scoped_session, sessionmaker

    from .database.migrations import run_migrations, setup_predefined_settings
    from .database.models import Base

    # check_same_thread=False is passed through to the SQLite driver so the
    # connection is not restricted to the thread that created it.
    engine_options = {
        "echo": app.config.get("SQLALCHEMY_ECHO", False),
        "connect_args": {"check_same_thread": False},
    }
    app.engine = create_engine(app.config["SQLALCHEMY_DATABASE_URI"], **engine_options)

    # Make sure every table declared on the models' Base exists.
    Base.metadata.create_all(app.engine)

    # One scoped session for the whole app.
    app.db_session = scoped_session(
        sessionmaker(bind=app.engine, autocommit=False, autoflush=False)
    )

    # Bring the schema up to date, then seed the predefined settings.
    run_migrations(app.engine, app.db_session)
    setup_predefined_settings(app.db_session)

    @app.teardown_appcontext
    def remove_session(exception=None):
        # Release the scoped session at the end of each app context.
        app.db_session.remove()
@@ -0,0 +1,70 @@
1
+ # Database Architecture
2
+
3
+ ## Overview
4
+
5
+ Local Deep Research now uses a unified database architecture with a single SQLite database file (`ldr.db`) that replaces the previous split database approach (`deep_research.db` and `research_history.db`).
6
+
7
+ The database is located at `src/data/ldr.db` within the project directory structure.
8
+
9
+ ## Database-First Settings
10
+
11
+ The application now follows a "database-first" approach for settings:
12
+
13
+ 1. All settings are stored in the database, in the `settings` table
14
+ 2. Settings from TOML files are used only as fallbacks if a setting doesn't exist in the database
15
+ 3. The web UI settings page modifies the database values directly
16
+
17
+ ## Migration
18
+
19
+ If you have existing data in the legacy databases, you need to migrate it to the new unified database.
20
+
21
+ ### Automatic Migration
22
+
23
+ When you start the application for the first time after updating, it will check if migration is needed:
24
+
25
+ 1. If legacy databases exist and `ldr.db` doesn't exist, you'll see a warning message
26
+ 2. You can run migration using the command: `python -m src.local_deep_research.main --migrate-db`
27
+ 3. Alternatively, start the application with auto-migration: `python -m src.local_deep_research.main --auto-migrate`
28
+
29
+ ### Manual Migration
30
+
31
+ If automatic migration doesn't work, you can:
32
+
33
+ 1. Run the migration script directly: `python -m src.local_deep_research.web.database.migrate_to_ldr_db`
34
+ 2. Check migration results in the log output
35
+
36
+ ### Schema Upgrades
37
+
38
+ If you have already migrated your database but need to update its schema:
39
+
40
+ 1. The application automatically runs schema upgrades on startup
41
+ 2. You can manually run schema upgrades with: `python -m src.local_deep_research.main --schema-upgrade`
42
+ 3. Current schema upgrades include:
43
+ - Removing the redundant `research_log` table (consolidated into `research_logs`)
44
+
45
+ ## Database Schema
46
+
47
+ The unified database contains:
48
+
49
+ * `research_history` - Research history entries (from research_history.db)
50
+ * `research_logs` - Consolidated logs for all research activities (merged from research_history.db)
51
+ * `research_resources` - Resources found during research (from research_history.db)
52
+ * `settings` - Application settings (from deep_research.db)
53
+ * `research` - Research data (from deep_research.db)
54
+ * `research_report` - Generated research reports (from deep_research.db)
55
+
56
+ ## Rollback
57
+
58
+ If you need to roll back to the previous database architecture:
59
+
60
+ 1. Keep backup copies of your original `deep_research.db` and `research_history.db` files
61
+ 2. If you run into issues, restore those backup files and change the database paths in the code so they point to the legacy databases again
62
+
63
+ ## Troubleshooting
64
+
65
+ If you encounter issues with database migration:
66
+
67
+ 1. Check the application logs for detailed error messages
68
+ 2. Ensure you have write permissions to the data directory
69
+ 3. Make sure SQLite is functioning properly
70
+ 4. If necessary, start with a fresh database by removing `ldr.db` (make a backup copy first; deleting the file discards everything stored in it)
@@ -0,0 +1,289 @@
1
+ """
2
+ Migration script to merge deep_research.db and research_history.db into ldr.db
3
+ """
4
+
5
+ # Standard library imports
6
+ # import json # Remove unused imports
7
+ import logging
8
+ import os
9
+ import sqlite3
10
+ import sys
11
+ import traceback
12
+
13
+ # from pathlib import Path # Remove unused imports
14
+
15
+ # Set up logging
16
+ logging.basicConfig(level=logging.INFO)
17
+ logger = logging.getLogger(__name__)
18
+
19
+ # Add the parent directory to sys.path to allow relative imports
20
+ sys.path.append(
21
+ os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(__file__))))
22
+ )
23
+
24
+ # Import the database module after adding to sys path
25
+ # pylint: disable=wrong-import-position
26
+ from src.local_deep_research.web.models.database import ( # noqa: E402
27
+ DB_PATH,
28
+ LEGACY_DEEP_RESEARCH_DB,
29
+ LEGACY_RESEARCH_HISTORY_DB,
30
+ )
31
+
32
+
33
def migrate_to_ldr_db():
    """
    Migrates data from deep_research.db and research_history.db to ldr.db

    If the target database already exists, the user is asked on stdin whether
    to continue. Foreign key enforcement is switched off on the target
    connection while data is copied and switched back on afterwards.

    Returns:
        bool: False if the user aborts, the target database cannot be opened,
        or the schema cannot be initialized; True otherwise. Note that True is
        returned even when one or both legacy databases were not migrated -
        the per-database results are only reported in the final log message.
    """
    # Ensure data directory exists
    try:
        from src.local_deep_research.setup_data_dir import setup_data_dir

        setup_data_dir()
    except ImportError:
        # If we can't import directly, check the path manually
        logger.info("Creating data directory manually")
        data_dir = os.path.dirname(DB_PATH)
        os.makedirs(data_dir, exist_ok=True)

    logger.info(f"Using database path: {DB_PATH}")

    # Check if ldr.db already exists
    if os.path.exists(DB_PATH):
        logger.info(f"Target database {DB_PATH} already exists")

        # Ask for confirmation
        answer = input(
            f"Target database {DB_PATH} already exists. Do you want to continue migration? (y/n): "
        )
        if answer.lower() != "y":
            logger.info("Migration aborted by user")
            return False

    # Connect to the target database
    try:
        ldr_conn = sqlite3.connect(DB_PATH)
        ldr_cursor = ldr_conn.cursor()
        logger.info(f"Connected to target database: {DB_PATH}")
    except Exception as e:
        logger.error(f"Failed to connect to target database: {e}")
        return False

    # From here on the connection is always closed, whichever way we leave.
    try:
        # Disable foreign key enforcement while rows are copied
        ldr_cursor.execute("PRAGMA foreign_keys = OFF")

        # Initialize the database schema
        try:
            from src.local_deep_research.web.models.database import init_db

            init_db()
            logger.info("Initialized database schema")
        except Exception as e:
            logger.error(f"Failed to initialize database schema: {e}")
            return False

        # Migrate from research_history.db
        migrated_research = migrate_research_history_db(
            ldr_conn, LEGACY_RESEARCH_HISTORY_DB
        )

        # Migrate from deep_research.db
        migrated_deep_research = migrate_deep_research_db(
            ldr_conn, LEGACY_DEEP_RESEARCH_DB
        )

        # Commit first: SQLite ignores "PRAGMA foreign_keys" while a
        # transaction is open, and the inserts above leave one pending.
        ldr_conn.commit()
        ldr_cursor.execute("PRAGMA foreign_keys = ON")
    finally:
        ldr_conn.close()

    logger.info(
        f"Migration completed - Research History: {migrated_research}, Deep Research: {migrated_deep_research}"
    )
    return True
104
+
105
+
106
def migrate_research_history_db(ldr_conn, legacy_path):
    """
    Migrates data from research_history.db to ldr.db

    Every non-internal table of the legacy database is copied. A table that is
    missing in the target is created from the legacy CREATE statement; rows
    are inserted with ``INSERT OR IGNORE``, so rows that conflict with
    existing target rows are skipped. Rows that fail to insert are logged and
    skipped. Nothing is committed here - that is left to the caller.

    Args:
        ldr_conn: Connection to the target ldr.db
        legacy_path: Path to legacy research_history.db

    Returns:
        bool: True if migration was successful, False otherwise
    """
    if not os.path.exists(legacy_path):
        logger.warning(f"Legacy database not found: {legacy_path}")
        return False

    def quote_identifier(identifier):
        # Table and column names cannot be bound as "?" parameters, so they
        # are embedded as double-quoted identifiers (embedded quotes doubled).
        # This keeps names that are reserved words or contain spaces working.
        return '"' + identifier.replace('"', '""') + '"'

    legacy_conn = None
    try:
        # Connect to legacy database
        legacy_conn = sqlite3.connect(legacy_path)
        legacy_cursor = legacy_conn.cursor()
        ldr_cursor = ldr_conn.cursor()

        logger.info(f"Connected to legacy database: {legacy_path}")

        # Get tables from legacy database
        legacy_cursor.execute("SELECT name FROM sqlite_master WHERE type='table'")
        tables = [row[0] for row in legacy_cursor.fetchall()]

        for table in tables:
            # Skip sqlite internal tables
            if table.startswith("sqlite_"):
                continue

            logger.info(f"Migrating table: {table}")
            quoted_table = quote_identifier(table)

            # Check if table exists in target database
            ldr_cursor.execute(
                "SELECT name FROM sqlite_master WHERE type='table' AND name=?",
                (table,),
            )
            if not ldr_cursor.fetchone():
                # Create the table in the target database
                legacy_cursor.execute(
                    "SELECT sql FROM sqlite_master WHERE type='table' AND name=?",
                    (table,),
                )
                create_sql = legacy_cursor.fetchone()[0]
                logger.info(f"Creating table {table} with SQL: {create_sql}")
                ldr_cursor.execute(create_sql)
                logger.info(f"Created table {table} in target database")

            # Get column names (second field of each table_info row)
            legacy_cursor.execute(f"PRAGMA table_info({quoted_table})")
            columns = [row[1] for row in legacy_cursor.fetchall()]

            # Get all data from legacy table
            legacy_cursor.execute(f"SELECT * FROM {quoted_table}")
            rows = legacy_cursor.fetchall()

            logger.info(f"Found {len(rows)} rows in {table}")

            if not rows:
                logger.info(f"No data to migrate from {table}")
                continue

            # Build the insert statement once per table
            placeholders = ", ".join("?" for _ in columns)
            columns_str = ", ".join(quote_identifier(column) for column in columns)
            insert_sql = (
                f"INSERT OR IGNORE INTO {quoted_table} ({columns_str}) "
                f"VALUES ({placeholders})"
            )

            # Insert row by row so one bad row does not abort the table
            for row in rows:
                try:
                    ldr_cursor.execute(insert_sql, row)
                except sqlite3.Error as e:
                    logger.error(f"Error inserting into {table}: {e}")
                    logger.error(f"Row data: {row}")
                    continue

            # Verify data was inserted
            ldr_cursor.execute(f"SELECT COUNT(*) FROM {quoted_table}")
            count = ldr_cursor.fetchone()[0]
            logger.info(f"Migrated {count} rows to {table} (expected {len(rows)})")

        return True

    except Exception as e:
        logger.error(f"Failed to migrate from {legacy_path}: {e}")
        logger.error(f"Exception details: {traceback.format_exc()}")
        return False

    finally:
        # Close the legacy connection on success and on failure alike
        if legacy_conn is not None:
            legacy_conn.close()
195
+
196
+
197
def migrate_deep_research_db(ldr_conn, legacy_path):
    """
    Migrates data from deep_research.db to ldr.db

    Every non-internal table of the legacy database is copied, except
    ``research_log``, which is skipped as redundant with ``research_logs``.
    A table that is missing in the target is created from the legacy CREATE
    statement; rows are inserted with ``INSERT OR IGNORE``, and rows that fail
    to insert are logged and skipped. Nothing is committed here - that is left
    to the caller.

    Args:
        ldr_conn: Connection to the target ldr.db
        legacy_path: Path to legacy deep_research.db

    Returns:
        bool: True if migration was successful, False otherwise
    """
    if not os.path.exists(legacy_path):
        logger.warning(f"Legacy database not found: {legacy_path}")
        return False

    def quote_identifier(identifier):
        # Table and column names cannot be bound as "?" parameters, so they
        # are embedded as double-quoted identifiers (embedded quotes doubled).
        # This keeps names that are reserved words or contain spaces working.
        return '"' + identifier.replace('"', '""') + '"'

    legacy_conn = None
    try:
        # Connect to legacy database
        legacy_conn = sqlite3.connect(legacy_path)
        legacy_cursor = legacy_conn.cursor()
        ldr_cursor = ldr_conn.cursor()

        logger.info(f"Connected to legacy database: {legacy_path}")

        # Get tables from legacy database
        legacy_cursor.execute("SELECT name FROM sqlite_master WHERE type='table'")
        tables = [row[0] for row in legacy_cursor.fetchall()]

        # Migrate each table
        for table in tables:
            # Skip sqlite internal tables
            if table.startswith("sqlite_"):
                continue

            # Skip the research_log table as it's redundant with research_logs
            if table == "research_log":
                logger.info(
                    "Skipping redundant table 'research_log', using 'research_logs' instead"
                )
                continue

            logger.info(f"Migrating table: {table}")
            quoted_table = quote_identifier(table)

            # Check if table exists in target database
            ldr_cursor.execute(
                "SELECT name FROM sqlite_master WHERE type='table' AND name=?",
                (table,),
            )
            if not ldr_cursor.fetchone():
                # Create the table in the target database
                legacy_cursor.execute(
                    "SELECT sql FROM sqlite_master WHERE type='table' AND name=?",
                    (table,),
                )
                create_sql = legacy_cursor.fetchone()[0]
                ldr_cursor.execute(create_sql)
                logger.info(f"Created table {table} in target database")

            # Get column names (second field of each table_info row)
            legacy_cursor.execute(f"PRAGMA table_info({quoted_table})")
            columns = [row[1] for row in legacy_cursor.fetchall()]

            # Get all data from legacy table
            legacy_cursor.execute(f"SELECT * FROM {quoted_table}")
            rows = legacy_cursor.fetchall()

            if not rows:
                logger.info(f"No data to migrate from {table}")
                continue

            # Build the insert statement once per table
            placeholders = ", ".join("?" for _ in columns)
            columns_str = ", ".join(quote_identifier(column) for column in columns)
            insert_sql = (
                f"INSERT OR IGNORE INTO {quoted_table} ({columns_str}) "
                f"VALUES ({placeholders})"
            )

            # Insert row by row so one bad row does not abort the table
            for row in rows:
                try:
                    ldr_cursor.execute(insert_sql, row)
                except sqlite3.Error as e:
                    logger.error(f"Error inserting into {table}: {e}")
                    continue

            logger.info(f"Migrated {len(rows)} rows from {table}")

        return True

    except Exception as e:
        logger.error(f"Failed to migrate from {legacy_path}: {e}")
        return False

    finally:
        # Close the legacy connection on success and on failure alike
        if legacy_conn is not None:
            legacy_conn.close()
286
+
287
+
288
if __name__ == "__main__":
    # Turn the boolean result into the process exit status so shells and
    # wrapper scripts can tell a completed run (0) from an aborted or failed
    # one (1) instead of always seeing success.
    sys.exit(0 if migrate_to_ldr_db() else 1)