local-deep-research 0.1.26__py3-none-any.whl → 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- local_deep_research/__init__.py +23 -22
- local_deep_research/__main__.py +16 -0
- local_deep_research/advanced_search_system/__init__.py +7 -0
- local_deep_research/advanced_search_system/filters/__init__.py +8 -0
- local_deep_research/advanced_search_system/filters/base_filter.py +38 -0
- local_deep_research/advanced_search_system/filters/cross_engine_filter.py +200 -0
- local_deep_research/advanced_search_system/findings/base_findings.py +81 -0
- local_deep_research/advanced_search_system/findings/repository.py +452 -0
- local_deep_research/advanced_search_system/knowledge/__init__.py +1 -0
- local_deep_research/advanced_search_system/knowledge/base_knowledge.py +151 -0
- local_deep_research/advanced_search_system/knowledge/standard_knowledge.py +159 -0
- local_deep_research/advanced_search_system/questions/__init__.py +1 -0
- local_deep_research/advanced_search_system/questions/base_question.py +64 -0
- local_deep_research/advanced_search_system/questions/decomposition_question.py +445 -0
- local_deep_research/advanced_search_system/questions/standard_question.py +119 -0
- local_deep_research/advanced_search_system/repositories/__init__.py +7 -0
- local_deep_research/advanced_search_system/strategies/__init__.py +1 -0
- local_deep_research/advanced_search_system/strategies/base_strategy.py +118 -0
- local_deep_research/advanced_search_system/strategies/iterdrag_strategy.py +450 -0
- local_deep_research/advanced_search_system/strategies/parallel_search_strategy.py +312 -0
- local_deep_research/advanced_search_system/strategies/rapid_search_strategy.py +270 -0
- local_deep_research/advanced_search_system/strategies/standard_strategy.py +300 -0
- local_deep_research/advanced_search_system/tools/__init__.py +1 -0
- local_deep_research/advanced_search_system/tools/base_tool.py +100 -0
- local_deep_research/advanced_search_system/tools/knowledge_tools/__init__.py +1 -0
- local_deep_research/advanced_search_system/tools/question_tools/__init__.py +1 -0
- local_deep_research/advanced_search_system/tools/search_tools/__init__.py +1 -0
- local_deep_research/api/__init__.py +5 -5
- local_deep_research/api/research_functions.py +96 -84
- local_deep_research/app.py +8 -0
- local_deep_research/citation_handler.py +25 -16
- local_deep_research/{config.py → config/config_files.py} +102 -110
- local_deep_research/config/llm_config.py +472 -0
- local_deep_research/config/search_config.py +77 -0
- local_deep_research/defaults/__init__.py +10 -5
- local_deep_research/defaults/main.toml +2 -2
- local_deep_research/defaults/search_engines.toml +60 -34
- local_deep_research/main.py +121 -19
- local_deep_research/migrate_db.py +147 -0
- local_deep_research/report_generator.py +72 -44
- local_deep_research/search_system.py +147 -283
- local_deep_research/setup_data_dir.py +35 -0
- local_deep_research/test_migration.py +178 -0
- local_deep_research/utilities/__init__.py +0 -0
- local_deep_research/utilities/db_utils.py +49 -0
- local_deep_research/{utilties → utilities}/enums.py +2 -2
- local_deep_research/{utilties → utilities}/llm_utils.py +63 -29
- local_deep_research/utilities/search_utilities.py +242 -0
- local_deep_research/{utilties → utilities}/setup_utils.py +4 -2
- local_deep_research/web/__init__.py +0 -1
- local_deep_research/web/app.py +86 -1709
- local_deep_research/web/app_factory.py +289 -0
- local_deep_research/web/database/README.md +70 -0
- local_deep_research/web/database/migrate_to_ldr_db.py +289 -0
- local_deep_research/web/database/migrations.py +447 -0
- local_deep_research/web/database/models.py +117 -0
- local_deep_research/web/database/schema_upgrade.py +107 -0
- local_deep_research/web/models/database.py +294 -0
- local_deep_research/web/models/settings.py +94 -0
- local_deep_research/web/routes/api_routes.py +559 -0
- local_deep_research/web/routes/history_routes.py +354 -0
- local_deep_research/web/routes/research_routes.py +715 -0
- local_deep_research/web/routes/settings_routes.py +1592 -0
- local_deep_research/web/services/research_service.py +947 -0
- local_deep_research/web/services/resource_service.py +149 -0
- local_deep_research/web/services/settings_manager.py +669 -0
- local_deep_research/web/services/settings_service.py +187 -0
- local_deep_research/web/services/socket_service.py +210 -0
- local_deep_research/web/static/css/custom_dropdown.css +277 -0
- local_deep_research/web/static/css/settings.css +1223 -0
- local_deep_research/web/static/css/styles.css +525 -48
- local_deep_research/web/static/js/components/custom_dropdown.js +428 -0
- local_deep_research/web/static/js/components/detail.js +348 -0
- local_deep_research/web/static/js/components/fallback/formatting.js +122 -0
- local_deep_research/web/static/js/components/fallback/ui.js +215 -0
- local_deep_research/web/static/js/components/history.js +487 -0
- local_deep_research/web/static/js/components/logpanel.js +949 -0
- local_deep_research/web/static/js/components/progress.js +1107 -0
- local_deep_research/web/static/js/components/research.js +1865 -0
- local_deep_research/web/static/js/components/results.js +766 -0
- local_deep_research/web/static/js/components/settings.js +3981 -0
- local_deep_research/web/static/js/components/settings_sync.js +106 -0
- local_deep_research/web/static/js/main.js +226 -0
- local_deep_research/web/static/js/services/api.js +253 -0
- local_deep_research/web/static/js/services/audio.js +31 -0
- local_deep_research/web/static/js/services/formatting.js +119 -0
- local_deep_research/web/static/js/services/pdf.js +622 -0
- local_deep_research/web/static/js/services/socket.js +882 -0
- local_deep_research/web/static/js/services/ui.js +546 -0
- local_deep_research/web/templates/base.html +72 -0
- local_deep_research/web/templates/components/custom_dropdown.html +47 -0
- local_deep_research/web/templates/components/log_panel.html +32 -0
- local_deep_research/web/templates/components/mobile_nav.html +22 -0
- local_deep_research/web/templates/components/settings_form.html +299 -0
- local_deep_research/web/templates/components/sidebar.html +21 -0
- local_deep_research/web/templates/pages/details.html +73 -0
- local_deep_research/web/templates/pages/history.html +51 -0
- local_deep_research/web/templates/pages/progress.html +57 -0
- local_deep_research/web/templates/pages/research.html +139 -0
- local_deep_research/web/templates/pages/results.html +59 -0
- local_deep_research/web/templates/settings_dashboard.html +78 -192
- local_deep_research/web/utils/__init__.py +0 -0
- local_deep_research/web/utils/formatters.py +76 -0
- local_deep_research/web_search_engines/engines/full_search.py +18 -16
- local_deep_research/web_search_engines/engines/meta_search_engine.py +182 -131
- local_deep_research/web_search_engines/engines/search_engine_arxiv.py +224 -139
- local_deep_research/web_search_engines/engines/search_engine_brave.py +88 -71
- local_deep_research/web_search_engines/engines/search_engine_ddg.py +48 -39
- local_deep_research/web_search_engines/engines/search_engine_github.py +415 -204
- local_deep_research/web_search_engines/engines/search_engine_google_pse.py +123 -90
- local_deep_research/web_search_engines/engines/search_engine_guardian.py +210 -157
- local_deep_research/web_search_engines/engines/search_engine_local.py +532 -369
- local_deep_research/web_search_engines/engines/search_engine_local_all.py +42 -36
- local_deep_research/web_search_engines/engines/search_engine_pubmed.py +358 -266
- local_deep_research/web_search_engines/engines/search_engine_searxng.py +211 -159
- local_deep_research/web_search_engines/engines/search_engine_semantic_scholar.py +213 -170
- local_deep_research/web_search_engines/engines/search_engine_serpapi.py +84 -68
- local_deep_research/web_search_engines/engines/search_engine_wayback.py +186 -154
- local_deep_research/web_search_engines/engines/search_engine_wikipedia.py +115 -77
- local_deep_research/web_search_engines/search_engine_base.py +174 -99
- local_deep_research/web_search_engines/search_engine_factory.py +192 -102
- local_deep_research/web_search_engines/search_engines_config.py +22 -15
- {local_deep_research-0.1.26.dist-info → local_deep_research-0.2.0.dist-info}/METADATA +177 -97
- local_deep_research-0.2.0.dist-info/RECORD +135 -0
- {local_deep_research-0.1.26.dist-info → local_deep_research-0.2.0.dist-info}/WHEEL +1 -2
- {local_deep_research-0.1.26.dist-info → local_deep_research-0.2.0.dist-info}/entry_points.txt +3 -0
- local_deep_research/defaults/llm_config.py +0 -338
- local_deep_research/utilties/search_utilities.py +0 -114
- local_deep_research/web/static/js/app.js +0 -3763
- local_deep_research/web/templates/api_keys_config.html +0 -82
- local_deep_research/web/templates/collections_config.html +0 -90
- local_deep_research/web/templates/index.html +0 -348
- local_deep_research/web/templates/llm_config.html +0 -120
- local_deep_research/web/templates/main_config.html +0 -89
- local_deep_research/web/templates/search_engines_config.html +0 -154
- local_deep_research/web/templates/settings.html +0 -519
- local_deep_research-0.1.26.dist-info/RECORD +0 -61
- local_deep_research-0.1.26.dist-info/top_level.txt +0 -1
- /local_deep_research/{utilties → config}/__init__.py +0 -0
- {local_deep_research-0.1.26.dist-info → local_deep_research-0.2.0.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,289 @@
|
|
1
|
+
import logging
|
2
|
+
import os
|
3
|
+
from importlib import resources as importlib_resources
|
4
|
+
|
5
|
+
from flask import (
|
6
|
+
Flask,
|
7
|
+
jsonify,
|
8
|
+
make_response,
|
9
|
+
redirect,
|
10
|
+
request,
|
11
|
+
send_from_directory,
|
12
|
+
url_for,
|
13
|
+
)
|
14
|
+
from flask_socketio import SocketIO
|
15
|
+
from flask_wtf.csrf import CSRFProtect
|
16
|
+
|
17
|
+
from .models.database import DB_PATH, init_db
|
18
|
+
|
19
|
+
# Initialize logger
|
20
|
+
logger = logging.getLogger(__name__)
|
21
|
+
|
22
|
+
|
23
|
+
def create_app():
    """
    Create and configure the Flask application.

    Locates the packaged ``static`` and ``templates`` directories (falling
    back to ``static``/``templates`` under the current working directory),
    applies configuration, sets up CSRF protection and Socket.IO, initializes
    the database, and registers middleware, blueprints, error handlers and
    Socket.IO event handlers.

    Returns:
        tuple: (app, socketio) - The configured Flask app and SocketIO instance
    """
    # Configure logging
    logging.basicConfig(level=logging.INFO)

    # Set Werkzeug logger to WARNING level to suppress Socket.IO polling logs
    logging.getLogger("werkzeug").setLevel(logging.WARNING)

    try:
        # Get directories based on package installation
        PACKAGE_DIR = importlib_resources.files("local_deep_research") / "web"
        with importlib_resources.as_file(PACKAGE_DIR) as package_dir:
            STATIC_DIR = (package_dir / "static").as_posix()
            TEMPLATE_DIR = (package_dir / "templates").as_posix()

        # Initialize Flask app with package directories
        app = Flask(__name__, static_folder=STATIC_DIR, template_folder=TEMPLATE_DIR)
        # Report through the module logger (configured above) rather than
        # print(), so these messages follow the application's log settings.
        logger.info("Using package static path: %s", STATIC_DIR)
        logger.info("Using package template path: %s", TEMPLATE_DIR)
    except Exception as e:
        # Fallback for development: resolve the folders relative to the
        # current working directory.
        logger.warning("Package directories not found, using fallback paths: %s", e)
        app = Flask(
            __name__,
            static_folder=os.path.abspath("static"),
            template_folder=os.path.abspath("templates"),
        )

    # App configuration
    # NOTE(review): this secret is hard-coded and therefore public; anything
    # Flask signs with it is only as secret as this source file. Consider
    # loading it from configuration.
    app.config["SECRET_KEY"] = "deep-research-secret-key"

    # Initialize CSRF protection
    csrf = CSRFProtect(app)
    # Exempt Socket.IO from CSRF protection
    csrf.exempt("research.socket_io")

    # Database configuration - Use unified ldr.db from the database module
    db_path = DB_PATH
    app.config["SQLALCHEMY_DATABASE_URI"] = f"sqlite:///{db_path}"
    logger.info(f"Using database at {db_path}")
    app.config["SQLALCHEMY_TRACK_MODIFICATIONS"] = False
    app.config["SQLALCHEMY_ECHO"] = False

    # Initialize extensions
    socketio = SocketIO(
        app,
        cors_allowed_origins="*",
        async_mode="threading",
        path="/research/socket.io",
        logger=False,
        engineio_logger=False,
        ping_timeout=20,
        ping_interval=5,
    )

    # Initialize the database
    create_database(app)
    init_db()

    # Register socket service
    from .services.socket_service import set_socketio

    set_socketio(socketio)

    # Apply middleware
    apply_middleware(app)

    # Register blueprints
    register_blueprints(app)

    # Register error handlers
    register_error_handlers(app)

    # Register socket event handlers
    register_socket_events(socketio)

    return app, socketio
|
105
|
+
|
106
|
+
|
107
|
+
def apply_middleware(app):
    """
    Apply middleware to the Flask app.

    Registers an ``after_request`` hook that adds Content-Security-Policy
    headers to every response (plus CORS headers for paths starting with
    ``/api/``), and a ``before_request`` hook that guards Socket.IO requests.

    Args:
        app: The Flask application to attach the hooks to.
    """
    # The policy is constant, so build it once instead of on every response.
    # It is deliberately permissive so that Socket.IO can function.
    csp = (
        "default-src 'self'; "
        "connect-src 'self' ws: wss: http: https:; "
        "script-src 'self' 'unsafe-inline' 'unsafe-eval' cdnjs.cloudflare.com cdn.jsdelivr.net unpkg.com; "
        "style-src 'self' 'unsafe-inline' cdnjs.cloudflare.com; "
        "font-src 'self' cdnjs.cloudflare.com; "
        "img-src 'self' data:; "
        "worker-src blob:; "
        "frame-src 'self';"
    )

    # Add Content Security Policy headers to allow Socket.IO to function
    @app.after_request
    def add_security_headers(response):
        response.headers["Content-Security-Policy"] = csp
        response.headers["X-Content-Security-Policy"] = csp

        # Add CORS headers for API requests
        # NOTE(review): the blueprints registered in this module are mounted
        # under "/research/api"; confirm that "/api/" is the intended prefix.
        if request.path.startswith("/api/"):
            response.headers["Access-Control-Allow-Origin"] = "*"
            response.headers["Access-Control-Allow-Methods"] = (
                "GET, POST, DELETE, OPTIONS"
            )
            response.headers["Access-Control-Allow-Headers"] = "Content-Type"

        return response

    # Add a middleware layer to handle abrupt disconnections
    @app.before_request
    def handle_websocket_requests():
        if request.path.startswith("/research/socket.io"):
            try:
                if not request.environ.get("werkzeug.socket"):
                    return
            except Exception as e:
                # Use the module logger instead of print() so the failure is
                # recorded alongside the rest of the application's logs.
                logger.error("WebSocket preprocessing error: %s", e)
                # Return empty response to prevent further processing
                return "", 200
|
149
|
+
|
150
|
+
|
151
|
+
def register_blueprints(app):
    """
    Register the application's blueprints and app-level routes.

    Mounts the research and settings blueprints without a prefix and the
    history and API blueprints under ``/research/api``, hands the config file
    locations to the settings routes, and adds the root redirect, favicon and
    static-file routes.

    Args:
        app: The Flask application to register everything on.
    """
    # Blueprints are imported here rather than at module import time.
    from .routes.api_routes import api_bp
    from .routes.history_routes import history_bp
    from .routes.research_routes import research_bp
    from .routes.settings_routes import settings_bp

    api_prefix = "/research/api"
    registrations = (
        (research_bp, {}),
        (history_bp, {"url_prefix": api_prefix}),
        (settings_bp, {}),
        (api_bp, {"url_prefix": api_prefix}),
    )
    for blueprint, options in registrations:
        app.register_blueprint(blueprint, **options)

    # Configure settings paths once all blueprints are registered.
    # The config module is imported here to avoid circular dependencies.
    try:
        from ..config.config_files import SEARCH_ENGINES_FILE, get_config_dir
        from .routes.settings_routes import set_config_paths

        config_dir = get_config_dir() / "config"
        set_config_paths(
            config_dir,
            SEARCH_ENGINES_FILE,
            config_dir / "settings.toml",
            config_dir / "local_collections.toml",
        )
    except Exception as e:
        logger.error(f"Error configuring settings routes: {e}")

    def root_index():
        """Redirect the site root to the research index page."""
        return redirect(url_for("research.index"))

    def favicon():
        """Serve the favicon from the app's static folder."""
        return send_from_directory(
            app.static_folder, "favicon.ico", mimetype="image/x-icon"
        )

    def app_serve_static(path):
        """Serve a static file at the app level, for compatibility."""
        return send_from_directory(app.static_folder, path)

    # With no explicit endpoint, each rule's endpoint is the view function's
    # name, exactly as with the @app.route decorator.
    app.add_url_rule("/", view_func=root_index)
    app.add_url_rule("/favicon.ico", view_func=favicon)
    app.add_url_rule("/static/<path:path>", view_func=app_serve_static)
|
207
|
+
|
208
|
+
|
209
|
+
def register_error_handlers(app):
    """
    Install JSON error handlers for 404 and 500 responses.

    Args:
        app: The Flask application to register the handlers on.
    """

    def not_found(error):
        """Answer a 404 with a JSON error body."""
        body = jsonify({"error": "Not found"})
        return make_response(body, 404)

    def server_error(error):
        """Answer a 500 with a JSON error body."""
        body = jsonify({"error": "Server error"})
        return make_response(body, 500)

    app.register_error_handler(404, not_found)
    app.register_error_handler(500, server_error)
|
219
|
+
|
220
|
+
|
221
|
+
def register_socket_events(socketio):
    """
    Register Socket.IO event handlers.

    Wires the connect, disconnect and ``subscribe_to_research`` events, plus
    the error handlers, to the implementations in the socket service.

    Args:
        socketio: The SocketIO instance to register the handlers on.
    """

    from .routes.research_routes import get_globals
    from .services.socket_service import (
        handle_connect,
        handle_default_error,
        handle_disconnect,
        handle_socket_error,
        handle_subscribe,
    )

    @socketio.on("connect")
    def on_connect():
        handle_connect(request)

    @socketio.on("disconnect")
    def on_disconnect():
        handle_disconnect(request)

    @socketio.on("subscribe_to_research")
    def on_subscribe(data):
        # Look up the active research registry at event time, not at
        # registration time.
        globals_dict = get_globals()
        active_research = globals_dict.get("active_research", {})
        handle_subscribe(data, request, active_research)

    # on_error is a decorator *factory*: it must be called (optionally with a
    # namespace) to obtain the decorator. Used bare, the handler would be
    # passed as the namespace argument and never registered.
    @socketio.on_error()
    def on_error(e):
        return handle_socket_error(e)

    # on_error_default, by contrast, takes the handler directly.
    @socketio.on_error_default
    def on_default_error(e):
        return handle_default_error(e)
|
254
|
+
|
255
|
+
|
256
|
+
def create_database(app):
    """
    Create the database engine, tables and session for the application.

    Builds a SQLAlchemy engine from the app's ``SQLALCHEMY_DATABASE_URI``,
    creates all model tables, attaches the engine and a scoped session to the
    app as ``app.engine`` and ``app.db_session``, runs migrations and
    predefined-settings setup, and removes the session when the app context
    is torn down.

    Args:
        app: The Flask application whose config supplies the database URI.
    """
    from sqlalchemy import create_engine
    from sqlalchemy.orm import scoped_session, sessionmaker

    from .database.migrations import run_migrations, setup_predefined_settings
    from .database.models import Base

    database_uri = app.config["SQLALCHEMY_DATABASE_URI"]
    echo_sql = app.config.get("SQLALCHEMY_ECHO", False)

    # check_same_thread=False lets SQLite connections be used from threads
    # other than the one that created them.
    db_engine = create_engine(
        database_uri,
        echo=echo_sql,
        connect_args={"check_same_thread": False},
    )
    app.engine = db_engine

    # Create all tables declared on the models' metadata
    Base.metadata.create_all(db_engine)

    # Scoped session built on a non-autocommit, non-autoflush factory
    app.db_session = scoped_session(
        sessionmaker(bind=db_engine, autocommit=False, autoflush=False)
    )

    # Run migrations and setup predefined settings
    run_migrations(db_engine, app.db_session)
    setup_predefined_settings(app.db_session)

    # Remove the scoped session whenever an app context is torn down
    @app.teardown_appcontext
    def remove_session(exception=None):
        app.db_session.remove()
|
@@ -0,0 +1,70 @@
|
|
1
|
+
# Database Architecture
|
2
|
+
|
3
|
+
## Overview
|
4
|
+
|
5
|
+
Local Deep Research now uses a unified database architecture with a single SQLite database file (`ldr.db`) that replaces the previous split database approach (`deep_research.db` and `research_history.db`).
|
6
|
+
|
7
|
+
The database is located at `src/data/ldr.db` within the project directory structure.
|
8
|
+
|
9
|
+
## Database-First Settings
|
10
|
+
|
11
|
+
The application now follows a "database-first" approach for settings:
|
12
|
+
|
13
|
+
1. All settings are stored in the database, in the `settings` table
|
14
|
+
2. Settings from TOML files are used only as fallbacks if a setting doesn't exist in the database
|
15
|
+
3. The web UI settings page modifies the database values directly
|
16
|
+
|
17
|
+
## Migration
|
18
|
+
|
19
|
+
If you have existing data in the legacy databases, you need to migrate it to the new unified database.
|
20
|
+
|
21
|
+
### Automatic Migration
|
22
|
+
|
23
|
+
When you start the application for the first time after updating, it will check if migration is needed:
|
24
|
+
|
25
|
+
1. If legacy databases exist and `ldr.db` doesn't exist, you'll see a warning message
|
26
|
+
2. You can run migration using the command: `python -m src.local_deep_research.main --migrate-db`
|
27
|
+
3. Alternatively, start the application with auto-migration: `python -m src.local_deep_research.main --auto-migrate`
|
28
|
+
|
29
|
+
### Manual Migration
|
30
|
+
|
31
|
+
If automatic migration doesn't work, you can:
|
32
|
+
|
33
|
+
1. Run the migration script directly: `python -m src.local_deep_research.web.database.migrate_to_ldr_db`
|
34
|
+
2. Check migration results in the log output
|
35
|
+
|
36
|
+
### Schema Upgrades
|
37
|
+
|
38
|
+
If you have already migrated your database but need to update its schema:
|
39
|
+
|
40
|
+
1. The application automatically runs schema upgrades on startup
|
41
|
+
2. You can manually run schema upgrades with: `python -m src.local_deep_research.main --schema-upgrade`
|
42
|
+
3. Current schema upgrades include:
|
43
|
+
- Removing the redundant `research_log` table (consolidated into `research_logs`)
|
44
|
+
|
45
|
+
## Database Schema
|
46
|
+
|
47
|
+
The unified database contains:
|
48
|
+
|
49
|
+
* `research_history` - Research history entries (from research_history.db)
|
50
|
+
* `research_logs` - Consolidated logs for all research activities (merged from research_history.db)
|
51
|
+
* `research_resources` - Resources found during research (from research_history.db)
|
52
|
+
* `settings` - Application settings (from deep_research.db)
|
53
|
+
* `research` - Research data (from deep_research.db)
|
54
|
+
* `research_report` - Generated research reports (from deep_research.db)
|
55
|
+
|
56
|
+
## Rollback
|
57
|
+
|
58
|
+
If you need to roll back to the previous database architecture:
|
59
|
+
|
60
|
+
1. Before migrating, make backup copies of your original `deep_research.db` and `research_history.db` files and keep them
|
61
|
+
2. In case of issues, restore them and modify the database paths in the code
|
62
|
+
|
63
|
+
## Troubleshooting
|
64
|
+
|
65
|
+
If you encounter issues with database migration:
|
66
|
+
|
67
|
+
1. Check the application logs for detailed error messages
|
68
|
+
2. Ensure you have write permissions to the data directory
|
69
|
+
3. Make sure SQLite is functioning properly
|
70
|
+
4. If necessary, start with a fresh database by removing `ldr.db` (this discards everything stored in it, so back it up first if you may need the data)
|
@@ -0,0 +1,289 @@
|
|
1
|
+
"""
|
2
|
+
Migration script to merge deep_research.db and research_history.db into ldr.db
|
3
|
+
"""
|
4
|
+
|
5
|
+
# Standard library imports
|
6
|
+
# import json # Remove unused imports
|
7
|
+
import logging
|
8
|
+
import os
|
9
|
+
import sqlite3
|
10
|
+
import sys
|
11
|
+
import traceback
|
12
|
+
|
13
|
+
# from pathlib import Path # Remove unused imports
|
14
|
+
|
15
|
+
# Set up logging
|
16
|
+
logging.basicConfig(level=logging.INFO)
|
17
|
+
logger = logging.getLogger(__name__)
|
18
|
+
|
19
|
+
# Add the parent directory to sys.path to allow relative imports
|
20
|
+
sys.path.append(
|
21
|
+
os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(__file__))))
|
22
|
+
)
|
23
|
+
|
24
|
+
# Import the database module after adding to sys path
|
25
|
+
# pylint: disable=wrong-import-position
|
26
|
+
from src.local_deep_research.web.models.database import ( # noqa: E402
|
27
|
+
DB_PATH,
|
28
|
+
LEGACY_DEEP_RESEARCH_DB,
|
29
|
+
LEGACY_RESEARCH_HISTORY_DB,
|
30
|
+
)
|
31
|
+
|
32
|
+
|
33
|
+
def migrate_to_ldr_db():
    """
    Migrates data from deep_research.db and research_history.db to ldr.db

    Ensures the data directory exists, asks for confirmation on stdin when
    the target database already exists, initializes the target schema and
    then copies the tables of both legacy databases into it.

    Returns:
        bool: False if the user declines, the target database cannot be
        opened, or schema initialization fails; True otherwise. The result of
        each legacy migration is only logged, so True is returned even when a
        legacy database was missing or failed to migrate.
    """
    # Ensure data directory exists
    try:
        from src.local_deep_research.setup_data_dir import setup_data_dir

        setup_data_dir()
    except ImportError:
        # If we can't import directly, create the directory manually
        logger.info("Creating data directory manually")
        data_dir = os.path.dirname(DB_PATH)
        os.makedirs(data_dir, exist_ok=True)

    logger.info(f"Using database path: {DB_PATH}")

    # Check if ldr.db already exists
    if os.path.exists(DB_PATH):
        logger.info(f"Target database {DB_PATH} already exists")

        # Ask for confirmation
        answer = input(
            f"Target database {DB_PATH} already exists. Do you want to continue migration? (y/n): "
        )
        if answer.strip().lower() != "y":
            logger.info("Migration aborted by user")
            return False

    # Connect to the target database
    try:
        ldr_conn = sqlite3.connect(DB_PATH)
        ldr_cursor = ldr_conn.cursor()
        logger.info(f"Connected to target database: {DB_PATH}")
    except Exception as e:
        logger.error(f"Failed to connect to target database: {e}")
        return False

    # From here on the connection is closed on every path, including
    # unexpected exceptions.
    try:
        # Disable foreign key enforcement while the data is copied
        ldr_cursor.execute("PRAGMA foreign_keys = OFF")

        # Initialize the database schema
        try:
            from src.local_deep_research.web.models.database import init_db

            init_db()
            logger.info("Initialized database schema")
        except Exception as e:
            logger.error(f"Failed to initialize database schema: {e}")
            return False

        # Migrate from research_history.db
        migrated_research = migrate_research_history_db(
            ldr_conn, LEGACY_RESEARCH_HISTORY_DB
        )

        # Migrate from deep_research.db
        migrated_deep_research = migrate_deep_research_db(
            ldr_conn, LEGACY_DEEP_RESEARCH_DB
        )

        # Commit first: SQLite ignores PRAGMA foreign_keys while a
        # transaction is open, so enforcement can only be re-enabled once
        # the copied rows are committed.
        ldr_conn.commit()
        ldr_cursor.execute("PRAGMA foreign_keys = ON")
    finally:
        ldr_conn.close()

    logger.info(
        f"Migration completed - Research History: {migrated_research}, Deep Research: {migrated_deep_research}"
    )
    return True
|
104
|
+
|
105
|
+
|
106
|
+
def _quote_identifier(name):
    """Return *name* quoted as an SQLite identifier (embedded quotes doubled)."""
    return '"' + name.replace('"', '""') + '"'


def _copy_legacy_tables(ldr_conn, legacy_path, skip_tables=None, log_row_data=False):
    """
    Copy every user table of a legacy SQLite file into the target database.

    Tables missing from the target are created with the legacy ``CREATE``
    statement; rows are copied with ``INSERT OR IGNORE``. Nothing is
    committed here; the caller owns the target connection's transaction.

    Args:
        ldr_conn: Connection to the target database.
        legacy_path: Path to the legacy database file.
        skip_tables: Optional mapping of table name -> message to log for
            tables that must not be copied.
        log_row_data: When True, log the contents of rows that fail to insert.

    Returns:
        bool: True if the copy ran to completion, False if the legacy file
        does not exist or an unexpected error occurred.
    """
    if not os.path.exists(legacy_path):
        logger.warning(f"Legacy database not found: {legacy_path}")
        return False

    skip_tables = skip_tables or {}
    legacy_conn = None
    try:
        # Connect to legacy database
        legacy_conn = sqlite3.connect(legacy_path)
        legacy_cursor = legacy_conn.cursor()
        ldr_cursor = ldr_conn.cursor()

        logger.info(f"Connected to legacy database: {legacy_path}")

        # Get tables (and their CREATE statements) from legacy database
        legacy_cursor.execute("SELECT name, sql FROM sqlite_master WHERE type='table'")
        tables = legacy_cursor.fetchall()

        for table, create_sql in tables:
            # Skip sqlite internal tables
            if table.startswith("sqlite_"):
                continue

            if table in skip_tables:
                logger.info(skip_tables[table])
                continue

            logger.info(f"Migrating table: {table}")

            # Identifiers cannot be bound as parameters, so quote them; this
            # keeps names that are SQL keywords or contain spaces working.
            quoted_table = _quote_identifier(table)

            # Check if table exists in target database
            ldr_cursor.execute(
                "SELECT name FROM sqlite_master WHERE type='table' AND name=?",
                (table,),
            )
            if not ldr_cursor.fetchone():
                # Create the table in the target database
                logger.info(f"Creating table {table} with SQL: {create_sql}")
                ldr_cursor.execute(create_sql)
                logger.info(f"Created table {table} in target database")

            # Get column names
            legacy_cursor.execute(f"PRAGMA table_info({quoted_table})")
            columns = [row[1] for row in legacy_cursor.fetchall()]
            columns_str = ", ".join(_quote_identifier(column) for column in columns)

            # Select the columns explicitly so each row lines up with the
            # column list used by the INSERT below.
            legacy_cursor.execute(f"SELECT {columns_str} FROM {quoted_table}")
            rows = legacy_cursor.fetchall()

            logger.info(f"Found {len(rows)} rows in {table}")

            if not rows:
                logger.info(f"No data to migrate from {table}")
                continue

            placeholders = ", ".join("?" for _ in columns)
            insert_sql = (
                f"INSERT OR IGNORE INTO {quoted_table} ({columns_str}) "
                f"VALUES ({placeholders})"
            )

            # Insert row by row so that one bad row does not abort the table
            for row in rows:
                try:
                    ldr_cursor.execute(insert_sql, row)
                except sqlite3.Error as e:
                    logger.error(f"Error inserting into {table}: {e}")
                    if log_row_data:
                        logger.error(f"Row data: {row}")

            # Report what is actually in the target, not what was attempted
            ldr_cursor.execute(f"SELECT COUNT(*) FROM {quoted_table}")
            count = ldr_cursor.fetchone()[0]
            logger.info(f"Migrated {count} rows to {table} (expected {len(rows)})")

        return True

    except Exception as e:
        logger.error(f"Failed to migrate from {legacy_path}: {e}")
        logger.error(f"Exception details: {traceback.format_exc()}")
        return False
    finally:
        # Close the legacy connection on every path, including failures
        if legacy_conn is not None:
            legacy_conn.close()


def migrate_research_history_db(ldr_conn, legacy_path):
    """
    Migrates data from research_history.db to ldr.db

    Args:
        ldr_conn: Connection to the target ldr.db
        legacy_path: Path to legacy research_history.db

    Returns:
        bool: True if migration was successful, False otherwise
    """
    return _copy_legacy_tables(ldr_conn, legacy_path, log_row_data=True)


def migrate_deep_research_db(ldr_conn, legacy_path):
    """
    Migrates data from deep_research.db to ldr.db

    The ``research_log`` table is not copied, as it is redundant with
    ``research_logs``.

    Args:
        ldr_conn: Connection to the target ldr.db
        legacy_path: Path to legacy deep_research.db

    Returns:
        bool: True if migration was successful, False otherwise
    """
    return _copy_legacy_tables(
        ldr_conn,
        legacy_path,
        skip_tables={
            "research_log": "Skipping redundant table 'research_log', using 'research_logs' instead"
        },
    )
|
286
|
+
|
287
|
+
|
288
|
+
if __name__ == "__main__":
|
289
|
+
migrate_to_ldr_db()
|