local-deep-research 0.5.7__py3-none-any.whl → 0.6.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (91) hide show
  1. local_deep_research/__version__.py +1 -1
  2. local_deep_research/advanced_search_system/candidate_exploration/progressive_explorer.py +11 -1
  3. local_deep_research/advanced_search_system/questions/browsecomp_question.py +32 -6
  4. local_deep_research/advanced_search_system/strategies/focused_iteration_strategy.py +33 -8
  5. local_deep_research/advanced_search_system/strategies/source_based_strategy.py +2 -0
  6. local_deep_research/api/__init__.py +2 -0
  7. local_deep_research/api/research_functions.py +177 -3
  8. local_deep_research/benchmarks/graders.py +150 -5
  9. local_deep_research/benchmarks/models/__init__.py +19 -0
  10. local_deep_research/benchmarks/models/benchmark_models.py +283 -0
  11. local_deep_research/benchmarks/ui/__init__.py +1 -0
  12. local_deep_research/benchmarks/web_api/__init__.py +6 -0
  13. local_deep_research/benchmarks/web_api/benchmark_routes.py +862 -0
  14. local_deep_research/benchmarks/web_api/benchmark_service.py +920 -0
  15. local_deep_research/config/llm_config.py +106 -21
  16. local_deep_research/defaults/default_settings.json +448 -3
  17. local_deep_research/error_handling/report_generator.py +10 -0
  18. local_deep_research/llm/__init__.py +19 -0
  19. local_deep_research/llm/llm_registry.py +155 -0
  20. local_deep_research/metrics/db_models.py +3 -7
  21. local_deep_research/metrics/search_tracker.py +25 -11
  22. local_deep_research/report_generator.py +3 -2
  23. local_deep_research/search_system.py +12 -9
  24. local_deep_research/utilities/log_utils.py +23 -10
  25. local_deep_research/utilities/thread_context.py +99 -0
  26. local_deep_research/web/app_factory.py +32 -8
  27. local_deep_research/web/database/benchmark_schema.py +230 -0
  28. local_deep_research/web/database/convert_research_id_to_string.py +161 -0
  29. local_deep_research/web/database/models.py +55 -1
  30. local_deep_research/web/database/schema_upgrade.py +397 -2
  31. local_deep_research/web/database/uuid_migration.py +265 -0
  32. local_deep_research/web/routes/api_routes.py +62 -31
  33. local_deep_research/web/routes/history_routes.py +13 -6
  34. local_deep_research/web/routes/metrics_routes.py +264 -4
  35. local_deep_research/web/routes/research_routes.py +45 -18
  36. local_deep_research/web/routes/route_registry.py +352 -0
  37. local_deep_research/web/routes/settings_routes.py +382 -22
  38. local_deep_research/web/services/research_service.py +22 -29
  39. local_deep_research/web/services/settings_manager.py +53 -0
  40. local_deep_research/web/services/settings_service.py +2 -0
  41. local_deep_research/web/static/css/styles.css +8 -0
  42. local_deep_research/web/static/js/components/detail.js +7 -14
  43. local_deep_research/web/static/js/components/details.js +8 -10
  44. local_deep_research/web/static/js/components/fallback/ui.js +4 -4
  45. local_deep_research/web/static/js/components/history.js +6 -6
  46. local_deep_research/web/static/js/components/logpanel.js +14 -11
  47. local_deep_research/web/static/js/components/progress.js +51 -46
  48. local_deep_research/web/static/js/components/research.js +250 -89
  49. local_deep_research/web/static/js/components/results.js +5 -7
  50. local_deep_research/web/static/js/components/settings.js +32 -26
  51. local_deep_research/web/static/js/components/settings_sync.js +24 -23
  52. local_deep_research/web/static/js/config/urls.js +285 -0
  53. local_deep_research/web/static/js/main.js +8 -8
  54. local_deep_research/web/static/js/research_form.js +267 -12
  55. local_deep_research/web/static/js/services/api.js +18 -18
  56. local_deep_research/web/static/js/services/keyboard.js +8 -8
  57. local_deep_research/web/static/js/services/socket.js +53 -35
  58. local_deep_research/web/static/js/services/ui.js +1 -1
  59. local_deep_research/web/templates/base.html +4 -1
  60. local_deep_research/web/templates/components/custom_dropdown.html +5 -3
  61. local_deep_research/web/templates/components/mobile_nav.html +3 -3
  62. local_deep_research/web/templates/components/sidebar.html +9 -3
  63. local_deep_research/web/templates/pages/benchmark.html +2697 -0
  64. local_deep_research/web/templates/pages/benchmark_results.html +1136 -0
  65. local_deep_research/web/templates/pages/benchmark_simple.html +453 -0
  66. local_deep_research/web/templates/pages/cost_analytics.html +1 -1
  67. local_deep_research/web/templates/pages/metrics.html +212 -39
  68. local_deep_research/web/templates/pages/research.html +8 -6
  69. local_deep_research/web/templates/pages/star_reviews.html +1 -1
  70. local_deep_research/web_search_engines/engines/search_engine_arxiv.py +14 -1
  71. local_deep_research/web_search_engines/engines/search_engine_brave.py +15 -1
  72. local_deep_research/web_search_engines/engines/search_engine_ddg.py +20 -1
  73. local_deep_research/web_search_engines/engines/search_engine_google_pse.py +26 -2
  74. local_deep_research/web_search_engines/engines/search_engine_pubmed.py +15 -1
  75. local_deep_research/web_search_engines/engines/search_engine_retriever.py +192 -0
  76. local_deep_research/web_search_engines/engines/search_engine_tavily.py +307 -0
  77. local_deep_research/web_search_engines/rate_limiting/__init__.py +14 -0
  78. local_deep_research/web_search_engines/rate_limiting/__main__.py +9 -0
  79. local_deep_research/web_search_engines/rate_limiting/cli.py +209 -0
  80. local_deep_research/web_search_engines/rate_limiting/exceptions.py +21 -0
  81. local_deep_research/web_search_engines/rate_limiting/tracker.py +506 -0
  82. local_deep_research/web_search_engines/retriever_registry.py +108 -0
  83. local_deep_research/web_search_engines/search_engine_base.py +161 -43
  84. local_deep_research/web_search_engines/search_engine_factory.py +14 -0
  85. local_deep_research/web_search_engines/search_engines_config.py +20 -0
  86. local_deep_research-0.6.0.dist-info/METADATA +374 -0
  87. {local_deep_research-0.5.7.dist-info → local_deep_research-0.6.0.dist-info}/RECORD +90 -65
  88. local_deep_research-0.5.7.dist-info/METADATA +0 -420
  89. {local_deep_research-0.5.7.dist-info → local_deep_research-0.6.0.dist-info}/WHEEL +0 -0
  90. {local_deep_research-0.5.7.dist-info → local_deep_research-0.6.0.dist-info}/entry_points.txt +0 -0
  91. {local_deep_research-0.5.7.dist-info → local_deep_research-0.6.0.dist-info}/licenses/LICENSE +0 -0
@@ -7,7 +7,7 @@ import os
7
7
  import sys
8
8
 
9
9
  from loguru import logger
10
- from sqlalchemy import create_engine, inspect
10
+ from sqlalchemy import create_engine, inspect, text
11
11
 
12
12
  # Add the parent directory to sys.path to allow relative imports
13
13
  sys.path.append(
@@ -24,7 +24,7 @@ except ImportError:
24
24
  )
25
25
  DB_PATH = os.path.join(project_root, "src", "data", "ldr.db")
26
26
 
27
- from .models import Base, ResearchStrategy
27
+ from .models import Base, ResearchStrategy, RateLimitAttempt, RateLimitEstimate
28
28
 
29
29
 
30
30
  def remove_research_log_table(engine):
@@ -87,6 +87,383 @@ def create_research_strategy_table(engine):
87
87
  return False
88
88
 
89
89
 
90
def create_benchmark_tables(engine):
    """
    Ensure the benchmark tables exist, creating them when absent.

    Args:
        engine: SQLAlchemy engine

    Returns:
        bool: True if operation was successful, False otherwise
    """
    try:
        from .benchmark_schema import create_benchmark_tables_simple

        # The presence of "benchmark_runs" is treated as a proxy for the
        # whole benchmark table set being present.
        if inspect(engine).has_table("benchmark_runs"):
            logger.info("Benchmark tables already exist, no action needed")
        else:
            # Create all benchmark tables using simple schema
            create_benchmark_tables_simple(engine)
            logger.info("Successfully created benchmark tables")
        return True
    except Exception:
        logger.exception("Error creating benchmark tables")
        return False
117
+
118
+
119
def create_rate_limiting_tables(engine):
    """
    Ensure the rate limiting tables exist, creating any that are missing.

    Args:
        engine: SQLAlchemy engine

    Returns:
        bool: True if operation was successful, False otherwise
    """
    try:
        inspector = inspect(engine)
        missing = []

        # Collect only the ORM tables that are not yet present on disk.
        if not inspector.has_table("rate_limit_attempts"):
            logger.info("Need to create 'rate_limit_attempts' table")
            missing.append(RateLimitAttempt.__table__)

        if not inspector.has_table("rate_limit_estimates"):
            logger.info("Need to create 'rate_limit_estimates' table")
            missing.append(RateLimitEstimate.__table__)

        if not missing:
            logger.info("Rate limiting tables already exist, no action needed")
            return True

        # Create the tables using ORM metadata, restricted to the missing set.
        Base.metadata.create_all(engine, tables=missing)
        logger.info(
            f"Successfully created {len(missing)} rate limiting tables"
        )
        return True
    except Exception:
        logger.exception("Error creating rate limiting tables")
        return False
157
+
158
+
159
def add_research_id_to_benchmark_results(engine):
    """
    Add research_id column to benchmark_results table if it doesn't exist.
    """
    try:
        # Raw sqlite3 is used here so the upgrade works even when the ORM
        # models are out of sync with the on-disk schema.
        import sqlite3

        db_path = engine.url.database

        logger.info("Checking if benchmark_results needs research_id column...")

        conn = sqlite3.connect(db_path)
        try:
            cursor = conn.cursor()

            # Nothing to do when the table itself is absent.
            cursor.execute(
                "SELECT name FROM sqlite_master WHERE type='table' AND name='benchmark_results'"
            )
            if cursor.fetchone() is None:
                logger.info("benchmark_results table does not exist, skipping")
                return True

            # PRAGMA table_info rows are (cid, name, type, ...); index 1 is
            # the column name.
            cursor.execute("PRAGMA table_info(benchmark_results)")
            existing_columns = {row[1] for row in cursor.fetchall()}

            if "research_id" in existing_columns:
                logger.info("benchmark_results already has research_id column")
                return True

            logger.info("Adding research_id column to benchmark_results table")
            cursor.execute(
                "ALTER TABLE benchmark_results ADD COLUMN research_id TEXT"
            )
            conn.commit()
            logger.info(
                "Successfully added research_id column to benchmark_results"
            )
            return True
        finally:
            conn.close()

    except Exception:
        logger.exception("Error adding research_id column to benchmark_results")
        return False
211
+
212
+
213
def add_uuid_id_column_to_research_history(engine):
    """
    Adds a new `uuid_id` string column to the `research_history` table if it
    does not exist already.
    """
    try:
        # Raw sqlite3 keeps this migration independent of the ORM models.
        import sqlite3

        db_path = engine.url.database

        logger.info("Checking if research_history needs uuid_id column...")

        conn = sqlite3.connect(db_path)
        try:
            cursor = conn.cursor()

            # Skip silently when the table has not been created yet.
            cursor.execute(
                "SELECT name FROM sqlite_master WHERE type='table' AND name='research_history'"
            )
            if cursor.fetchone() is None:
                logger.info("research_history table does not exist, skipping")
                return True

            # Column names are at index 1 of each PRAGMA table_info row.
            cursor.execute("PRAGMA table_info(research_history)")
            existing_columns = {row[1] for row in cursor.fetchall()}

            if "uuid_id" in existing_columns:
                logger.info("research_history already has uuid_id column")
                return True

            logger.info("Adding uuid_id column to research_history table")
            cursor.execute(
                "ALTER TABLE research_history ADD COLUMN uuid_id CHAR(36)"
            )
            conn.commit()
            logger.info("Successfully added uuid_id column to research_history")
            return True
        finally:
            conn.close()

    except Exception:
        logger.exception("Error adding uuid_id column to research_history")
        return False
264
+
265
+
266
def convert_research_id_to_string_if_needed(engine):
    """
    Convert research_id columns from Integer to String in all tables.
    Preserves existing data by converting integer IDs to string format.
    Only runs if integer research_id columns are detected.
    """
    try:
        import sqlite3

        # Get database path from engine
        db_path = engine.url.database

        logger.info(
            "Checking if research_id columns need conversion to string..."
        )

        conn = sqlite3.connect(db_path)
        # FK checks are disabled because tables are dropped and recreated
        # below; they are re-enabled in the finally block.
        conn.execute("PRAGMA foreign_keys = OFF")

        try:
            cursor = conn.cursor()

            # List of tables that might have research_id columns
            tables_to_check = [
                "token_usage",
                "model_usage",
                "search_calls",
                "benchmark_results",
            ]

            tables_needing_conversion = []

            # Check which tables need conversion
            for table_name in tables_to_check:
                # Check if table exists
                cursor.execute(
                    "SELECT name FROM sqlite_master WHERE type='table' AND name=?",
                    (table_name,),
                )
                if not cursor.fetchone():
                    continue

                # Check if research_id column exists and is integer type
                # (PRAGMA table_info rows: col[1] = name, col[2] = declared type)
                cursor.execute(f"PRAGMA table_info({table_name})")
                columns = cursor.fetchall()

                for col in columns:
                    col_name, col_type = col[1], col[2]
                    if col_name == "research_id" and (
                        "INTEGER" in col_type.upper()
                        or "INT" in col_type.upper()
                    ):
                        tables_needing_conversion.append(table_name)
                        break

            if not tables_needing_conversion:
                logger.info(
                    "All research_id columns are already string type, no conversion needed"
                )
                return True

            logger.info(
                f"Converting research_id to string in tables: {tables_needing_conversion}"
            )

            # Convert each table using SQLite's copy-and-rename pattern
            # (SQLite cannot ALTER a column's type in place).
            for table_name in tables_needing_conversion:
                logger.info(f"Converting {table_name} table...")

                # Get the current table schema
                cursor.execute(
                    f"SELECT sql FROM sqlite_master WHERE type='table' AND name='{table_name}'"
                )
                create_sql = cursor.fetchone()[0]

                # Create new table name
                new_table_name = f"{table_name}_new"

                # Modify the CREATE TABLE statement to change research_id to TEXT
                # NOTE(review): this textual replace assumes the stored DDL
                # spells the name/type exactly as "CREATE TABLE <name>" and
                # "research_id INTEGER"/"research_id INT" (no quoting, single
                # space). If it doesn't match, the new table silently keeps the
                # integer type and this upgrade re-runs on every start —
                # confirm against the actual DDL SQLAlchemy emits.
                new_create_sql = create_sql.replace(
                    f"CREATE TABLE {table_name}",
                    f"CREATE TABLE {new_table_name}",
                )
                new_create_sql = new_create_sql.replace(
                    "research_id INTEGER", "research_id TEXT"
                )
                new_create_sql = new_create_sql.replace(
                    "research_id INT", "research_id TEXT"
                )

                # Create the new table
                cursor.execute(new_create_sql)

                # Copy data from old table to new table, converting research_id to string
                cursor.execute(f"SELECT * FROM {table_name}")
                old_rows = cursor.fetchall()

                if old_rows:
                    # Get column names
                    cursor.execute(f"PRAGMA table_info({table_name})")
                    columns = cursor.fetchall()
                    column_names = [col[1] for col in columns]
                    research_id_index = (
                        column_names.index("research_id")
                        if "research_id" in column_names
                        else -1
                    )

                    # Prepare insert statement
                    placeholders = ",".join(["?" for _ in column_names])
                    insert_sql = f"INSERT INTO {new_table_name} ({','.join(column_names)}) VALUES ({placeholders})"

                    # Convert rows and insert
                    converted_rows = []
                    for row in old_rows:
                        row_list = list(row)
                        # Convert research_id to string if it's not None
                        if (
                            research_id_index >= 0
                            and row_list[research_id_index] is not None
                        ):
                            row_list[research_id_index] = str(
                                row_list[research_id_index]
                            )
                        converted_rows.append(tuple(row_list))

                    cursor.executemany(insert_sql, converted_rows)
                    logger.info(
                        f"Converted {len(converted_rows)} rows in {table_name}"
                    )

                # Drop old table and rename new table
                # NOTE(review): DROP TABLE also drops any indexes/triggers
                # defined on the old table; they are not recreated here —
                # verify none of these tables rely on secondary indexes.
                cursor.execute(f"DROP TABLE {table_name}")
                cursor.execute(
                    f"ALTER TABLE {new_table_name} RENAME TO {table_name}"
                )

                logger.info(
                    f"Successfully converted {table_name} research_id to string"
                )

            # Commit all changes
            conn.commit()
            logger.info(
                "All research_id columns converted to string successfully!"
            )
            return True

        finally:
            conn.execute("PRAGMA foreign_keys = ON")
            conn.close()

    except Exception:
        logger.exception("Error converting research_id columns to string")
        return False
421
+
422
+
423
def create_provider_models_table(engine):
    """Create provider_models table for caching available models.

    Args:
        engine: SQLAlchemy engine

    Returns:
        bool: True if the table already exists or was created, False on error.
        (Previously this helper returned None and let exceptions escape;
        it now matches the log-and-return-False contract of the other
        upgrade helpers in this module.)
    """
    try:
        with engine.connect() as conn:
            # SQLite-specific existence check via sqlite_master.
            result = conn.execute(
                text(
                    "SELECT name FROM sqlite_master WHERE type='table' AND name='provider_models'"
                )
            )
            if result.fetchone():
                logger.info(
                    "Table 'provider_models' already exists, no action needed"
                )
                return True

            logger.info("Creating 'provider_models' table...")

            # Create the table; (provider, model_key) is unique so cached
            # model lists can be upserted per provider.
            conn.execute(
                text(
                    """
                    CREATE TABLE provider_models (
                        id INTEGER PRIMARY KEY AUTOINCREMENT,
                        provider VARCHAR(50) NOT NULL,
                        model_key VARCHAR(255) NOT NULL,
                        model_label VARCHAR(255) NOT NULL,
                        model_metadata JSON,
                        last_updated DATETIME DEFAULT CURRENT_TIMESTAMP NOT NULL,
                        UNIQUE(provider, model_key)
                    )
                    """
                )
            )

            # Create index on provider for fast per-provider lookups.
            conn.execute(
                text(
                    "CREATE INDEX ix_provider_models_provider ON provider_models (provider)"
                )
            )

            conn.commit()
            logger.info("Table 'provider_models' created successfully")
            return True
    except Exception:
        # Consistent with the sibling upgrade helpers: log and report
        # failure instead of raising, so remaining upgrades can still run.
        logger.exception("Error creating 'provider_models' table")
        return False
465
+
466
+
90
467
  def run_schema_upgrades():
91
468
  """
92
469
  Run all schema upgrade operations on the database
@@ -113,6 +490,24 @@ def run_schema_upgrades():
113
490
  # 2. Create research_strategies table
114
491
  create_research_strategy_table(engine)
115
492
 
493
+ # 3. Create benchmark tables
494
+ create_benchmark_tables(engine)
495
+
496
+ # 4. Create rate limiting tables
497
+ create_rate_limiting_tables(engine)
498
+
499
+ # 5. Add research_id column to benchmark_results if missing
500
+ add_research_id_to_benchmark_results(engine)
501
+
502
+ # 6. Convert research_id columns from integer to string
503
+ convert_research_id_to_string_if_needed(engine)
504
+
505
+ # 7. Add uuid_id column to research_history if missing
506
+ add_uuid_id_column_to_research_history(engine)
507
+
508
+ # 8. Create provider_models table for caching
509
+ create_provider_models_table(engine)
510
+
116
511
  logger.info("Schema upgrades completed successfully")
117
512
  return True
118
513
  except Exception:
@@ -0,0 +1,265 @@
1
+ """
2
+ UUID Migration Script
3
+
4
+ Migrates all research_id fields from Integer to String (UUID) format.
5
+ This creates a more consistent and scalable ID system across the application.
6
+ """
7
+
8
+ import sqlite3
9
+ import uuid
10
+ from pathlib import Path
11
+ from loguru import logger
12
+
13
+
14
+ def get_database_path():
15
+ """Get the path to the SQLite database."""
16
+ data_dir = Path(__file__).parents[3] / "data"
17
+ return data_dir / "ldr.db"
18
+
19
+
20
+ def migrate_to_uuid():
21
+ """
22
+ Migrate all research_id fields from integers to UUIDs.
23
+
24
+ Strategy:
25
+ 1. Add new UUID columns alongside existing integer columns
26
+ 2. Generate UUIDs for existing data (or keep as string versions of integers)
27
+ 3. Update foreign key relationships
28
+ 4. Drop old integer columns and rename UUID columns
29
+ """
30
+ db_path = get_database_path()
31
+
32
+ if not db_path.exists():
33
+ logger.info("Database doesn't exist yet, migration not needed")
34
+ return
35
+
36
+ logger.info(f"Starting UUID migration on {db_path}")
37
+
38
+ conn = sqlite3.connect(db_path)
39
+ conn.execute(
40
+ "PRAGMA foreign_keys = OFF"
41
+ ) # Disable FK constraints during migration
42
+
43
+ try:
44
+ cursor = conn.cursor()
45
+
46
+ # 1. Migrate research_history table (main research IDs)
47
+ logger.info("Migrating research_history table...")
48
+
49
+ # Check if the table exists and has the old structure
50
+ cursor.execute("PRAGMA table_info(research_history)")
51
+ columns = cursor.fetchall()
52
+ has_uuid_id = any(col[1] == "uuid_id" for col in columns)
53
+
54
+ if not has_uuid_id:
55
+ # Add UUID column
56
+ cursor.execute(
57
+ "ALTER TABLE research_history ADD COLUMN uuid_id TEXT"
58
+ )
59
+
60
+ # Generate UUIDs for existing records (convert integer ID to UUID format)
61
+ cursor.execute("SELECT id FROM research_history")
62
+ existing_ids = cursor.fetchall()
63
+
64
+ for (old_id,) in existing_ids:
65
+ # Generate a deterministic UUID based on the old ID
66
+ new_uuid = str(
67
+ uuid.uuid5(uuid.NAMESPACE_OID, f"research_{old_id}")
68
+ )
69
+ cursor.execute(
70
+ "UPDATE research_history SET uuid_id = ? WHERE id = ?",
71
+ (new_uuid, old_id),
72
+ )
73
+
74
+ logger.info(
75
+ f"Generated UUIDs for {len(existing_ids)} research records"
76
+ )
77
+
78
+ # 2. Migrate metrics tables
79
+ logger.info("Migrating metrics tables...")
80
+
81
+ # Token usage table
82
+ cursor.execute("PRAGMA table_info(token_usage)")
83
+ columns = cursor.fetchall()
84
+ has_uuid_research_id = any(
85
+ col[1] == "uuid_research_id" for col in columns
86
+ )
87
+
88
+ if not has_uuid_research_id:
89
+ cursor.execute(
90
+ "ALTER TABLE token_usage ADD COLUMN uuid_research_id TEXT"
91
+ )
92
+
93
+ # Convert existing research_ids to UUIDs (deterministic conversion)
94
+ cursor.execute(
95
+ "SELECT DISTINCT research_id FROM token_usage WHERE research_id IS NOT NULL"
96
+ )
97
+ research_ids = cursor.fetchall()
98
+
99
+ for (research_id,) in research_ids:
100
+ if research_id:
101
+ new_uuid = str(
102
+ uuid.uuid5(
103
+ uuid.NAMESPACE_OID, f"research_{research_id}"
104
+ )
105
+ )
106
+ cursor.execute(
107
+ "UPDATE token_usage SET uuid_research_id = ? WHERE research_id = ?",
108
+ (new_uuid, research_id),
109
+ )
110
+
111
+ logger.info(
112
+ f"Migrated {len(research_ids)} research IDs in token_usage"
113
+ )
114
+
115
+ # Model usage table
116
+ cursor.execute("PRAGMA table_info(model_usage)")
117
+ columns = cursor.fetchall()
118
+ has_uuid_research_id = any(
119
+ col[1] == "uuid_research_id" for col in columns
120
+ )
121
+
122
+ if not has_uuid_research_id:
123
+ cursor.execute(
124
+ "ALTER TABLE model_usage ADD COLUMN uuid_research_id TEXT"
125
+ )
126
+
127
+ cursor.execute(
128
+ "SELECT DISTINCT research_id FROM model_usage WHERE research_id IS NOT NULL"
129
+ )
130
+ research_ids = cursor.fetchall()
131
+
132
+ for (research_id,) in research_ids:
133
+ if research_id:
134
+ new_uuid = str(
135
+ uuid.uuid5(
136
+ uuid.NAMESPACE_OID, f"research_{research_id}"
137
+ )
138
+ )
139
+ cursor.execute(
140
+ "UPDATE model_usage SET uuid_research_id = ? WHERE research_id = ?",
141
+ (new_uuid, research_id),
142
+ )
143
+
144
+ logger.info(
145
+ f"Migrated {len(research_ids)} research IDs in model_usage"
146
+ )
147
+
148
+ # Search calls table
149
+ cursor.execute("PRAGMA table_info(search_calls)")
150
+ columns = cursor.fetchall()
151
+ has_uuid_research_id = any(
152
+ col[1] == "uuid_research_id" for col in columns
153
+ )
154
+
155
+ if not has_uuid_research_id:
156
+ cursor.execute(
157
+ "ALTER TABLE search_calls ADD COLUMN uuid_research_id TEXT"
158
+ )
159
+
160
+ cursor.execute(
161
+ "SELECT DISTINCT research_id FROM search_calls WHERE research_id IS NOT NULL"
162
+ )
163
+ research_ids = cursor.fetchall()
164
+
165
+ for (research_id,) in research_ids:
166
+ if research_id:
167
+ new_uuid = str(
168
+ uuid.uuid5(
169
+ uuid.NAMESPACE_OID, f"research_{research_id}"
170
+ )
171
+ )
172
+ cursor.execute(
173
+ "UPDATE search_calls SET uuid_research_id = ? WHERE research_id = ?",
174
+ (new_uuid, research_id),
175
+ )
176
+
177
+ logger.info(
178
+ f"Migrated {len(research_ids)} research IDs in search_calls"
179
+ )
180
+
181
+ # 3. Migrate benchmark tables
182
+ logger.info("Migrating benchmark tables...")
183
+
184
+ # Check if benchmark_results table exists
185
+ cursor.execute(
186
+ "SELECT name FROM sqlite_master WHERE type='table' AND name='benchmark_results'"
187
+ )
188
+ if cursor.fetchone():
189
+ cursor.execute("PRAGMA table_info(benchmark_results)")
190
+ columns = cursor.fetchall()
191
+ has_uuid_research_id = any(
192
+ col[1] == "uuid_research_id" for col in columns
193
+ )
194
+
195
+ if not has_uuid_research_id:
196
+ cursor.execute(
197
+ "ALTER TABLE benchmark_results ADD COLUMN uuid_research_id TEXT"
198
+ )
199
+
200
+ cursor.execute(
201
+ "SELECT DISTINCT research_id FROM benchmark_results WHERE research_id IS NOT NULL"
202
+ )
203
+ research_ids = cursor.fetchall()
204
+
205
+ for (research_id,) in research_ids:
206
+ if research_id:
207
+ new_uuid = str(
208
+ uuid.uuid5(
209
+ uuid.NAMESPACE_OID, f"research_{research_id}"
210
+ )
211
+ )
212
+ cursor.execute(
213
+ "UPDATE benchmark_results SET uuid_research_id = ? WHERE research_id = ?",
214
+ (new_uuid, research_id),
215
+ )
216
+
217
+ logger.info(
218
+ f"Migrated {len(research_ids)} research IDs in benchmark_results"
219
+ )
220
+
221
+ # Commit all changes
222
+ conn.commit()
223
+ logger.info("UUID migration completed successfully!")
224
+
225
+ # Note: We're keeping both old and new columns for now
226
+ # The application will use the new UUID columns
227
+ # Old columns can be dropped in a future migration once everything is stable
228
+
229
+ except Exception as e:
230
+ logger.error(f"Error during UUID migration: {e}")
231
+ conn.rollback()
232
+ raise
233
+ finally:
234
+ conn.execute("PRAGMA foreign_keys = ON") # Re-enable FK constraints
235
+ conn.close()
236
+
237
+
238
+ def cleanup_old_columns():
239
+ """
240
+ Cleanup migration - drops old integer columns after UUID migration is stable.
241
+ Run this only after confirming the UUID migration is working correctly.
242
+ """
243
+ logger.warning(
244
+ "This will permanently remove old integer research_id columns!"
245
+ )
246
+ logger.warning("Make sure to backup your database before running this!")
247
+
248
+ db_path = get_database_path()
249
+ conn = sqlite3.connect(db_path)
250
+ conn.execute("PRAGMA foreign_keys = OFF")
251
+
252
+ try:
253
+ # For SQLite, we need to recreate tables to drop columns
254
+ # This is complex, so we'll leave old columns for now
255
+ # They can be cleaned up manually if needed
256
+
257
+ logger.info("Cleanup deferred - old columns remain for safety")
258
+
259
+ finally:
260
+ conn.execute("PRAGMA foreign_keys = ON")
261
+ conn.close()
262
+
263
+
264
+ if __name__ == "__main__":
265
+ migrate_to_uuid()