local-deep-research 0.5.7__py3-none-any.whl → 0.6.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (91)
  1. local_deep_research/__version__.py +1 -1
  2. local_deep_research/advanced_search_system/candidate_exploration/progressive_explorer.py +11 -1
  3. local_deep_research/advanced_search_system/questions/browsecomp_question.py +32 -6
  4. local_deep_research/advanced_search_system/strategies/focused_iteration_strategy.py +33 -8
  5. local_deep_research/advanced_search_system/strategies/source_based_strategy.py +2 -0
  6. local_deep_research/api/__init__.py +2 -0
  7. local_deep_research/api/research_functions.py +177 -3
  8. local_deep_research/benchmarks/graders.py +150 -5
  9. local_deep_research/benchmarks/models/__init__.py +19 -0
  10. local_deep_research/benchmarks/models/benchmark_models.py +283 -0
  11. local_deep_research/benchmarks/ui/__init__.py +1 -0
  12. local_deep_research/benchmarks/web_api/__init__.py +6 -0
  13. local_deep_research/benchmarks/web_api/benchmark_routes.py +862 -0
  14. local_deep_research/benchmarks/web_api/benchmark_service.py +920 -0
  15. local_deep_research/config/llm_config.py +106 -21
  16. local_deep_research/defaults/default_settings.json +448 -3
  17. local_deep_research/error_handling/report_generator.py +10 -0
  18. local_deep_research/llm/__init__.py +19 -0
  19. local_deep_research/llm/llm_registry.py +155 -0
  20. local_deep_research/metrics/db_models.py +3 -7
  21. local_deep_research/metrics/search_tracker.py +25 -11
  22. local_deep_research/report_generator.py +3 -2
  23. local_deep_research/search_system.py +12 -9
  24. local_deep_research/utilities/log_utils.py +23 -10
  25. local_deep_research/utilities/thread_context.py +99 -0
  26. local_deep_research/web/app_factory.py +32 -8
  27. local_deep_research/web/database/benchmark_schema.py +230 -0
  28. local_deep_research/web/database/convert_research_id_to_string.py +161 -0
  29. local_deep_research/web/database/models.py +55 -1
  30. local_deep_research/web/database/schema_upgrade.py +397 -2
  31. local_deep_research/web/database/uuid_migration.py +265 -0
  32. local_deep_research/web/routes/api_routes.py +62 -31
  33. local_deep_research/web/routes/history_routes.py +13 -6
  34. local_deep_research/web/routes/metrics_routes.py +264 -4
  35. local_deep_research/web/routes/research_routes.py +45 -18
  36. local_deep_research/web/routes/route_registry.py +352 -0
  37. local_deep_research/web/routes/settings_routes.py +382 -22
  38. local_deep_research/web/services/research_service.py +22 -29
  39. local_deep_research/web/services/settings_manager.py +53 -0
  40. local_deep_research/web/services/settings_service.py +2 -0
  41. local_deep_research/web/static/css/styles.css +8 -0
  42. local_deep_research/web/static/js/components/detail.js +7 -14
  43. local_deep_research/web/static/js/components/details.js +8 -10
  44. local_deep_research/web/static/js/components/fallback/ui.js +4 -4
  45. local_deep_research/web/static/js/components/history.js +6 -6
  46. local_deep_research/web/static/js/components/logpanel.js +14 -11
  47. local_deep_research/web/static/js/components/progress.js +51 -46
  48. local_deep_research/web/static/js/components/research.js +250 -89
  49. local_deep_research/web/static/js/components/results.js +5 -7
  50. local_deep_research/web/static/js/components/settings.js +32 -26
  51. local_deep_research/web/static/js/components/settings_sync.js +24 -23
  52. local_deep_research/web/static/js/config/urls.js +285 -0
  53. local_deep_research/web/static/js/main.js +8 -8
  54. local_deep_research/web/static/js/research_form.js +267 -12
  55. local_deep_research/web/static/js/services/api.js +18 -18
  56. local_deep_research/web/static/js/services/keyboard.js +8 -8
  57. local_deep_research/web/static/js/services/socket.js +53 -35
  58. local_deep_research/web/static/js/services/ui.js +1 -1
  59. local_deep_research/web/templates/base.html +4 -1
  60. local_deep_research/web/templates/components/custom_dropdown.html +5 -3
  61. local_deep_research/web/templates/components/mobile_nav.html +3 -3
  62. local_deep_research/web/templates/components/sidebar.html +9 -3
  63. local_deep_research/web/templates/pages/benchmark.html +2697 -0
  64. local_deep_research/web/templates/pages/benchmark_results.html +1136 -0
  65. local_deep_research/web/templates/pages/benchmark_simple.html +453 -0
  66. local_deep_research/web/templates/pages/cost_analytics.html +1 -1
  67. local_deep_research/web/templates/pages/metrics.html +212 -39
  68. local_deep_research/web/templates/pages/research.html +8 -6
  69. local_deep_research/web/templates/pages/star_reviews.html +1 -1
  70. local_deep_research/web_search_engines/engines/search_engine_arxiv.py +14 -1
  71. local_deep_research/web_search_engines/engines/search_engine_brave.py +15 -1
  72. local_deep_research/web_search_engines/engines/search_engine_ddg.py +20 -1
  73. local_deep_research/web_search_engines/engines/search_engine_google_pse.py +26 -2
  74. local_deep_research/web_search_engines/engines/search_engine_pubmed.py +15 -1
  75. local_deep_research/web_search_engines/engines/search_engine_retriever.py +192 -0
  76. local_deep_research/web_search_engines/engines/search_engine_tavily.py +307 -0
  77. local_deep_research/web_search_engines/rate_limiting/__init__.py +14 -0
  78. local_deep_research/web_search_engines/rate_limiting/__main__.py +9 -0
  79. local_deep_research/web_search_engines/rate_limiting/cli.py +209 -0
  80. local_deep_research/web_search_engines/rate_limiting/exceptions.py +21 -0
  81. local_deep_research/web_search_engines/rate_limiting/tracker.py +506 -0
  82. local_deep_research/web_search_engines/retriever_registry.py +108 -0
  83. local_deep_research/web_search_engines/search_engine_base.py +161 -43
  84. local_deep_research/web_search_engines/search_engine_factory.py +14 -0
  85. local_deep_research/web_search_engines/search_engines_config.py +20 -0
  86. local_deep_research-0.6.0.dist-info/METADATA +374 -0
  87. {local_deep_research-0.5.7.dist-info → local_deep_research-0.6.0.dist-info}/RECORD +90 -65
  88. local_deep_research-0.5.7.dist-info/METADATA +0 -420
  89. {local_deep_research-0.5.7.dist-info → local_deep_research-0.6.0.dist-info}/WHEEL +0 -0
  90. {local_deep_research-0.5.7.dist-info → local_deep_research-0.6.0.dist-info}/entry_points.txt +0 -0
  91. {local_deep_research-0.5.7.dist-info → local_deep_research-0.6.0.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,230 @@
1
+ """Simple benchmark table definitions for schema creation."""
2
+
3
+ import enum
4
+
5
+ from sqlalchemy import (
6
+ JSON,
7
+ Boolean,
8
+ Column,
9
+ DateTime,
10
+ Enum,
11
+ Float,
12
+ ForeignKey,
13
+ Integer,
14
+ String,
15
+ Text,
16
+ UniqueConstraint,
17
+ Index,
18
+ )
19
+ from sqlalchemy.sql import func
20
+
21
+
22
@enum.unique
class BenchmarkStatus(enum.Enum):
    """Status of a benchmark run.

    ``@enum.unique`` is a safety net: it raises at import time if two
    statuses are ever accidentally given the same string value.
    """

    PENDING = "pending"  # created, not yet started
    IN_PROGRESS = "in_progress"  # currently executing
    COMPLETED = "completed"  # finished successfully
    FAILED = "failed"  # stopped because of an error
    CANCELLED = "cancelled"  # stopped on request
    PAUSED = "paused"  # suspended; may be resumed
31
+
32
+
33
@enum.unique
class DatasetType(enum.Enum):
    """Supported dataset types.

    ``@enum.unique`` guards against two dataset kinds aliasing the same
    string value.
    """

    SIMPLEQA = "simpleqa"
    BROWSECOMP = "browsecomp"
    CUSTOM = "custom"  # user-supplied dataset
39
+
40
+
41
# Simple table definitions for creation.
# These are plain dicts (not declarative models); create_benchmark_tables_simple()
# below turns them into SQLAlchemy Table objects.
benchmark_runs_table = {
    "table_name": "benchmark_runs",
    "columns": [
        Column("id", Integer, primary_key=True, index=True),
        # Optional human-readable name for the run.
        Column("run_name", String(255), nullable=True),
        # 16-char hash identifying this run's configuration (presumably
        # derived from the *_config JSON below -- confirm with the service).
        Column("config_hash", String(16), nullable=False, index=True),
        # Hashes of the individual queries belonging to this run.
        Column("query_hash_list", JSON, nullable=False),
        # Configuration snapshots stored as JSON.
        Column("search_config", JSON, nullable=False),
        Column("evaluation_config", JSON, nullable=False),
        Column("datasets_config", JSON, nullable=False),
        # Lifecycle state; see BenchmarkStatus.
        Column(
            "status",
            Enum(BenchmarkStatus),
            default=BenchmarkStatus.PENDING,
            nullable=False,
        ),
        # Row bookkeeping timestamps, maintained by the database.
        Column(
            "created_at", DateTime, server_default=func.now(), nullable=False
        ),
        Column(
            "updated_at",
            DateTime,
            server_default=func.now(),
            onupdate=func.now(),
            nullable=False,
        ),
        # Wall-clock start/end of the benchmark execution itself.
        Column("start_time", DateTime, nullable=True),
        Column("end_time", DateTime, nullable=True),
        # Progress counters.
        Column("total_examples", Integer, default=0, nullable=False),
        Column("completed_examples", Integer, default=0, nullable=False),
        Column("failed_examples", Integer, default=0, nullable=False),
        # Aggregate results; NULL until computed.
        Column("overall_accuracy", Float, nullable=True),
        Column("processing_rate", Float, nullable=True),
        Column("error_message", Text, nullable=True),
    ],
    "indexes": [
        Index("idx_benchmark_runs_config_hash", "config_hash"),
        Index("idx_benchmark_runs_status_created", "status", "created_at"),
    ],
}
82
+
83
# Per-example results for a benchmark run.
benchmark_results_table = {
    "table_name": "benchmark_results",
    "columns": [
        Column("id", Integer, primary_key=True, index=True),
        # Owning run; rows are removed when the run is deleted (CASCADE).
        Column(
            "benchmark_run_id",
            Integer,
            ForeignKey("benchmark_runs.id", ondelete="CASCADE"),
            nullable=False,
            index=True,
        ),
        # Identifier of the example within its dataset.
        Column("example_id", String(255), nullable=False),
        # 32-char hash of the query; unique per run (see uix_run_query below).
        Column("query_hash", String(32), nullable=False, index=True),
        Column("dataset_type", Enum(DatasetType), nullable=False),
        # The benchmark question and its reference answer.
        Column("question", Text, nullable=False),
        Column("correct_answer", Text, nullable=False),
        # Research output; NULL until the example has been processed.
        Column("response", Text, nullable=True),
        Column("extracted_answer", Text, nullable=True),
        Column("confidence", String(10), nullable=True),
        Column("processing_time", Float, nullable=True),
        Column("sources", JSON, nullable=True),
        # Grading outcome; NULL until evaluated.
        Column("is_correct", Boolean, nullable=True),
        Column("graded_confidence", String(10), nullable=True),
        Column("grader_response", Text, nullable=True),
        Column(
            "created_at", DateTime, server_default=func.now(), nullable=False
        ),
        Column("completed_at", DateTime, nullable=True),
        # Error details, recorded separately for the research and
        # evaluation phases.
        Column("research_error", Text, nullable=True),
        Column("evaluation_error", Text, nullable=True),
        Column("task_index", Integer, nullable=True),
        Column("result_metadata", JSON, nullable=True),
    ],
    "indexes": [
        Index(
            "idx_benchmark_results_run_dataset",
            "benchmark_run_id",
            "dataset_type",
        ),
        Index("idx_benchmark_results_query_hash", "query_hash"),
        Index("idx_benchmark_results_completed", "completed_at"),
    ],
    "constraints": [
        # A given query may appear at most once per run.
        UniqueConstraint(
            "benchmark_run_id", "query_hash", name="uix_run_query"
        ),
    ],
}
131
+
132
# Saved, named benchmark configurations that can be reused across runs.
benchmark_configs_table = {
    "table_name": "benchmark_configs",
    "columns": [
        Column("id", Integer, primary_key=True, index=True),
        Column("name", String(255), nullable=False),
        Column("description", Text, nullable=True),
        # 16-char hash of the configuration; matches benchmark_runs.config_hash.
        Column("config_hash", String(16), nullable=False, index=True),
        # Configuration snapshots stored as JSON.
        Column("search_config", JSON, nullable=False),
        Column("evaluation_config", JSON, nullable=False),
        Column("datasets_config", JSON, nullable=False),
        # Row bookkeeping timestamps, maintained by the database.
        Column(
            "created_at", DateTime, server_default=func.now(), nullable=False
        ),
        Column(
            "updated_at",
            DateTime,
            server_default=func.now(),
            onupdate=func.now(),
            nullable=False,
        ),
        # Selection flags (presumably consumed by the web UI -- confirm).
        Column("is_default", Boolean, default=False, nullable=False),
        Column("is_public", Boolean, default=True, nullable=False),
        # Usage statistics for this configuration.
        Column("usage_count", Integer, default=0, nullable=False),
        Column("last_used", DateTime, nullable=True),
        Column("best_accuracy", Float, nullable=True),
        Column("avg_processing_rate", Float, nullable=True),
    ],
    "indexes": [
        Index("idx_benchmark_configs_name", "name"),
        Index("idx_benchmark_configs_hash", "config_hash"),
        Index("idx_benchmark_configs_default", "is_default"),
    ],
}
165
+
166
# Point-in-time progress snapshots recorded while a run executes.
benchmark_progress_table = {
    "table_name": "benchmark_progress",
    "columns": [
        Column("id", Integer, primary_key=True, index=True),
        # Owning run; snapshots are removed when the run is deleted (CASCADE).
        Column(
            "benchmark_run_id",
            Integer,
            ForeignKey("benchmark_runs.id", ondelete="CASCADE"),
            nullable=False,
            index=True,
        ),
        # When this snapshot was taken (set by the database).
        Column(
            "timestamp", DateTime, server_default=func.now(), nullable=False
        ),
        # Progress counters at snapshot time.
        Column("completed_examples", Integer, nullable=False),
        Column("total_examples", Integer, nullable=False),
        # Accuracy overall and broken down per dataset (JSON mapping).
        Column("overall_accuracy", Float, nullable=True),
        Column("dataset_accuracies", JSON, nullable=True),
        Column("processing_rate", Float, nullable=True),
        Column("estimated_completion", DateTime, nullable=True),
        # What the runner is currently working on.
        Column("current_dataset", Enum(DatasetType), nullable=True),
        Column("current_example_id", String(255), nullable=True),
        # Host resource usage; units are not defined here (presumably
        # percent and/or MB -- confirm with the code that writes these).
        Column("memory_usage", Float, nullable=True),
        Column("cpu_usage", Float, nullable=True),
    ],
    "indexes": [
        Index(
            "idx_benchmark_progress_run_time", "benchmark_run_id", "timestamp"
        ),
    ],
}
197
+
198
+
199
def create_benchmark_tables_simple(engine):
    """Create all benchmark tables from the simple table definitions above.

    Args:
        engine: SQLAlchemy engine (or other connectable) to create the
            tables on.

    Tables that already exist are left untouched (``checkfirst=True``).
    """
    from sqlalchemy import Table, MetaData

    metadata = MetaData()

    # All benchmark table definitions in dependency order
    # (benchmark_runs first, since the others reference it via FK).
    tables_to_create = [
        benchmark_runs_table,
        benchmark_results_table,
        benchmark_configs_table,
        benchmark_progress_table,
    ]

    for table_def in tables_to_create:
        # Pass indexes and constraints to the Table constructor alongside
        # the columns.  This registers them through SQLAlchemy's normal
        # _set_parent() machinery; merely assigning ``index.table = table``
        # (the previous approach) does not add the index to
        # ``table.indexes``, so ``metadata.create_all`` would never emit
        # CREATE INDEX for it.
        Table(
            table_def["table_name"],
            metadata,
            *table_def["columns"],
            *table_def.get("indexes", []),
            *table_def.get("constraints", []),
            extend_existing=True,
        )

    # Create everything in one pass; skip objects that already exist.
    metadata.create_all(engine, checkfirst=True)
@@ -0,0 +1,161 @@
1
+ """
2
+ Convert research_id columns from Integer to String.
3
+
4
+ This migration converts existing integer research_id values to string format
5
+ while preserving all existing data. New records will use UUID strings.
6
+ """
7
+
8
+ import sqlite3
9
+ from pathlib import Path
10
+ from loguru import logger
11
+
12
+
13
+ def get_database_path():
14
+ """Get the path to the SQLite database."""
15
+ data_dir = Path(__file__).parents[3] / "data"
16
+ return data_dir / "ldr.db"
17
+
18
+
19
def convert_research_id_to_string():
    """
    Convert research_id columns from Integer to String in all tables.

    Preserves existing data by converting integer IDs to string format.
    SQLite cannot ALTER a column's type in place, so each affected table is
    rebuilt: create a ``<table>_new`` clone with ``research_id TEXT``, copy
    all rows across (stringifying research_id), then drop the old table and
    rename the clone into place.  All changes are committed together; any
    error rolls back the whole migration and re-raises.
    """
    db_path = get_database_path()

    # Nothing to migrate on a fresh install.
    if not db_path.exists():
        logger.info("Database doesn't exist yet, migration not needed")
        return

    logger.info(f"Converting research_id columns to string in {db_path}")

    conn = sqlite3.connect(db_path)
    conn.execute(
        "PRAGMA foreign_keys = OFF"
    )  # Disable FK constraints during migration

    try:
        cursor = conn.cursor()

        # List of tables that have research_id columns
        tables_to_migrate = [
            "token_usage",
            "model_usage",
            "search_calls",
            "benchmark_results",  # If it exists
        ]

        for table_name in tables_to_migrate:
            logger.info(f"Converting {table_name} table...")

            # Check if table exists
            cursor.execute(
                "SELECT name FROM sqlite_master WHERE type='table' AND name=?",
                (table_name,),
            )
            if not cursor.fetchone():
                logger.info(f"Table {table_name} does not exist, skipping")
                continue

            # Check if research_id column exists (PRAGMA row: index 1 = name).
            cursor.execute(f"PRAGMA table_info({table_name})")
            columns = cursor.fetchall()
            has_research_id = any(col[1] == "research_id" for col in columns)

            if not has_research_id:
                logger.info(
                    f"Table {table_name} does not have research_id column, skipping"
                )
                continue

            # For SQLite, we need to recreate the table to change column type
            # 1. Create new table with string research_id
            # 2. Copy data with research_id converted to string
            # 3. Drop old table and rename new table

            # Get the current table schema (the original CREATE TABLE DDL).
            cursor.execute(
                f"SELECT sql FROM sqlite_master WHERE type='table' AND name='{table_name}'"
            )
            create_sql = cursor.fetchone()[0]

            # Create new table name
            new_table_name = f"{table_name}_new"

            # Modify the CREATE TABLE statement to change research_id to TEXT.
            # NOTE(review): these string replaces assume the stored DDL spells
            # the name unquoted as "CREATE TABLE {table_name}" and the column
            # type as INTEGER/INT; quoted or differently-cased DDL would not
            # match -- confirm against the schemas actually created upstream.
            new_create_sql = create_sql.replace(
                f"CREATE TABLE {table_name}", f"CREATE TABLE {new_table_name}"
            )
            new_create_sql = new_create_sql.replace(
                "research_id INTEGER", "research_id TEXT"
            )
            new_create_sql = new_create_sql.replace(
                "research_id INT", "research_id TEXT"
            )

            # Create the new table
            cursor.execute(new_create_sql)

            # Copy data from old table to new table, converting research_id
            # to string.
            # NOTE(review): fetchall() loads the whole table into memory;
            # fine for modest tables, revisit if these grow large.
            cursor.execute(f"SELECT * FROM {table_name}")
            old_rows = cursor.fetchall()

            if old_rows:
                # Get column names
                cursor.execute(f"PRAGMA table_info({table_name})")
                columns = cursor.fetchall()
                column_names = [col[1] for col in columns]
                research_id_index = (
                    column_names.index("research_id")
                    if "research_id" in column_names
                    else -1
                )

                # Prepare insert statement
                placeholders = ",".join(["?" for _ in column_names])
                insert_sql = f"INSERT INTO {new_table_name} ({','.join(column_names)}) VALUES ({placeholders})"

                # Convert rows and insert
                converted_rows = []
                for row in old_rows:
                    row_list = list(row)
                    # Convert research_id to string if it's not None
                    if (
                        research_id_index >= 0
                        and row_list[research_id_index] is not None
                    ):
                        row_list[research_id_index] = str(
                            row_list[research_id_index]
                        )
                    converted_rows.append(tuple(row_list))

                cursor.executemany(insert_sql, converted_rows)
                logger.info(
                    f"Converted {len(converted_rows)} rows in {table_name}"
                )

            # Drop old table and rename new table.
            # NOTE(review): DROP TABLE also discards any indexes/triggers
            # defined on the old table; they are not recreated here --
            # confirm none exist on these tables (or recreate them).
            cursor.execute(f"DROP TABLE {table_name}")
            cursor.execute(
                f"ALTER TABLE {new_table_name} RENAME TO {table_name}"
            )

            logger.info(
                f"Successfully converted {table_name} research_id to string"
            )

        # Commit all changes
        conn.commit()
        logger.info("All research_id columns converted to string successfully!")

    except Exception as e:
        logger.error(f"Error during research_id conversion: {e}")
        conn.rollback()
        raise
    finally:
        conn.execute("PRAGMA foreign_keys = ON")  # Re-enable FK constraints
        conn.close()
158
+
159
+
160
# Allow running this migration directly as a standalone script.
if __name__ == "__main__":
    convert_research_id_to_string()
@@ -39,8 +39,10 @@ class ResearchHistory(Base):
39
39
 
40
40
  __tablename__ = "research_history"
41
41
 
42
- # Unique identifier for each record.
42
+ # Legacy integer ID (kept for migration compatibility)
43
43
  id = Column(Integer, primary_key=True, autoincrement=True)
44
+ # New UUID identifier (primary field to use for new records)
45
+ uuid_id = Column(String(36), unique=True, index=True)
44
46
  # The search query.
45
47
  query = Column(Text, nullable=False)
46
48
  # The mode of research (e.g., 'quick_summary', 'detailed_report').
@@ -184,3 +186,55 @@ class Journal(Base):
184
186
  quality_model = Column(String(255), nullable=True, index=True)
185
187
  # Time at which the quality was last analyzed.
186
188
  quality_analysis_time = Column(Integer, nullable=False)
189
+
190
+
191
class RateLimitAttempt(Base):
    """Database model for tracking individual rate limit retry attempts."""

    __tablename__ = "rate_limit_attempts"

    id = Column(Integer, primary_key=True, index=True)
    # Search engine identifier this attempt was made against.
    engine_type = Column(String(100), nullable=False, index=True)
    # Float timestamp of the attempt (presumably Unix epoch seconds --
    # confirm with the rate limiting tracker that writes these rows).
    timestamp = Column(Float, nullable=False, index=True)
    # Seconds waited before making this attempt.
    wait_time = Column(Float, nullable=False)
    # Number of retries that preceded this attempt.
    retry_count = Column(Integer, nullable=False)
    # Whether the request ultimately succeeded.
    success = Column(Boolean, nullable=False)
    # Error classification when the attempt failed; NULL on success.
    error_type = Column(String(100), nullable=True)
    # Row creation time, maintained by the database.
    created_at = Column(DateTime, server_default=func.now(), nullable=False)
204
+
205
+
206
class RateLimitEstimate(Base):
    """Database model for storing current rate limit estimates per engine."""

    __tablename__ = "rate_limit_estimates"

    id = Column(Integer, primary_key=True, index=True)
    # One estimate row per engine (enforced by the unique constraint).
    engine_type = Column(String(100), nullable=False, unique=True, index=True)
    # Current wait estimate and its allowed bounds, in seconds.
    base_wait_seconds = Column(Float, nullable=False)
    min_wait_seconds = Column(Float, nullable=False)
    max_wait_seconds = Column(Float, nullable=False)
    # Float timestamp of the last estimate update (presumably Unix epoch
    # seconds -- confirm with the rate limiting tracker).
    last_updated = Column(Float, nullable=False)
    # Aggregate attempt statistics backing the estimate.
    total_attempts = Column(Integer, default=0, nullable=False)
    success_rate = Column(Float, default=0.0, nullable=False)
    # Row bookkeeping timestamps, maintained by the database.
    created_at = Column(DateTime, server_default=func.now(), nullable=False)
    updated_at = Column(
        DateTime, server_default=func.now(), onupdate=func.now(), nullable=False
    )
223
+
224
+
225
class ProviderModel(Base):
    """Database model for caching available models from all providers."""

    __tablename__ = "provider_models"

    id = Column(Integer, primary_key=True, index=True)
    # Provider identifier the model belongs to.
    provider = Column(String(50), nullable=False, index=True)
    # Provider-specific model identifier and its human-readable label.
    model_key = Column(String(255), nullable=False)
    model_label = Column(String(255), nullable=False)
    model_metadata = Column(JSON, nullable=True)  # For additional model info
    # When this cache entry was last refreshed, maintained by the database.
    last_updated = Column(DateTime, server_default=func.now(), nullable=False)

    # Composite unique constraint to prevent duplicates
    __table_args__ = (
        UniqueConstraint("provider", "model_key", name="uix_provider_model"),
    )