local-deep-research 0.4.3__py3-none-any.whl → 0.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (222) hide show
  1. local_deep_research/__init__.py +7 -0
  2. local_deep_research/__version__.py +1 -1
  3. local_deep_research/advanced_search_system/answer_decoding/__init__.py +5 -0
  4. local_deep_research/advanced_search_system/answer_decoding/browsecomp_answer_decoder.py +421 -0
  5. local_deep_research/advanced_search_system/candidate_exploration/README.md +219 -0
  6. local_deep_research/advanced_search_system/candidate_exploration/__init__.py +25 -0
  7. local_deep_research/advanced_search_system/candidate_exploration/adaptive_explorer.py +329 -0
  8. local_deep_research/advanced_search_system/candidate_exploration/base_explorer.py +341 -0
  9. local_deep_research/advanced_search_system/candidate_exploration/constraint_guided_explorer.py +436 -0
  10. local_deep_research/advanced_search_system/candidate_exploration/diversity_explorer.py +457 -0
  11. local_deep_research/advanced_search_system/candidate_exploration/parallel_explorer.py +250 -0
  12. local_deep_research/advanced_search_system/candidate_exploration/progressive_explorer.py +255 -0
  13. local_deep_research/advanced_search_system/candidates/__init__.py +5 -0
  14. local_deep_research/advanced_search_system/candidates/base_candidate.py +59 -0
  15. local_deep_research/advanced_search_system/constraint_checking/README.md +150 -0
  16. local_deep_research/advanced_search_system/constraint_checking/__init__.py +35 -0
  17. local_deep_research/advanced_search_system/constraint_checking/base_constraint_checker.py +122 -0
  18. local_deep_research/advanced_search_system/constraint_checking/constraint_checker.py +223 -0
  19. local_deep_research/advanced_search_system/constraint_checking/constraint_satisfaction_tracker.py +387 -0
  20. local_deep_research/advanced_search_system/constraint_checking/dual_confidence_checker.py +424 -0
  21. local_deep_research/advanced_search_system/constraint_checking/evidence_analyzer.py +174 -0
  22. local_deep_research/advanced_search_system/constraint_checking/intelligent_constraint_relaxer.py +503 -0
  23. local_deep_research/advanced_search_system/constraint_checking/rejection_engine.py +143 -0
  24. local_deep_research/advanced_search_system/constraint_checking/strict_checker.py +259 -0
  25. local_deep_research/advanced_search_system/constraint_checking/threshold_checker.py +213 -0
  26. local_deep_research/advanced_search_system/constraints/__init__.py +6 -0
  27. local_deep_research/advanced_search_system/constraints/base_constraint.py +58 -0
  28. local_deep_research/advanced_search_system/constraints/constraint_analyzer.py +143 -0
  29. local_deep_research/advanced_search_system/evidence/__init__.py +12 -0
  30. local_deep_research/advanced_search_system/evidence/base_evidence.py +57 -0
  31. local_deep_research/advanced_search_system/evidence/evaluator.py +159 -0
  32. local_deep_research/advanced_search_system/evidence/requirements.py +122 -0
  33. local_deep_research/advanced_search_system/filters/base_filter.py +3 -1
  34. local_deep_research/advanced_search_system/filters/cross_engine_filter.py +8 -2
  35. local_deep_research/advanced_search_system/filters/journal_reputation_filter.py +43 -29
  36. local_deep_research/advanced_search_system/findings/repository.py +54 -17
  37. local_deep_research/advanced_search_system/knowledge/standard_knowledge.py +3 -1
  38. local_deep_research/advanced_search_system/query_generation/adaptive_query_generator.py +405 -0
  39. local_deep_research/advanced_search_system/questions/__init__.py +16 -0
  40. local_deep_research/advanced_search_system/questions/atomic_fact_question.py +171 -0
  41. local_deep_research/advanced_search_system/questions/browsecomp_question.py +287 -0
  42. local_deep_research/advanced_search_system/questions/decomposition_question.py +13 -4
  43. local_deep_research/advanced_search_system/questions/entity_aware_question.py +184 -0
  44. local_deep_research/advanced_search_system/questions/standard_question.py +9 -3
  45. local_deep_research/advanced_search_system/search_optimization/cross_constraint_manager.py +624 -0
  46. local_deep_research/advanced_search_system/source_management/diversity_manager.py +613 -0
  47. local_deep_research/advanced_search_system/strategies/__init__.py +42 -0
  48. local_deep_research/advanced_search_system/strategies/adaptive_decomposition_strategy.py +564 -0
  49. local_deep_research/advanced_search_system/strategies/base_strategy.py +4 -4
  50. local_deep_research/advanced_search_system/strategies/browsecomp_entity_strategy.py +1031 -0
  51. local_deep_research/advanced_search_system/strategies/browsecomp_optimized_strategy.py +778 -0
  52. local_deep_research/advanced_search_system/strategies/concurrent_dual_confidence_strategy.py +446 -0
  53. local_deep_research/advanced_search_system/strategies/constrained_search_strategy.py +1348 -0
  54. local_deep_research/advanced_search_system/strategies/constraint_parallel_strategy.py +522 -0
  55. local_deep_research/advanced_search_system/strategies/direct_search_strategy.py +217 -0
  56. local_deep_research/advanced_search_system/strategies/dual_confidence_strategy.py +320 -0
  57. local_deep_research/advanced_search_system/strategies/dual_confidence_with_rejection.py +219 -0
  58. local_deep_research/advanced_search_system/strategies/early_stop_constrained_strategy.py +369 -0
  59. local_deep_research/advanced_search_system/strategies/entity_aware_source_strategy.py +140 -0
  60. local_deep_research/advanced_search_system/strategies/evidence_based_strategy.py +1248 -0
  61. local_deep_research/advanced_search_system/strategies/evidence_based_strategy_v2.py +1337 -0
  62. local_deep_research/advanced_search_system/strategies/focused_iteration_strategy.py +537 -0
  63. local_deep_research/advanced_search_system/strategies/improved_evidence_based_strategy.py +782 -0
  64. local_deep_research/advanced_search_system/strategies/iterative_reasoning_strategy.py +760 -0
  65. local_deep_research/advanced_search_system/strategies/iterdrag_strategy.py +55 -21
  66. local_deep_research/advanced_search_system/strategies/llm_driven_modular_strategy.py +865 -0
  67. local_deep_research/advanced_search_system/strategies/modular_strategy.py +1142 -0
  68. local_deep_research/advanced_search_system/strategies/parallel_constrained_strategy.py +506 -0
  69. local_deep_research/advanced_search_system/strategies/parallel_search_strategy.py +34 -16
  70. local_deep_research/advanced_search_system/strategies/rapid_search_strategy.py +29 -9
  71. local_deep_research/advanced_search_system/strategies/recursive_decomposition_strategy.py +492 -0
  72. local_deep_research/advanced_search_system/strategies/smart_decomposition_strategy.py +284 -0
  73. local_deep_research/advanced_search_system/strategies/smart_query_strategy.py +515 -0
  74. local_deep_research/advanced_search_system/strategies/source_based_strategy.py +48 -24
  75. local_deep_research/advanced_search_system/strategies/standard_strategy.py +34 -14
  76. local_deep_research/advanced_search_system/tools/base_tool.py +7 -2
  77. local_deep_research/api/benchmark_functions.py +6 -2
  78. local_deep_research/api/research_functions.py +10 -4
  79. local_deep_research/benchmarks/__init__.py +9 -7
  80. local_deep_research/benchmarks/benchmark_functions.py +6 -2
  81. local_deep_research/benchmarks/cli/benchmark_commands.py +27 -10
  82. local_deep_research/benchmarks/cli.py +38 -13
  83. local_deep_research/benchmarks/comparison/__init__.py +4 -2
  84. local_deep_research/benchmarks/comparison/evaluator.py +316 -239
  85. local_deep_research/benchmarks/datasets/__init__.py +1 -1
  86. local_deep_research/benchmarks/datasets/base.py +91 -72
  87. local_deep_research/benchmarks/datasets/browsecomp.py +54 -33
  88. local_deep_research/benchmarks/datasets/custom_dataset_template.py +19 -19
  89. local_deep_research/benchmarks/datasets/simpleqa.py +14 -14
  90. local_deep_research/benchmarks/datasets/utils.py +48 -29
  91. local_deep_research/benchmarks/datasets.py +4 -11
  92. local_deep_research/benchmarks/efficiency/__init__.py +8 -4
  93. local_deep_research/benchmarks/efficiency/resource_monitor.py +223 -171
  94. local_deep_research/benchmarks/efficiency/speed_profiler.py +62 -48
  95. local_deep_research/benchmarks/evaluators/browsecomp.py +3 -1
  96. local_deep_research/benchmarks/evaluators/composite.py +6 -2
  97. local_deep_research/benchmarks/evaluators/simpleqa.py +36 -13
  98. local_deep_research/benchmarks/graders.py +32 -10
  99. local_deep_research/benchmarks/metrics/README.md +1 -1
  100. local_deep_research/benchmarks/metrics/calculation.py +25 -10
  101. local_deep_research/benchmarks/metrics/reporting.py +7 -3
  102. local_deep_research/benchmarks/metrics/visualization.py +42 -23
  103. local_deep_research/benchmarks/metrics.py +1 -1
  104. local_deep_research/benchmarks/optimization/__init__.py +3 -1
  105. local_deep_research/benchmarks/optimization/api.py +7 -1
  106. local_deep_research/benchmarks/optimization/optuna_optimizer.py +75 -26
  107. local_deep_research/benchmarks/runners.py +48 -15
  108. local_deep_research/citation_handler.py +65 -92
  109. local_deep_research/citation_handlers/__init__.py +15 -0
  110. local_deep_research/citation_handlers/base_citation_handler.py +70 -0
  111. local_deep_research/citation_handlers/forced_answer_citation_handler.py +179 -0
  112. local_deep_research/citation_handlers/precision_extraction_handler.py +550 -0
  113. local_deep_research/citation_handlers/standard_citation_handler.py +80 -0
  114. local_deep_research/config/llm_config.py +271 -169
  115. local_deep_research/config/search_config.py +14 -5
  116. local_deep_research/defaults/__init__.py +0 -1
  117. local_deep_research/defaults/default_settings.json +35 -35
  118. local_deep_research/metrics/__init__.py +13 -0
  119. local_deep_research/metrics/database.py +58 -0
  120. local_deep_research/metrics/db_models.py +115 -0
  121. local_deep_research/metrics/migrate_add_provider_to_token_usage.py +148 -0
  122. local_deep_research/metrics/migrate_call_stack_tracking.py +105 -0
  123. local_deep_research/metrics/migrate_enhanced_tracking.py +75 -0
  124. local_deep_research/metrics/migrate_research_ratings.py +31 -0
  125. local_deep_research/metrics/models.py +61 -0
  126. local_deep_research/metrics/pricing/__init__.py +12 -0
  127. local_deep_research/metrics/pricing/cost_calculator.py +237 -0
  128. local_deep_research/metrics/pricing/pricing_cache.py +143 -0
  129. local_deep_research/metrics/pricing/pricing_fetcher.py +240 -0
  130. local_deep_research/metrics/query_utils.py +51 -0
  131. local_deep_research/metrics/search_tracker.py +380 -0
  132. local_deep_research/metrics/token_counter.py +1078 -0
  133. local_deep_research/migrate_db.py +3 -1
  134. local_deep_research/report_generator.py +22 -8
  135. local_deep_research/search_system.py +390 -9
  136. local_deep_research/test_migration.py +15 -5
  137. local_deep_research/utilities/db_utils.py +7 -4
  138. local_deep_research/utilities/es_utils.py +115 -104
  139. local_deep_research/utilities/llm_utils.py +15 -5
  140. local_deep_research/utilities/log_utils.py +151 -0
  141. local_deep_research/utilities/search_cache.py +387 -0
  142. local_deep_research/utilities/search_utilities.py +14 -6
  143. local_deep_research/utilities/threading_utils.py +92 -0
  144. local_deep_research/utilities/url_utils.py +6 -0
  145. local_deep_research/web/api.py +347 -0
  146. local_deep_research/web/app.py +13 -17
  147. local_deep_research/web/app_factory.py +71 -66
  148. local_deep_research/web/database/migrate_to_ldr_db.py +12 -4
  149. local_deep_research/web/database/migrations.py +5 -3
  150. local_deep_research/web/database/models.py +51 -2
  151. local_deep_research/web/database/schema_upgrade.py +49 -29
  152. local_deep_research/web/models/database.py +51 -61
  153. local_deep_research/web/routes/api_routes.py +56 -22
  154. local_deep_research/web/routes/benchmark_routes.py +4 -1
  155. local_deep_research/web/routes/globals.py +22 -0
  156. local_deep_research/web/routes/history_routes.py +71 -46
  157. local_deep_research/web/routes/metrics_routes.py +1155 -0
  158. local_deep_research/web/routes/research_routes.py +227 -41
  159. local_deep_research/web/routes/settings_routes.py +156 -55
  160. local_deep_research/web/services/research_service.py +310 -103
  161. local_deep_research/web/services/resource_service.py +36 -11
  162. local_deep_research/web/services/settings_manager.py +58 -18
  163. local_deep_research/web/services/settings_service.py +12 -4
  164. local_deep_research/web/services/socket_service.py +295 -188
  165. local_deep_research/web/static/css/custom_dropdown.css +180 -0
  166. local_deep_research/web/static/css/styles.css +39 -1
  167. local_deep_research/web/static/js/components/detail.js +633 -267
  168. local_deep_research/web/static/js/components/details.js +751 -0
  169. local_deep_research/web/static/js/components/fallback/formatting.js +11 -11
  170. local_deep_research/web/static/js/components/fallback/ui.js +23 -23
  171. local_deep_research/web/static/js/components/history.js +76 -76
  172. local_deep_research/web/static/js/components/logpanel.js +61 -13
  173. local_deep_research/web/static/js/components/progress.js +13 -2
  174. local_deep_research/web/static/js/components/research.js +99 -12
  175. local_deep_research/web/static/js/components/results.js +239 -106
  176. local_deep_research/web/static/js/components/settings.js +70 -47
  177. local_deep_research/web/static/js/main.js +40 -40
  178. local_deep_research/web/static/js/services/audio.js +1 -1
  179. local_deep_research/web/static/js/services/formatting.js +11 -11
  180. local_deep_research/web/static/js/services/keyboard.js +157 -0
  181. local_deep_research/web/static/js/services/pdf.js +80 -80
  182. local_deep_research/web/static/sounds/README.md +1 -1
  183. local_deep_research/web/templates/base.html +1 -0
  184. local_deep_research/web/templates/components/log_panel.html +7 -1
  185. local_deep_research/web/templates/components/mobile_nav.html +1 -1
  186. local_deep_research/web/templates/components/sidebar.html +3 -0
  187. local_deep_research/web/templates/pages/cost_analytics.html +1245 -0
  188. local_deep_research/web/templates/pages/details.html +325 -24
  189. local_deep_research/web/templates/pages/history.html +1 -1
  190. local_deep_research/web/templates/pages/metrics.html +1929 -0
  191. local_deep_research/web/templates/pages/progress.html +2 -2
  192. local_deep_research/web/templates/pages/research.html +53 -17
  193. local_deep_research/web/templates/pages/results.html +12 -1
  194. local_deep_research/web/templates/pages/star_reviews.html +803 -0
  195. local_deep_research/web/utils/formatters.py +9 -3
  196. local_deep_research/web_search_engines/default_search_engines.py +5 -3
  197. local_deep_research/web_search_engines/engines/full_search.py +8 -2
  198. local_deep_research/web_search_engines/engines/meta_search_engine.py +59 -20
  199. local_deep_research/web_search_engines/engines/search_engine_arxiv.py +19 -6
  200. local_deep_research/web_search_engines/engines/search_engine_brave.py +6 -2
  201. local_deep_research/web_search_engines/engines/search_engine_ddg.py +3 -1
  202. local_deep_research/web_search_engines/engines/search_engine_elasticsearch.py +81 -58
  203. local_deep_research/web_search_engines/engines/search_engine_github.py +46 -15
  204. local_deep_research/web_search_engines/engines/search_engine_google_pse.py +16 -6
  205. local_deep_research/web_search_engines/engines/search_engine_guardian.py +39 -15
  206. local_deep_research/web_search_engines/engines/search_engine_local.py +58 -25
  207. local_deep_research/web_search_engines/engines/search_engine_local_all.py +15 -5
  208. local_deep_research/web_search_engines/engines/search_engine_pubmed.py +63 -21
  209. local_deep_research/web_search_engines/engines/search_engine_searxng.py +37 -11
  210. local_deep_research/web_search_engines/engines/search_engine_semantic_scholar.py +27 -9
  211. local_deep_research/web_search_engines/engines/search_engine_serpapi.py +12 -4
  212. local_deep_research/web_search_engines/engines/search_engine_wayback.py +31 -10
  213. local_deep_research/web_search_engines/engines/search_engine_wikipedia.py +12 -3
  214. local_deep_research/web_search_engines/search_engine_base.py +83 -35
  215. local_deep_research/web_search_engines/search_engine_factory.py +25 -8
  216. local_deep_research/web_search_engines/search_engines_config.py +9 -3
  217. {local_deep_research-0.4.3.dist-info → local_deep_research-0.5.0.dist-info}/METADATA +8 -2
  218. local_deep_research-0.5.0.dist-info/RECORD +265 -0
  219. local_deep_research-0.4.3.dist-info/RECORD +0 -177
  220. {local_deep_research-0.4.3.dist-info → local_deep_research-0.5.0.dist-info}/WHEEL +0 -0
  221. {local_deep_research-0.4.3.dist-info → local_deep_research-0.5.0.dist-info}/entry_points.txt +0 -0
  222. {local_deep_research-0.4.3.dist-info → local_deep_research-0.5.0.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,105 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ Database migration script to add call stack tracking columns to token_usage table.
4
+ This adds the Phase 1 call stack tracking functionality.
5
+ """
6
+
7
+ import sqlite3
8
+ import sys
9
+ from pathlib import Path
10
+
11
+ from loguru import logger
12
+
13
+
14
def migrate_call_stack_tracking(db_path: str):
    """Add call stack tracking columns to the token_usage table.

    Reads the existing column names with ``PRAGMA table_info`` and issues an
    ``ALTER TABLE ... ADD COLUMN`` for each of ``calling_file``,
    ``calling_function`` and ``call_stack`` that is not present yet. Columns
    that already exist are only logged, so the function can be run again on
    an already migrated database.

    Args:
        db_path: Path to the SQLite database file

    Raises:
        sqlite3.Error: If connecting to or altering the database fails.
        Exception: Any other error; it is logged and re-raised unchanged.
    """
    # Bind the name before entering the try block: if sqlite3.connect()
    # itself raises, the finally clause must not fail with UnboundLocalError
    # and mask the original error.
    conn = None
    try:
        conn = sqlite3.connect(db_path)
        cursor = conn.cursor()

        # Check if columns already exist (row[1] is the column name)
        cursor.execute("PRAGMA table_info(token_usage)")
        columns = [row[1] for row in cursor.fetchall()]

        # Add call stack tracking columns if they don't exist
        new_columns = [
            ("calling_file", "TEXT"),
            ("calling_function", "TEXT"),
            ("call_stack", "TEXT"),
        ]

        for column_name, column_type in new_columns:
            if column_name not in columns:
                logger.info(f"Adding column {column_name} to token_usage table")
                # Identifiers cannot be bound as SQL parameters; the names
                # and types come from the hard-coded list above.
                cursor.execute(
                    f"ALTER TABLE token_usage ADD COLUMN {column_name} {column_type}"
                )
            else:
                logger.info(
                    f"Column {column_name} already exists in token_usage table"
                )

        conn.commit()
        logger.success(
            "Call stack tracking columns migration completed successfully"
        )

    except sqlite3.Error as e:
        logger.error(f"Database error during call stack migration: {e}")
        raise
    except Exception as e:
        logger.error(f"Unexpected error during call stack migration: {e}")
        raise
    finally:
        if conn is not None:
            conn.close()
60
+
61
+
62
def find_database_file():
    """Locate the metrics database relative to the current directory.

    Returns:
        The absolute path (as a string) of the first candidate location
        that exists, or None when none of them exists.
    """
    # Candidate locations, probed in this order.
    candidates = (
        Path("data/metrics.db"),
        Path("../data/metrics.db"),
        Path("../../data/metrics.db"),
    )

    first_existing = next(
        (candidate for candidate in candidates if candidate.exists()), None
    )
    if first_existing is None:
        return None
    return str(first_existing.absolute())
77
+
78
+
79
if __name__ == "__main__":
    # Script entry point: work out which database to migrate, run the
    # migration, and exit with status 1 on any failure.
    logger.info("Starting call stack tracking migration...")

    # Check if database path provided as argument
    if len(sys.argv) > 1:
        db_path = sys.argv[1]
    else:
        # No argument given: probe the default locations instead.
        db_path = find_database_file()

    if not db_path:
        logger.error("Could not find metrics database file.")
        logger.info("Please provide the database path as an argument:")
        logger.info("python migrate_call_stack_tracking.py /path/to/metrics.db")
        sys.exit(1)

    # Refuse to continue when the file is missing rather than passing a
    # non-existent path on to the migration.
    if not Path(db_path).exists():
        logger.error(f"Database file does not exist: {db_path}")
        sys.exit(1)

    logger.info(f"Using database: {db_path}")

    try:
        migrate_call_stack_tracking(db_path)
        logger.success("Call stack tracking migration completed!")
    except Exception as e:
        # The migration function already logged the details and re-raised;
        # report the failure and signal it through the exit status.
        logger.error(f"Migration failed: {e}")
        sys.exit(1)
@@ -0,0 +1,75 @@
1
+ """Migration script to add Phase 1 enhanced token tracking fields."""
2
+
3
+ import sqlite3
4
+ from pathlib import Path
5
+
6
+ from loguru import logger
7
+
8
+ from ..utilities.db_utils import DB_PATH
9
+
10
+
11
def migrate_enhanced_tracking():
    """Add Phase 1 enhanced tracking columns to existing token_usage table.

    Does nothing when the database file at ``DB_PATH`` does not exist or
    when it has no ``token_usage`` table. Otherwise every column from the
    list below that is missing is added with ``ALTER TABLE ... ADD COLUMN``
    and the changes are committed once at the end.

    Raises:
        Exception: Any error raised while migrating is logged together with
            its traceback and re-raised.
    """

    if not Path(DB_PATH).exists():
        logger.info("Database doesn't exist yet, skipping migration")
        return

    # Bound before the try block so the finally clause can close the
    # connection on every exit path (early return, success, or error)
    # without inspecting locals().
    conn = None
    try:
        conn = sqlite3.connect(DB_PATH)
        cursor = conn.cursor()

        # Check if token_usage table exists
        cursor.execute(
            """
            SELECT name FROM sqlite_master
            WHERE type='table' AND name='token_usage'
            """
        )

        if not cursor.fetchone():
            logger.info(
                "token_usage table doesn't exist yet, skipping migration"
            )
            return

        # Check if enhanced columns already exist (column[1] is the name)
        cursor.execute("PRAGMA table_info(token_usage)")
        columns = [column[1] for column in cursor.fetchall()]

        # Define new columns to add
        new_columns = [
            ("research_query", "TEXT"),
            ("research_mode", "TEXT"),
            ("research_phase", "TEXT"),
            ("search_iteration", "INTEGER"),
            ("response_time_ms", "INTEGER"),
            ("success_status", "TEXT DEFAULT 'success'"),
            ("error_type", "TEXT"),
            ("search_engines_planned", "TEXT"),
            ("search_engine_selected", "TEXT"),
        ]

        # Add missing columns
        for column_name, column_type in new_columns:
            if column_name not in columns:
                logger.info(f"Adding column {column_name} to token_usage table")
                # Identifiers cannot be bound as SQL parameters; the names
                # and types come from the hard-coded list above.
                cursor.execute(
                    f"ALTER TABLE token_usage ADD COLUMN {column_name} {column_type}"
                )

        conn.commit()

        logger.info("Enhanced token tracking migration completed successfully")

    except Exception as e:
        logger.exception(f"Error during enhanced token tracking migration: {e}")
        raise
    finally:
        if conn is not None:
            conn.close()
72
+
73
+
74
# Apply the migration when this module is executed rather than imported.
# The module uses a relative import (``from ..utilities.db_utils``), so it
# has to be run as part of its package (``python -m ...``) rather than as a
# standalone file.
if __name__ == "__main__":
    migrate_enhanced_tracking()
@@ -0,0 +1,31 @@
1
+ #!/usr/bin/env python3
2
+ """Migration script to add research ratings table."""
3
+
4
+ import sys
5
+ from pathlib import Path
6
+
7
+ # Add the project root to Python path
8
+ project_root = Path(__file__).parent.parent.parent.parent
9
+ sys.path.insert(0, str(project_root))
10
+
11
+ # Import after path modification
12
+ from local_deep_research.metrics.database import MetricsDatabase # noqa: E402
13
+ from local_deep_research.metrics.db_models import ResearchRating # noqa: E402
14
+
15
+
16
def main():
    """Create the research ratings table and report progress on stdout."""
    print("Creating research ratings table...")

    # Open the metrics database and grab the engine the table is created on.
    engine = MetricsDatabase().engine

    # checkfirst=True asks SQLAlchemy to look for the table first, so an
    # already existing research_ratings table is left alone.
    ratings_table = ResearchRating.__table__
    ratings_table.create(engine, checkfirst=True)

    for message in (
        "✅ Research ratings table created successfully!",
        "Users can now rate their research sessions on a 1-5 star scale.",
    ):
        print(message)
28
+
29
+
30
# Run the migration when this file is executed as a script.
if __name__ == "__main__":
    main()
@@ -0,0 +1,61 @@
1
+ """SQLAlchemy models for metrics."""
2
+
3
+ from sqlalchemy import Column, DateTime, Integer, String, Text, UniqueConstraint
4
+ from sqlalchemy.ext.declarative import declarative_base
5
+ from sqlalchemy.sql import func
6
+
7
# Declarative base class that the metrics models in this module inherit from.
Base = declarative_base()
8
+
9
+
10
class TokenUsage(Base):
    """Model for tracking individual token usage events.

    Maps to the ``token_usage`` table. Besides the token counts, a row
    carries optional research context, performance metrics, search engine
    context and call stack information about the LLM call.
    """

    __tablename__ = "token_usage"

    id = Column(Integer, primary_key=True)
    research_id = Column(Integer, index=True)  # No foreign key for now
    model_name = Column(String)
    prompt_tokens = Column(Integer)
    completion_tokens = Column(Integer)
    total_tokens = Column(Integer)

    # Phase 1 Enhancement: Research context
    research_query = Column(Text)
    research_mode = Column(String)  # 'quick' or 'detailed'
    research_phase = Column(String)  # 'init', 'iteration_1', etc.
    search_iteration = Column(Integer)

    # Phase 1 Enhancement: Performance metrics
    response_time_ms = Column(Integer)
    # Python-side default: rows inserted without a status get "success".
    success_status = Column(
        String, default="success"
    )  # 'success', 'error', 'timeout'
    error_type = Column(String)

    # Phase 1 Enhancement: Search engine context
    search_engines_planned = Column(Text)  # JSON array as text
    search_engine_selected = Column(String)

    # Call stack tracking
    calling_file = Column(String)  # File that made the LLM call
    calling_function = Column(String)  # Function that made the LLM call
    call_stack = Column(Text)  # Full call stack as JSON

    # Filled in by the database (server-side default) when not supplied.
    timestamp = Column(DateTime, server_default=func.now())
45
+
46
+
47
class ModelUsage(Base):
    """Model for aggregated token usage by model and research.

    Maps to the ``model_usage`` table. The unique constraint allows at most
    one row per ``(research_id, model_name)`` pair.
    """

    __tablename__ = "model_usage"
    __table_args__ = (UniqueConstraint("research_id", "model_name"),)

    id = Column(Integer, primary_key=True)
    research_id = Column(Integer, index=True)  # No foreign key for now
    model_name = Column(String)
    provider = Column(String)
    # Counters start at 0 (Python-side default) when a row is created
    # without explicit values.
    prompt_tokens = Column(Integer, default=0)
    completion_tokens = Column(Integer, default=0)
    total_tokens = Column(Integer, default=0)
    calls = Column(Integer, default=0)
    # Filled in by the database (server-side default) when not supplied.
    timestamp = Column(DateTime, server_default=func.now())
@@ -0,0 +1,12 @@
1
+ """
2
+ LLM Pricing API Module
3
+
4
+ Provides real-time pricing data for LLM models from various providers.
5
+ Includes caching and cost calculation utilities.
6
+ """
7
+
8
+ from .cost_calculator import CostCalculator
9
+ from .pricing_cache import PricingCache
10
+ from .pricing_fetcher import PricingFetcher
11
+
12
+ __all__ = ["PricingFetcher", "PricingCache", "CostCalculator"]
@@ -0,0 +1,237 @@
1
+ """
2
+ Cost Calculator
3
+
4
+ Calculates LLM usage costs based on token usage and pricing data.
5
+ Integrates with pricing fetcher and cache systems.
6
+ """
7
+
8
+ import logging
9
+ from typing import Any, Dict, List, Optional
10
+
11
+ from .pricing_cache import PricingCache
12
+ from .pricing_fetcher import PricingFetcher
13
+
14
+ logger = logging.getLogger(__name__)
15
+
16
+
17
class CostCalculator:
    """Calculates LLM usage costs.

    Combines a ``PricingCache`` with a ``PricingFetcher``. The fetcher is
    only created when the instance is entered as an async context manager
    (``async with CostCalculator() as calc``); without it the async methods
    consult the cache only.

    A pricing dict holds the ``"prompt"`` and ``"completion"`` prices, which
    are applied per 1,000 tokens.
    """

    # Message attached to zero-cost results when no pricing is known.
    _NO_PRICING_ERROR = "No pricing data available for this model"

    def __init__(self, cache_dir: Optional[str] = None):
        """Create the calculator.

        Args:
            cache_dir: Directory handed to ``PricingCache``; ``None`` lets
                the cache choose its default location.
        """
        self.cache = PricingCache(cache_dir)
        self.pricing_fetcher = None

    async def __aenter__(self):
        """Create and enter the pricing fetcher, then return ``self``."""
        self.pricing_fetcher = PricingFetcher()
        await self.pricing_fetcher.__aenter__()
        return self

    async def __aexit__(self, exc_type, exc_val, exc_tb):
        """Exit the pricing fetcher if one was created."""
        if self.pricing_fetcher:
            await self.pricing_fetcher.__aexit__(exc_type, exc_val, exc_tb)

    @staticmethod
    def _zero_cost(error: str) -> Dict[str, Any]:
        """Build the zero-cost result used when a cost cannot be computed."""
        return {
            "prompt_cost": 0.0,
            "completion_cost": 0.0,
            "total_cost": 0.0,
            "pricing_used": None,
            "error": error,
        }

    @staticmethod
    def _priced_cost(
        pricing: Dict[str, float], prompt_tokens: int, completion_tokens: int
    ) -> Dict[str, Any]:
        """Apply per-1,000-token prices to the given token counts."""
        # Convert tokens to thousands for pricing calculation
        prompt_cost = (prompt_tokens / 1000) * pricing["prompt"]
        completion_cost = (completion_tokens / 1000) * pricing["completion"]
        total_cost = prompt_cost + completion_cost

        return {
            "prompt_cost": round(prompt_cost, 6),
            "completion_cost": round(completion_cost, 6),
            "total_cost": round(total_cost, 6),
            "pricing_used": pricing,
        }

    @staticmethod
    def _token_count(record: Dict[str, Any], key: str) -> int:
        """Return ``record[key]``, or 0 when it is missing or ``None``."""
        return record.get(key) or 0

    async def get_model_pricing(
        self, model_name: str, provider: Optional[str] = None
    ) -> Optional[Dict[str, float]]:
        """Get pricing for a model and provider (cached or fetched).

        Args:
            model_name: Name of the model to price.
            provider: Optional provider name; when given it becomes part of
                the cache key.

        Returns:
            The pricing dict, or ``None`` when neither the cache nor the
            fetcher (if one is active) has pricing for the model.
        """
        # Create cache key that includes provider
        cache_key = f"{provider}:{model_name}" if provider else model_name

        # Try cache first
        cached_pricing = self.cache.get(f"model:{cache_key}")
        if cached_pricing:
            return cached_pricing

        # Fetch from API (only possible inside ``async with``)
        if self.pricing_fetcher:
            pricing = await self.pricing_fetcher.get_model_pricing(
                model_name, provider
            )
            if pricing:
                self.cache.set(f"model:{cache_key}", pricing)
                return pricing

        # No pricing found
        logger.warning(
            f"No pricing found for {model_name} (provider: {provider})"
        )
        return None

    async def calculate_cost(
        self,
        model_name: str,
        prompt_tokens: int,
        completion_tokens: int,
        provider: Optional[str] = None,
    ) -> Dict[str, Any]:
        """
        Calculate cost for a single LLM call.

        Args:
            model_name: Name of the model that was called.
            prompt_tokens: Number of prompt tokens used.
            completion_tokens: Number of completion tokens used.
            provider: Optional provider name used for the pricing lookup.

        Returns:
            Dict with prompt_cost, completion_cost, total_cost and
            pricing_used. When no pricing is available all costs are 0.0,
            pricing_used is None and an "error" message is included.
        """
        pricing = await self.get_model_pricing(model_name, provider)

        # If no pricing found, return zero cost
        if pricing is None:
            return self._zero_cost(self._NO_PRICING_ERROR)

        return self._priced_cost(pricing, prompt_tokens, completion_tokens)

    async def calculate_batch_costs(
        self, usage_records: List[Dict[str, Any]]
    ) -> List[Dict[str, Any]]:
        """
        Calculate costs for multiple usage records.

        Expected record format:
        {
            "model_name": str,
            "provider": str (optional),
            "prompt_tokens": int,
            "completion_tokens": int,
            "research_id": int (optional),
            "timestamp": datetime (optional)
        }

        Returns:
            One dict per input record, in input order: the record's own
            fields merged with its cost fields. A record whose cost cannot
            be calculated is logged and returned with zero costs and an
            "error" entry instead of aborting the batch.
        """
        results = []

        for record in usage_records:
            try:
                cost_data = await self.calculate_cost(
                    record["model_name"],
                    record["prompt_tokens"],
                    record["completion_tokens"],
                    record.get("provider"),
                )
            except Exception as e:
                logger.error(
                    f"Failed to calculate cost for record {record}: {e}"
                )
                # Add record with zero cost on error; same keys as every
                # other zero-cost result, including "pricing_used".
                cost_data = self._zero_cost(str(e))

            results.append({**record, **cost_data})

        return results

    def calculate_cost_sync(
        self, model_name: str, prompt_tokens: int, completion_tokens: int
    ) -> Dict[str, Any]:
        """
        Synchronous cost calculation using cached pricing only.
        Fallback for when async is not available.

        When the cache has no entry, the fetcher's static pricing table is
        tried with an exact match on the model name and then, for names
        containing "/", on the part after the last "/".

        Returns:
            Same shape as ``calculate_cost``.
        """
        # Use cached pricing only
        pricing = self.cache.get_model_pricing(model_name)
        if not pricing:
            # Use static fallback with exact matching only
            static_pricing = PricingFetcher().static_pricing
            # Try exact match
            pricing = static_pricing.get(model_name)
            if not pricing and "/" in model_name:
                # Try exact match without provider prefix
                pricing = static_pricing.get(model_name.split("/")[-1])

        # If no pricing found, return zero cost
        if not pricing:
            return self._zero_cost(self._NO_PRICING_ERROR)

        return self._priced_cost(pricing, prompt_tokens, completion_tokens)

    async def get_research_cost_summary(
        self, usage_records: List[Dict[str, Any]]
    ) -> Dict[str, Any]:
        """
        Get cost summary for research session(s).

        Records that ``calculate_batch_costs`` could not price (for example
        because a token count is missing) still count as calls; their
        missing token counts are treated as 0 and a missing model name is
        reported as "unknown", so one malformed record does not make the
        whole summary fail.

        Returns:
            Dict with overall costs and token counts, the number of calls,
            a per-model breakdown, the average cost per call and the cost
            per token.
        """
        costs = await self.calculate_batch_costs(usage_records)

        total_cost = sum(c["total_cost"] for c in costs)
        total_prompt_cost = sum(c["prompt_cost"] for c in costs)
        total_completion_cost = sum(c["completion_cost"] for c in costs)

        total_prompt_tokens = sum(
            self._token_count(r, "prompt_tokens") for r in usage_records
        )
        total_completion_tokens = sum(
            self._token_count(r, "completion_tokens") for r in usage_records
        )
        total_tokens = total_prompt_tokens + total_completion_tokens

        # Model breakdown
        model_costs = {}
        for cost in costs:
            model = cost.get("model_name") or "unknown"
            entry = model_costs.setdefault(
                model,
                {
                    "total_cost": 0.0,
                    "prompt_tokens": 0,
                    "completion_tokens": 0,
                    "calls": 0,
                },
            )

            entry["total_cost"] += cost["total_cost"]
            entry["prompt_tokens"] += self._token_count(cost, "prompt_tokens")
            entry["completion_tokens"] += self._token_count(
                cost, "completion_tokens"
            )
            entry["calls"] += 1

        return {
            "total_cost": round(total_cost, 6),
            "prompt_cost": round(total_prompt_cost, 6),
            "completion_cost": round(total_completion_cost, 6),
            "total_tokens": total_tokens,
            "prompt_tokens": total_prompt_tokens,
            "completion_tokens": total_completion_tokens,
            "total_calls": len(usage_records),
            "model_breakdown": model_costs,
            "avg_cost_per_call": (
                round(total_cost / len(usage_records), 6)
                if usage_records
                else 0.0
            ),
            "cost_per_token": (
                round(total_cost / total_tokens, 8) if total_tokens > 0 else 0.0
            ),
        }
@@ -0,0 +1,143 @@
1
+ """
2
+ Pricing Cache System
3
+
4
+ Caches pricing data to avoid repeated API calls and improve performance.
5
+ Includes cache expiration and refresh mechanisms.
6
+ """
7
+
8
+ import json
9
+ import logging
10
+ import time
11
+ from datetime import datetime
12
+ from pathlib import Path
13
+ from typing import Any, Dict, Optional
14
+
15
+ logger = logging.getLogger(__name__)
16
+
17
+
18
class PricingCache:
    """Disk-backed cache for LLM pricing data.

    Entries live in an in-memory dict keyed by string and are mirrored to a
    JSON file (``pricing_cache.json``) inside ``cache_dir``. Each entry has
    the shape ``{"data": <payload>, "timestamp": <time.time() at insert>}``
    and counts as expired once it is older than ``cache_ttl`` seconds.

    Loading and saving are best-effort: failures are logged as warnings and
    never raised to the caller.
    """

    def __init__(self, cache_dir: Optional[str] = None, cache_ttl: int = 3600):
        """
        Initialize pricing cache.

        Args:
            cache_dir: Directory to store cache files. Defaults to
                ``<cwd>/data/cache/pricing``. Created if missing.
            cache_ttl: Cache time-to-live in seconds (default: 1 hour)
        """
        self.cache_ttl = cache_ttl

        if cache_dir:
            self.cache_dir = Path(cache_dir)
        else:
            # Default to data directory
            self.cache_dir = Path.cwd() / "data" / "cache" / "pricing"

        self.cache_dir.mkdir(parents=True, exist_ok=True)
        self.cache_file = self.cache_dir / "pricing_cache.json"

        self._cache: Dict[str, Dict[str, Any]] = {}
        self._load_cache()

    @staticmethod
    def _is_valid_entry(entry: Any) -> bool:
        """Return True if *entry* has the ``data``/``timestamp`` shape."""
        return (
            isinstance(entry, dict)
            and "data" in entry
            and isinstance(entry.get("timestamp"), (int, float))
        )

    def _load_cache(self):
        """Load cache from disk, discarding malformed entries.

        Any failure (unreadable file, invalid JSON, unexpected structure)
        is logged and leaves the in-memory cache empty.
        """
        try:
            if not self.cache_file.exists():
                return

            with open(self.cache_file, "r", encoding="utf-8") as f:
                data = json.load(f)

            stored = data.get("cache", {})
            if not isinstance(stored, dict):
                raise ValueError("'cache' section is not a JSON object")

            # Validate up front so later lookups of entry["timestamp"] /
            # entry["data"] cannot raise on a hand-edited or corrupt file.
            self._cache = {
                key: entry
                for key, entry in stored.items()
                if self._is_valid_entry(entry)
            }
            dropped = len(stored) - len(self._cache)
            if dropped:
                logger.warning(
                    f"Ignored {dropped} malformed pricing cache entries"
                )
            logger.info(
                f"Loaded pricing cache with {len(self._cache)} entries"
            )
        except Exception as e:
            logger.warning(f"Failed to load pricing cache: {e}")
            self._cache = {}

    def _save_cache(self):
        """Save cache to disk atomically.

        The JSON is written to a sibling temp file and then swapped in with
        ``Path.replace``, so a serialization error or crash mid-write cannot
        truncate the previously persisted cache. Failures are logged only.
        """
        tmp_file = self.cache_file.with_name(self.cache_file.name + ".tmp")
        try:
            cache_data = {
                "cache": self._cache,
                "last_updated": datetime.now().isoformat(),
            }
            with open(tmp_file, "w", encoding="utf-8") as f:
                json.dump(cache_data, f, indent=2)
            tmp_file.replace(self.cache_file)
        except Exception as e:
            logger.warning(f"Failed to save pricing cache: {e}")
            # Best-effort cleanup of a partially written temp file.
            try:
                tmp_file.unlink()
            except OSError:
                pass

    def _is_expired(self, timestamp: float) -> bool:
        """Check if cache entry is expired."""
        return (time.time() - timestamp) > self.cache_ttl

    def get(self, key: str) -> Optional[Any]:
        """Get cached pricing data.

        Returns:
            The stored payload, or ``None`` if the key is absent or expired.
            An expired entry is removed and the removal is persisted.
        """
        entry = self._cache.get(key)
        if entry is None:
            return None

        if self._is_expired(entry["timestamp"]):
            # Remove expired entry
            del self._cache[key]
            self._save_cache()
            return None

        return entry["data"]

    def set(self, key: str, data: Any):
        """Set cached pricing data and persist the cache."""
        self._cache[key] = {"data": data, "timestamp": time.time()}
        self._save_cache()

    def get_model_pricing(self, model_name: str) -> Optional[Dict[str, float]]:
        """Get cached pricing for a specific model."""
        return self.get(f"model:{model_name}")

    def set_model_pricing(self, model_name: str, pricing: Dict[str, float]):
        """Cache pricing for a specific model."""
        self.set(f"model:{model_name}", pricing)

    def get_all_pricing(self) -> Optional[Dict[str, Dict[str, float]]]:
        """Get cached pricing for all models."""
        return self.get("all_models")

    def set_all_pricing(self, pricing: Dict[str, Dict[str, float]]):
        """Cache pricing for all models."""
        self.set("all_models", pricing)

    def clear(self):
        """Clear all cached data."""
        self._cache = {}
        self._save_cache()
        logger.info("Pricing cache cleared")

    def clear_expired(self):
        """Remove expired cache entries; persist only if something changed."""
        # Collect first: the dict must not be mutated while iterating it.
        expired_keys = [
            key
            for key, entry in self._cache.items()
            if self._is_expired(entry["timestamp"])
        ]

        for key in expired_keys:
            del self._cache[key]

        if expired_keys:
            self._save_cache()
            logger.info(f"Removed {len(expired_keys)} expired cache entries")

    def get_cache_stats(self) -> Dict[str, Any]:
        """Get cache statistics (entry counts, cache file path and TTL)."""
        total_entries = len(self._cache)
        expired_count = sum(
            1
            for entry in self._cache.values()
            if self._is_expired(entry["timestamp"])
        )

        return {
            "total_entries": total_entries,
            "expired_entries": expired_count,
            "valid_entries": total_entries - expired_count,
            "cache_file": str(self.cache_file),
            "cache_ttl": self.cache_ttl,
        }