local-deep-research 0.4.4__py3-none-any.whl → 0.5.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (220) hide show
  1. local_deep_research/__init__.py +7 -0
  2. local_deep_research/__version__.py +1 -1
  3. local_deep_research/advanced_search_system/answer_decoding/__init__.py +5 -0
  4. local_deep_research/advanced_search_system/answer_decoding/browsecomp_answer_decoder.py +421 -0
  5. local_deep_research/advanced_search_system/candidate_exploration/README.md +219 -0
  6. local_deep_research/advanced_search_system/candidate_exploration/__init__.py +25 -0
  7. local_deep_research/advanced_search_system/candidate_exploration/adaptive_explorer.py +329 -0
  8. local_deep_research/advanced_search_system/candidate_exploration/base_explorer.py +341 -0
  9. local_deep_research/advanced_search_system/candidate_exploration/constraint_guided_explorer.py +436 -0
  10. local_deep_research/advanced_search_system/candidate_exploration/diversity_explorer.py +457 -0
  11. local_deep_research/advanced_search_system/candidate_exploration/parallel_explorer.py +250 -0
  12. local_deep_research/advanced_search_system/candidate_exploration/progressive_explorer.py +255 -0
  13. local_deep_research/advanced_search_system/candidates/__init__.py +5 -0
  14. local_deep_research/advanced_search_system/candidates/base_candidate.py +59 -0
  15. local_deep_research/advanced_search_system/constraint_checking/README.md +150 -0
  16. local_deep_research/advanced_search_system/constraint_checking/__init__.py +35 -0
  17. local_deep_research/advanced_search_system/constraint_checking/base_constraint_checker.py +122 -0
  18. local_deep_research/advanced_search_system/constraint_checking/constraint_checker.py +223 -0
  19. local_deep_research/advanced_search_system/constraint_checking/constraint_satisfaction_tracker.py +387 -0
  20. local_deep_research/advanced_search_system/constraint_checking/dual_confidence_checker.py +424 -0
  21. local_deep_research/advanced_search_system/constraint_checking/evidence_analyzer.py +174 -0
  22. local_deep_research/advanced_search_system/constraint_checking/intelligent_constraint_relaxer.py +503 -0
  23. local_deep_research/advanced_search_system/constraint_checking/rejection_engine.py +143 -0
  24. local_deep_research/advanced_search_system/constraint_checking/strict_checker.py +259 -0
  25. local_deep_research/advanced_search_system/constraint_checking/threshold_checker.py +213 -0
  26. local_deep_research/advanced_search_system/constraints/__init__.py +6 -0
  27. local_deep_research/advanced_search_system/constraints/base_constraint.py +58 -0
  28. local_deep_research/advanced_search_system/constraints/constraint_analyzer.py +143 -0
  29. local_deep_research/advanced_search_system/evidence/__init__.py +12 -0
  30. local_deep_research/advanced_search_system/evidence/base_evidence.py +57 -0
  31. local_deep_research/advanced_search_system/evidence/evaluator.py +159 -0
  32. local_deep_research/advanced_search_system/evidence/requirements.py +122 -0
  33. local_deep_research/advanced_search_system/filters/base_filter.py +3 -1
  34. local_deep_research/advanced_search_system/filters/cross_engine_filter.py +8 -2
  35. local_deep_research/advanced_search_system/filters/journal_reputation_filter.py +43 -29
  36. local_deep_research/advanced_search_system/findings/repository.py +54 -17
  37. local_deep_research/advanced_search_system/knowledge/standard_knowledge.py +3 -1
  38. local_deep_research/advanced_search_system/query_generation/adaptive_query_generator.py +405 -0
  39. local_deep_research/advanced_search_system/questions/__init__.py +16 -0
  40. local_deep_research/advanced_search_system/questions/atomic_fact_question.py +171 -0
  41. local_deep_research/advanced_search_system/questions/browsecomp_question.py +287 -0
  42. local_deep_research/advanced_search_system/questions/decomposition_question.py +13 -4
  43. local_deep_research/advanced_search_system/questions/entity_aware_question.py +184 -0
  44. local_deep_research/advanced_search_system/questions/standard_question.py +9 -3
  45. local_deep_research/advanced_search_system/search_optimization/cross_constraint_manager.py +624 -0
  46. local_deep_research/advanced_search_system/source_management/diversity_manager.py +613 -0
  47. local_deep_research/advanced_search_system/strategies/__init__.py +42 -0
  48. local_deep_research/advanced_search_system/strategies/adaptive_decomposition_strategy.py +564 -0
  49. local_deep_research/advanced_search_system/strategies/base_strategy.py +4 -4
  50. local_deep_research/advanced_search_system/strategies/browsecomp_entity_strategy.py +1031 -0
  51. local_deep_research/advanced_search_system/strategies/browsecomp_optimized_strategy.py +778 -0
  52. local_deep_research/advanced_search_system/strategies/concurrent_dual_confidence_strategy.py +446 -0
  53. local_deep_research/advanced_search_system/strategies/constrained_search_strategy.py +1348 -0
  54. local_deep_research/advanced_search_system/strategies/constraint_parallel_strategy.py +522 -0
  55. local_deep_research/advanced_search_system/strategies/direct_search_strategy.py +217 -0
  56. local_deep_research/advanced_search_system/strategies/dual_confidence_strategy.py +320 -0
  57. local_deep_research/advanced_search_system/strategies/dual_confidence_with_rejection.py +219 -0
  58. local_deep_research/advanced_search_system/strategies/early_stop_constrained_strategy.py +369 -0
  59. local_deep_research/advanced_search_system/strategies/entity_aware_source_strategy.py +140 -0
  60. local_deep_research/advanced_search_system/strategies/evidence_based_strategy.py +1248 -0
  61. local_deep_research/advanced_search_system/strategies/evidence_based_strategy_v2.py +1337 -0
  62. local_deep_research/advanced_search_system/strategies/focused_iteration_strategy.py +537 -0
  63. local_deep_research/advanced_search_system/strategies/improved_evidence_based_strategy.py +782 -0
  64. local_deep_research/advanced_search_system/strategies/iterative_reasoning_strategy.py +760 -0
  65. local_deep_research/advanced_search_system/strategies/iterdrag_strategy.py +55 -21
  66. local_deep_research/advanced_search_system/strategies/llm_driven_modular_strategy.py +865 -0
  67. local_deep_research/advanced_search_system/strategies/modular_strategy.py +1142 -0
  68. local_deep_research/advanced_search_system/strategies/parallel_constrained_strategy.py +506 -0
  69. local_deep_research/advanced_search_system/strategies/parallel_search_strategy.py +34 -16
  70. local_deep_research/advanced_search_system/strategies/rapid_search_strategy.py +29 -9
  71. local_deep_research/advanced_search_system/strategies/recursive_decomposition_strategy.py +492 -0
  72. local_deep_research/advanced_search_system/strategies/smart_decomposition_strategy.py +284 -0
  73. local_deep_research/advanced_search_system/strategies/smart_query_strategy.py +515 -0
  74. local_deep_research/advanced_search_system/strategies/source_based_strategy.py +48 -24
  75. local_deep_research/advanced_search_system/strategies/standard_strategy.py +34 -14
  76. local_deep_research/advanced_search_system/tools/base_tool.py +7 -2
  77. local_deep_research/api/benchmark_functions.py +6 -2
  78. local_deep_research/api/research_functions.py +10 -4
  79. local_deep_research/benchmarks/__init__.py +9 -7
  80. local_deep_research/benchmarks/benchmark_functions.py +6 -2
  81. local_deep_research/benchmarks/cli/benchmark_commands.py +27 -10
  82. local_deep_research/benchmarks/cli.py +38 -13
  83. local_deep_research/benchmarks/comparison/__init__.py +4 -2
  84. local_deep_research/benchmarks/comparison/evaluator.py +316 -239
  85. local_deep_research/benchmarks/datasets/__init__.py +1 -1
  86. local_deep_research/benchmarks/datasets/base.py +91 -72
  87. local_deep_research/benchmarks/datasets/browsecomp.py +54 -33
  88. local_deep_research/benchmarks/datasets/custom_dataset_template.py +19 -19
  89. local_deep_research/benchmarks/datasets/simpleqa.py +14 -14
  90. local_deep_research/benchmarks/datasets/utils.py +48 -29
  91. local_deep_research/benchmarks/datasets.py +4 -11
  92. local_deep_research/benchmarks/efficiency/__init__.py +8 -4
  93. local_deep_research/benchmarks/efficiency/resource_monitor.py +223 -171
  94. local_deep_research/benchmarks/efficiency/speed_profiler.py +62 -48
  95. local_deep_research/benchmarks/evaluators/browsecomp.py +3 -1
  96. local_deep_research/benchmarks/evaluators/composite.py +6 -2
  97. local_deep_research/benchmarks/evaluators/simpleqa.py +36 -13
  98. local_deep_research/benchmarks/graders.py +32 -10
  99. local_deep_research/benchmarks/metrics/README.md +1 -1
  100. local_deep_research/benchmarks/metrics/calculation.py +25 -10
  101. local_deep_research/benchmarks/metrics/reporting.py +7 -3
  102. local_deep_research/benchmarks/metrics/visualization.py +42 -23
  103. local_deep_research/benchmarks/metrics.py +1 -1
  104. local_deep_research/benchmarks/optimization/__init__.py +3 -1
  105. local_deep_research/benchmarks/optimization/api.py +7 -1
  106. local_deep_research/benchmarks/optimization/optuna_optimizer.py +75 -26
  107. local_deep_research/benchmarks/runners.py +48 -15
  108. local_deep_research/citation_handler.py +65 -92
  109. local_deep_research/citation_handlers/__init__.py +15 -0
  110. local_deep_research/citation_handlers/base_citation_handler.py +70 -0
  111. local_deep_research/citation_handlers/forced_answer_citation_handler.py +179 -0
  112. local_deep_research/citation_handlers/precision_extraction_handler.py +550 -0
  113. local_deep_research/citation_handlers/standard_citation_handler.py +80 -0
  114. local_deep_research/config/llm_config.py +271 -169
  115. local_deep_research/config/search_config.py +14 -5
  116. local_deep_research/defaults/__init__.py +0 -1
  117. local_deep_research/metrics/__init__.py +13 -0
  118. local_deep_research/metrics/database.py +58 -0
  119. local_deep_research/metrics/db_models.py +115 -0
  120. local_deep_research/metrics/migrate_add_provider_to_token_usage.py +148 -0
  121. local_deep_research/metrics/migrate_call_stack_tracking.py +105 -0
  122. local_deep_research/metrics/migrate_enhanced_tracking.py +75 -0
  123. local_deep_research/metrics/migrate_research_ratings.py +31 -0
  124. local_deep_research/metrics/models.py +61 -0
  125. local_deep_research/metrics/pricing/__init__.py +12 -0
  126. local_deep_research/metrics/pricing/cost_calculator.py +237 -0
  127. local_deep_research/metrics/pricing/pricing_cache.py +143 -0
  128. local_deep_research/metrics/pricing/pricing_fetcher.py +240 -0
  129. local_deep_research/metrics/query_utils.py +51 -0
  130. local_deep_research/metrics/search_tracker.py +380 -0
  131. local_deep_research/metrics/token_counter.py +1078 -0
  132. local_deep_research/migrate_db.py +3 -1
  133. local_deep_research/report_generator.py +22 -8
  134. local_deep_research/search_system.py +390 -9
  135. local_deep_research/test_migration.py +15 -5
  136. local_deep_research/utilities/db_utils.py +7 -4
  137. local_deep_research/utilities/es_utils.py +115 -104
  138. local_deep_research/utilities/llm_utils.py +15 -5
  139. local_deep_research/utilities/log_utils.py +151 -0
  140. local_deep_research/utilities/search_cache.py +387 -0
  141. local_deep_research/utilities/search_utilities.py +14 -6
  142. local_deep_research/utilities/threading_utils.py +92 -0
  143. local_deep_research/utilities/url_utils.py +6 -0
  144. local_deep_research/web/api.py +347 -0
  145. local_deep_research/web/app.py +13 -17
  146. local_deep_research/web/app_factory.py +71 -66
  147. local_deep_research/web/database/migrate_to_ldr_db.py +12 -4
  148. local_deep_research/web/database/migrations.py +20 -3
  149. local_deep_research/web/database/models.py +74 -25
  150. local_deep_research/web/database/schema_upgrade.py +49 -29
  151. local_deep_research/web/models/database.py +63 -83
  152. local_deep_research/web/routes/api_routes.py +56 -22
  153. local_deep_research/web/routes/benchmark_routes.py +4 -1
  154. local_deep_research/web/routes/globals.py +22 -0
  155. local_deep_research/web/routes/history_routes.py +71 -46
  156. local_deep_research/web/routes/metrics_routes.py +1155 -0
  157. local_deep_research/web/routes/research_routes.py +192 -54
  158. local_deep_research/web/routes/settings_routes.py +156 -55
  159. local_deep_research/web/services/research_service.py +412 -251
  160. local_deep_research/web/services/resource_service.py +36 -11
  161. local_deep_research/web/services/settings_manager.py +55 -17
  162. local_deep_research/web/services/settings_service.py +12 -4
  163. local_deep_research/web/services/socket_service.py +295 -188
  164. local_deep_research/web/static/css/custom_dropdown.css +180 -0
  165. local_deep_research/web/static/css/styles.css +39 -1
  166. local_deep_research/web/static/js/components/detail.js +633 -267
  167. local_deep_research/web/static/js/components/details.js +751 -0
  168. local_deep_research/web/static/js/components/fallback/formatting.js +11 -11
  169. local_deep_research/web/static/js/components/fallback/ui.js +23 -23
  170. local_deep_research/web/static/js/components/history.js +76 -76
  171. local_deep_research/web/static/js/components/logpanel.js +61 -13
  172. local_deep_research/web/static/js/components/progress.js +13 -2
  173. local_deep_research/web/static/js/components/research.js +99 -12
  174. local_deep_research/web/static/js/components/results.js +239 -106
  175. local_deep_research/web/static/js/main.js +40 -40
  176. local_deep_research/web/static/js/services/audio.js +1 -1
  177. local_deep_research/web/static/js/services/formatting.js +11 -11
  178. local_deep_research/web/static/js/services/keyboard.js +157 -0
  179. local_deep_research/web/static/js/services/pdf.js +80 -80
  180. local_deep_research/web/static/sounds/README.md +1 -1
  181. local_deep_research/web/templates/base.html +1 -0
  182. local_deep_research/web/templates/components/log_panel.html +7 -1
  183. local_deep_research/web/templates/components/mobile_nav.html +1 -1
  184. local_deep_research/web/templates/components/sidebar.html +3 -0
  185. local_deep_research/web/templates/pages/cost_analytics.html +1245 -0
  186. local_deep_research/web/templates/pages/details.html +325 -24
  187. local_deep_research/web/templates/pages/history.html +1 -1
  188. local_deep_research/web/templates/pages/metrics.html +1929 -0
  189. local_deep_research/web/templates/pages/progress.html +2 -2
  190. local_deep_research/web/templates/pages/research.html +53 -17
  191. local_deep_research/web/templates/pages/results.html +12 -1
  192. local_deep_research/web/templates/pages/star_reviews.html +803 -0
  193. local_deep_research/web/utils/formatters.py +9 -3
  194. local_deep_research/web_search_engines/default_search_engines.py +5 -3
  195. local_deep_research/web_search_engines/engines/full_search.py +8 -2
  196. local_deep_research/web_search_engines/engines/meta_search_engine.py +59 -20
  197. local_deep_research/web_search_engines/engines/search_engine_arxiv.py +19 -6
  198. local_deep_research/web_search_engines/engines/search_engine_brave.py +6 -2
  199. local_deep_research/web_search_engines/engines/search_engine_ddg.py +3 -1
  200. local_deep_research/web_search_engines/engines/search_engine_elasticsearch.py +81 -58
  201. local_deep_research/web_search_engines/engines/search_engine_github.py +46 -15
  202. local_deep_research/web_search_engines/engines/search_engine_google_pse.py +16 -6
  203. local_deep_research/web_search_engines/engines/search_engine_guardian.py +39 -15
  204. local_deep_research/web_search_engines/engines/search_engine_local.py +58 -25
  205. local_deep_research/web_search_engines/engines/search_engine_local_all.py +15 -5
  206. local_deep_research/web_search_engines/engines/search_engine_pubmed.py +63 -21
  207. local_deep_research/web_search_engines/engines/search_engine_searxng.py +37 -11
  208. local_deep_research/web_search_engines/engines/search_engine_semantic_scholar.py +27 -9
  209. local_deep_research/web_search_engines/engines/search_engine_serpapi.py +12 -4
  210. local_deep_research/web_search_engines/engines/search_engine_wayback.py +31 -10
  211. local_deep_research/web_search_engines/engines/search_engine_wikipedia.py +12 -3
  212. local_deep_research/web_search_engines/search_engine_base.py +83 -35
  213. local_deep_research/web_search_engines/search_engine_factory.py +25 -8
  214. local_deep_research/web_search_engines/search_engines_config.py +9 -3
  215. {local_deep_research-0.4.4.dist-info → local_deep_research-0.5.2.dist-info}/METADATA +7 -1
  216. local_deep_research-0.5.2.dist-info/RECORD +265 -0
  217. local_deep_research-0.4.4.dist-info/RECORD +0 -177
  218. {local_deep_research-0.4.4.dist-info → local_deep_research-0.5.2.dist-info}/WHEEL +0 -0
  219. {local_deep_research-0.4.4.dist-info → local_deep_research-0.5.2.dist-info}/entry_points.txt +0 -0
  220. {local_deep_research-0.4.4.dist-info → local_deep_research-0.5.2.dist-info}/licenses/LICENSE +0 -0
@@ -2,10 +2,9 @@
2
2
  Elasticsearch utilities for indexing and managing documents.
3
3
  """
4
4
 
5
- import json
6
5
  import logging
7
6
  import os
8
- from typing import Any, Dict, List, Optional, Union
7
+ from typing import Any, Dict, List, Optional
9
8
 
10
9
  from elasticsearch import Elasticsearch
11
10
  from elasticsearch.helpers import bulk
@@ -16,11 +15,11 @@ logger = logging.getLogger(__name__)
16
15
  class ElasticsearchManager:
17
16
  """
18
17
  Utility class for managing Elasticsearch indices and documents.
19
-
18
+
20
19
  This class provides methods for creating indices, indexing documents,
21
20
  and performing other Elasticsearch management tasks.
22
21
  """
23
-
22
+
24
23
  def __init__(
25
24
  self,
26
25
  hosts: List[str] = ["http://localhost:9200"],
@@ -31,7 +30,7 @@ class ElasticsearchManager:
31
30
  ):
32
31
  """
33
32
  Initialize the Elasticsearch manager.
34
-
33
+
35
34
  Args:
36
35
  hosts: List of Elasticsearch hosts
37
36
  username: Optional username for authentication
@@ -41,31 +40,37 @@ class ElasticsearchManager:
41
40
  """
42
41
  # Initialize the Elasticsearch client
43
42
  es_args = {}
44
-
43
+
45
44
  # Basic authentication
46
45
  if username and password:
47
46
  es_args["basic_auth"] = (username, password)
48
-
47
+
49
48
  # API key authentication
50
49
  if api_key:
51
50
  es_args["api_key"] = api_key
52
-
51
+
53
52
  # Cloud ID for Elastic Cloud
54
53
  if cloud_id:
55
54
  es_args["cloud_id"] = cloud_id
56
-
55
+
57
56
  # Connect to Elasticsearch
58
57
  self.client = Elasticsearch(hosts, **es_args)
59
-
58
+
60
59
  # Verify connection
61
60
  try:
62
61
  info = self.client.info()
63
- logger.info(f"Connected to Elasticsearch cluster: {info.get('cluster_name')}")
64
- logger.info(f"Elasticsearch version: {info.get('version', {}).get('number')}")
62
+ logger.info(
63
+ f"Connected to Elasticsearch cluster: {info.get('cluster_name')}"
64
+ )
65
+ logger.info(
66
+ f"Elasticsearch version: {info.get('version', {}).get('number')}"
67
+ )
65
68
  except Exception as e:
66
69
  logger.error(f"Failed to connect to Elasticsearch: {str(e)}")
67
- raise ConnectionError(f"Could not connect to Elasticsearch: {str(e)}")
68
-
70
+ raise ConnectionError(
71
+ f"Could not connect to Elasticsearch: {str(e)}"
72
+ )
73
+
69
74
  def create_index(
70
75
  self,
71
76
  index_name: str,
@@ -74,21 +79,23 @@ class ElasticsearchManager:
74
79
  ) -> bool:
75
80
  """
76
81
  Create an Elasticsearch index with optional mappings and settings.
77
-
82
+
78
83
  Args:
79
84
  index_name: Name of the index to create
80
85
  mappings: Optional mappings for the index fields
81
86
  settings: Optional settings for the index
82
-
87
+
83
88
  Returns:
84
89
  bool: True if successful, False otherwise
85
90
  """
86
91
  try:
87
92
  # Check if index already exists
88
93
  if self.client.indices.exists(index=index_name):
89
- logger.warning(f"Index '{index_name}' already exists - skipping creation")
94
+ logger.warning(
95
+ f"Index '{index_name}' already exists - skipping creation"
96
+ )
90
97
  return True
91
-
98
+
92
99
  # Default mappings for better text search if none provided
93
100
  if mappings is None:
94
101
  mappings = {
@@ -99,83 +106,69 @@ class ElasticsearchManager:
99
106
  "fields": {
100
107
  "keyword": {
101
108
  "type": "keyword",
102
- "ignore_above": 256
109
+ "ignore_above": 256,
103
110
  }
104
- }
105
- },
106
- "content": {
107
- "type": "text",
108
- "analyzer": "standard"
109
- },
110
- "url": {
111
- "type": "keyword"
112
- },
113
- "source": {
114
- "type": "keyword"
115
- },
116
- "timestamp": {
117
- "type": "date"
111
+ },
118
112
  },
119
- "metadata": {
120
- "type": "object",
121
- "enabled": True
122
- }
113
+ "content": {"type": "text", "analyzer": "standard"},
114
+ "url": {"type": "keyword"},
115
+ "source": {"type": "keyword"},
116
+ "timestamp": {"type": "date"},
117
+ "metadata": {"type": "object", "enabled": True},
123
118
  }
124
119
  }
125
-
120
+
126
121
  # Default settings if none provided
127
122
  if settings is None:
128
123
  settings = {
129
124
  "number_of_shards": 1,
130
125
  "number_of_replicas": 0,
131
126
  "analysis": {
132
- "analyzer": {
133
- "standard": {
134
- "type": "standard"
135
- }
136
- }
137
- }
127
+ "analyzer": {"standard": {"type": "standard"}}
128
+ },
138
129
  }
139
-
130
+
140
131
  # Create the index with mappings and settings
141
132
  create_response = self.client.indices.create(
142
133
  index=index_name,
143
134
  mappings=mappings,
144
135
  settings=settings,
145
136
  )
146
-
137
+
147
138
  logger.info(f"Created index '{index_name}': {create_response}")
148
139
  return True
149
-
140
+
150
141
  except Exception as e:
151
142
  logger.error(f"Error creating index '{index_name}': {str(e)}")
152
143
  return False
153
-
144
+
154
145
  def delete_index(self, index_name: str) -> bool:
155
146
  """
156
147
  Delete an Elasticsearch index.
157
-
148
+
158
149
  Args:
159
150
  index_name: Name of the index to delete
160
-
151
+
161
152
  Returns:
162
153
  bool: True if successful, False otherwise
163
154
  """
164
155
  try:
165
156
  # Check if index exists
166
157
  if not self.client.indices.exists(index=index_name):
167
- logger.warning(f"Index '{index_name}' does not exist - skipping deletion")
158
+ logger.warning(
159
+ f"Index '{index_name}' does not exist - skipping deletion"
160
+ )
168
161
  return True
169
-
162
+
170
163
  # Delete the index
171
164
  delete_response = self.client.indices.delete(index=index_name)
172
165
  logger.info(f"Deleted index '{index_name}': {delete_response}")
173
166
  return True
174
-
167
+
175
168
  except Exception as e:
176
169
  logger.error(f"Error deleting index '{index_name}': {str(e)}")
177
170
  return False
178
-
171
+
179
172
  def index_document(
180
173
  self,
181
174
  index_name: str,
@@ -185,13 +178,13 @@ class ElasticsearchManager:
185
178
  ) -> Optional[str]:
186
179
  """
187
180
  Index a single document in Elasticsearch.
188
-
181
+
189
182
  Args:
190
183
  index_name: Name of the index to add the document to
191
184
  document: The document to index
192
185
  document_id: Optional document ID (will be generated if not provided)
193
186
  refresh: Whether to refresh the index after indexing
194
-
187
+
195
188
  Returns:
196
189
  str: Document ID if successful, None otherwise
197
190
  """
@@ -203,14 +196,16 @@ class ElasticsearchManager:
203
196
  id=document_id,
204
197
  refresh=refresh,
205
198
  )
206
-
207
- logger.info(f"Indexed document in '{index_name}' with ID: {response['_id']}")
199
+
200
+ logger.info(
201
+ f"Indexed document in '{index_name}' with ID: {response['_id']}"
202
+ )
208
203
  return response["_id"]
209
-
204
+
210
205
  except Exception as e:
211
206
  logger.error(f"Error indexing document in '{index_name}': {str(e)}")
212
207
  return None
213
-
208
+
214
209
  def bulk_index_documents(
215
210
  self,
216
211
  index_name: str,
@@ -220,13 +215,13 @@ class ElasticsearchManager:
220
215
  ) -> int:
221
216
  """
222
217
  Bulk index multiple documents in Elasticsearch.
223
-
218
+
224
219
  Args:
225
220
  index_name: Name of the index to add the documents to
226
221
  documents: List of documents to index
227
222
  id_field: Optional field in the documents to use as the document ID
228
223
  refresh: Whether to refresh the index after indexing
229
-
224
+
230
225
  Returns:
231
226
  int: Number of successfully indexed documents
232
227
  """
@@ -238,13 +233,13 @@ class ElasticsearchManager:
238
233
  "_index": index_name,
239
234
  "_source": doc,
240
235
  }
241
-
236
+
242
237
  # Use the specified field as the document ID if provided
243
238
  if id_field and id_field in doc:
244
239
  action["_id"] = doc[id_field]
245
-
240
+
246
241
  actions.append(action)
247
-
242
+
248
243
  # Execute the bulk indexing
249
244
  success, failed = bulk(
250
245
  self.client,
@@ -252,14 +247,18 @@ class ElasticsearchManager:
252
247
  refresh=refresh,
253
248
  stats_only=True,
254
249
  )
255
-
256
- logger.info(f"Bulk indexed {success} documents in '{index_name}', failed: {failed}")
250
+
251
+ logger.info(
252
+ f"Bulk indexed {success} documents in '{index_name}', failed: {failed}"
253
+ )
257
254
  return success
258
-
255
+
259
256
  except Exception as e:
260
- logger.error(f"Error bulk indexing documents in '{index_name}': {str(e)}")
257
+ logger.error(
258
+ f"Error bulk indexing documents in '{index_name}': {str(e)}"
259
+ )
261
260
  return 0
262
-
261
+
263
262
  def index_file(
264
263
  self,
265
264
  index_name: str,
@@ -271,7 +270,7 @@ class ElasticsearchManager:
271
270
  ) -> Optional[str]:
272
271
  """
273
272
  Index a file in Elasticsearch, extracting text content and metadata.
274
-
273
+
275
274
  Args:
276
275
  index_name: Name of the index to add the document to
277
276
  file_path: Path to the file to index
@@ -279,53 +278,59 @@ class ElasticsearchManager:
279
278
  title_field: Field name to store the file title (filename if not specified)
280
279
  extract_metadata: Whether to extract file metadata
281
280
  refresh: Whether to refresh the index after indexing
282
-
281
+
283
282
  Returns:
284
283
  str: Document ID if successful, None otherwise
285
284
  """
286
285
  try:
287
- from langchain_community.document_loaders import UnstructuredFileLoader
288
-
286
+ from langchain_community.document_loaders import (
287
+ UnstructuredFileLoader,
288
+ )
289
+
289
290
  # Extract file content and metadata
290
291
  loader = UnstructuredFileLoader(file_path)
291
292
  documents = loader.load()
292
-
293
+
293
294
  # Combine all content from the documents
294
295
  content = "\n\n".join([doc.page_content for doc in documents])
295
-
296
+
296
297
  # Get the filename for the title
297
298
  filename = os.path.basename(file_path)
298
299
  title = filename
299
-
300
+
300
301
  # Prepare the document
301
302
  document = {
302
303
  content_field: content,
303
304
  }
304
-
305
+
305
306
  # Add title if requested
306
307
  if title_field:
307
308
  document[title_field] = title
308
-
309
+
309
310
  # Add metadata if requested
310
311
  if extract_metadata and documents:
311
312
  # Include metadata from the first document
312
313
  document["metadata"] = documents[0].metadata
313
-
314
+
314
315
  # Add file-specific metadata
315
316
  document["source"] = file_path
316
- document["file_extension"] = os.path.splitext(filename)[1].lstrip(".")
317
+ document["file_extension"] = os.path.splitext(filename)[
318
+ 1
319
+ ].lstrip(".")
317
320
  document["filename"] = filename
318
-
321
+
319
322
  # Index the document
320
323
  return self.index_document(index_name, document, refresh=refresh)
321
-
324
+
322
325
  except ImportError:
323
- logger.error("UnstructuredFileLoader not available. Please install the 'unstructured' package.")
326
+ logger.error(
327
+ "UnstructuredFileLoader not available. Please install the 'unstructured' package."
328
+ )
324
329
  return None
325
330
  except Exception as e:
326
331
  logger.error(f"Error indexing file '{file_path}': {str(e)}")
327
332
  return None
328
-
333
+
329
334
  def index_directory(
330
335
  self,
331
336
  index_name: str,
@@ -338,7 +343,7 @@ class ElasticsearchManager:
338
343
  ) -> int:
339
344
  """
340
345
  Index all matching files in a directory in Elasticsearch.
341
-
346
+
342
347
  Args:
343
348
  index_name: Name of the index to add the documents to
344
349
  directory_path: Path to the directory containing files to index
@@ -347,22 +352,24 @@ class ElasticsearchManager:
347
352
  title_field: Field name to store the file title
348
353
  extract_metadata: Whether to extract file metadata
349
354
  refresh: Whether to refresh the index after indexing
350
-
355
+
351
356
  Returns:
352
357
  int: Number of successfully indexed files
353
358
  """
354
359
  try:
355
360
  import glob
356
-
361
+
357
362
  # Find all matching files
358
363
  all_files = []
359
364
  for pattern in file_patterns:
360
365
  pattern_path = os.path.join(directory_path, pattern)
361
366
  matching_files = glob.glob(pattern_path)
362
367
  all_files.extend(matching_files)
363
-
364
- logger.info(f"Found {len(all_files)} files matching patterns {file_patterns} in {directory_path}")
365
-
368
+
369
+ logger.info(
370
+ f"Found {len(all_files)} files matching patterns {file_patterns} in {directory_path}"
371
+ )
372
+
366
373
  # Index each file
367
374
  successful_count = 0
368
375
  for file_path in all_files:
@@ -375,17 +382,21 @@ class ElasticsearchManager:
375
382
  extract_metadata=extract_metadata,
376
383
  refresh=refresh,
377
384
  )
378
-
385
+
379
386
  if doc_id:
380
387
  successful_count += 1
381
-
382
- logger.info(f"Successfully indexed {successful_count} files out of {len(all_files)}")
388
+
389
+ logger.info(
390
+ f"Successfully indexed {successful_count} files out of {len(all_files)}"
391
+ )
383
392
  return successful_count
384
-
393
+
385
394
  except Exception as e:
386
- logger.error(f"Error indexing directory '{directory_path}': {str(e)}")
395
+ logger.error(
396
+ f"Error indexing directory '{directory_path}': {str(e)}"
397
+ )
387
398
  return 0
388
-
399
+
389
400
  def search(
390
401
  self,
391
402
  index_name: str,
@@ -396,14 +407,14 @@ class ElasticsearchManager:
396
407
  ) -> Dict[str, Any]:
397
408
  """
398
409
  Search for documents in Elasticsearch.
399
-
410
+
400
411
  Args:
401
412
  index_name: Name of the index to search
402
413
  query: Search query
403
414
  fields: Fields to search in
404
415
  size: Maximum number of results to return
405
416
  highlight: Whether to include highlighted excerpts in results
406
-
417
+
407
418
  Returns:
408
419
  Dict: Elasticsearch search response
409
420
  """
@@ -419,7 +430,7 @@ class ElasticsearchManager:
419
430
  },
420
431
  "size": size,
421
432
  }
422
-
433
+
423
434
  # Add highlighting if requested
424
435
  if highlight:
425
436
  search_query["highlight"] = {
@@ -427,15 +438,15 @@ class ElasticsearchManager:
427
438
  "pre_tags": ["<em>"],
428
439
  "post_tags": ["</em>"],
429
440
  }
430
-
441
+
431
442
  # Execute the search
432
443
  response = self.client.search(
433
444
  index=index_name,
434
445
  body=search_query,
435
446
  )
436
-
447
+
437
448
  return response
438
-
449
+
439
450
  except Exception as e:
440
451
  logger.error(f"Error searching index '{index_name}': {str(e)}")
441
- return {"error": str(e)}
452
+ return {"error": str(e)}
@@ -78,7 +78,9 @@ def get_model(
78
78
  api_key = os.getenv("OPENAI_API_KEY")
79
79
  if not api_key:
80
80
  raise ValueError("OPENAI_API_KEY environment variable not set")
81
- return ChatOpenAI(model=model_name, api_key=api_key, **common_params)
81
+ return ChatOpenAI(
82
+ model=model_name, api_key=api_key, **common_params
83
+ )
82
84
  except ImportError:
83
85
  logger.error("langchain_openai not available")
84
86
  raise
@@ -89,7 +91,9 @@ def get_model(
89
91
 
90
92
  api_key = os.getenv("ANTHROPIC_API_KEY")
91
93
  if not api_key:
92
- raise ValueError("ANTHROPIC_API_KEY environment variable not set")
94
+ raise ValueError(
95
+ "ANTHROPIC_API_KEY environment variable not set"
96
+ )
93
97
  return ChatAnthropic(
94
98
  model=model_name, anthropic_api_key=api_key, **common_params
95
99
  )
@@ -103,7 +107,9 @@ def get_model(
103
107
 
104
108
  api_key = os.getenv("OPENAI_ENDPOINT_API_KEY")
105
109
  if not api_key:
106
- raise ValueError("OPENAI_ENDPOINT_API_KEY environment variable not set")
110
+ raise ValueError(
111
+ "OPENAI_ENDPOINT_API_KEY environment variable not set"
112
+ )
107
113
 
108
114
  endpoint_url = kwargs.get(
109
115
  "OPENAI_ENDPOINT_URL", "https://openrouter.ai/api/v1"
@@ -113,7 +119,9 @@ def get_model(
113
119
  "OPENAI_ENDPOINT_REQUIRES_MODEL", True
114
120
  ):
115
121
  return ChatOpenAI(
116
- api_key=api_key, openai_api_base=endpoint_url, **common_params
122
+ api_key=api_key,
123
+ openai_api_base=endpoint_url,
124
+ **common_params,
117
125
  )
118
126
  else:
119
127
  return ChatOpenAI(
@@ -130,7 +138,9 @@ def get_model(
130
138
  try:
131
139
  from langchain_ollama import ChatOllama
132
140
 
133
- logger.warning(f"Unknown model type '{model_type}', defaulting to Ollama")
141
+ logger.warning(
142
+ f"Unknown model type '{model_type}', defaulting to Ollama"
143
+ )
134
144
  return ChatOllama(model=model_name, **common_params)
135
145
  except (ImportError, Exception) as e:
136
146
  logger.error(f"Failed to load any model: {e}")