MindsDB 24.12.4.0__py3-none-any.whl → 25.1.2.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of MindsDB might be problematic. Click here for more details.
- MindsDB-25.1.2.1.dist-info/LICENSE +85 -0
- {MindsDB-24.12.4.0.dist-info → MindsDB-25.1.2.1.dist-info}/METADATA +336 -302
- {MindsDB-24.12.4.0.dist-info → MindsDB-25.1.2.1.dist-info}/RECORD +21 -18
- {MindsDB-24.12.4.0.dist-info → MindsDB-25.1.2.1.dist-info}/WHEEL +1 -1
- mindsdb/__about__.py +1 -1
- mindsdb/__main__.py +3 -7
- mindsdb/integrations/handlers/github_handler/github_tables.py +3 -0
- mindsdb/integrations/handlers/pgvector_handler/pgvector_handler.py +5 -2
- mindsdb/integrations/libs/api_handler.py +9 -1
- mindsdb/integrations/utilities/rag/pipelines/rag.py +11 -0
- mindsdb/integrations/utilities/rag/rag_pipeline_builder.py +16 -1
- mindsdb/integrations/utilities/rag/retrievers/__init__.py +3 -0
- mindsdb/integrations/utilities/rag/retrievers/multi_hop_retriever.py +85 -0
- mindsdb/integrations/utilities/rag/retrievers/retriever_factory.py +57 -0
- mindsdb/integrations/utilities/rag/retrievers/sql_retriever.py +41 -2
- mindsdb/integrations/utilities/rag/settings.py +158 -6
- mindsdb/integrations/utilities/sql_utils.py +1 -0
- mindsdb/interfaces/knowledge_base/controller.py +8 -3
- mindsdb/interfaces/storage/fs.py +38 -8
- mindsdb/utilities/starters.py +33 -0
- MindsDB-24.12.4.0.dist-info/LICENSE +0 -199
- {MindsDB-24.12.4.0.dist-info → MindsDB-25.1.2.1.dist-info}/top_level.txt +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
|
-
mindsdb/__about__.py,sha256=
|
|
1
|
+
mindsdb/__about__.py,sha256=ci8TSgEG6Owf9XVPslMY4JRnZARnpEln_MZ72ETEzAI,444
|
|
2
2
|
mindsdb/__init__.py,sha256=fZopLiAYa9MzMZ0d48JgHc_LddfFKDzh7n_8icsjrVs,54
|
|
3
|
-
mindsdb/__main__.py,sha256=
|
|
3
|
+
mindsdb/__main__.py,sha256=Wdv3C8I7owpBwTxnMVd-Zoim6nIVFA62g2wy6dT9CLw,21419
|
|
4
4
|
mindsdb/api/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
5
5
|
mindsdb/api/common/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
6
6
|
mindsdb/api/common/check_auth.py,sha256=cQEZqsnCbrRtUf8j4H6uPF98cDPu79t8TdtwBi5g30w,1345
|
|
@@ -649,7 +649,7 @@ mindsdb/integrations/handlers/github_handler/__about__.py,sha256=bDVOGS37C6HVRgS
|
|
|
649
649
|
mindsdb/integrations/handlers/github_handler/__init__.py,sha256=ov-CvC4-1IbwvuKykY7i3wJxHnYenbU8KtNZUSRzB-0,629
|
|
650
650
|
mindsdb/integrations/handlers/github_handler/connection_args.py,sha256=jnJKzK-5UI5-GZVh57iWaYXkAlhnc8p_4vUn357wXow,911
|
|
651
651
|
mindsdb/integrations/handlers/github_handler/github_handler.py,sha256=EKK1e0fOQsvZAyas0ZT8cjFDyhtXdAwlNOmR6QVAcpo,3746
|
|
652
|
-
mindsdb/integrations/handlers/github_handler/github_tables.py,sha256=
|
|
652
|
+
mindsdb/integrations/handlers/github_handler/github_tables.py,sha256=ICt2HmrE0TSI_dEFgVMA00INoL5z5jhA8aKH9C_EiiA,30970
|
|
653
653
|
mindsdb/integrations/handlers/github_handler/icon.svg,sha256=c1j3IrEwYsGXPwW-hjZDpOceLhVCW_0tT8Aba5PdF5c,1198
|
|
654
654
|
mindsdb/integrations/handlers/github_handler/requirements.txt,sha256=ggn1M2VGEZcJPTW2vei8pt2sHD5Iifca4oQ13CyMHqY,8
|
|
655
655
|
mindsdb/integrations/handlers/gitlab_handler/__about__.py,sha256=2uH49-_UriKLYrfBa8yIU6j1L9Bxpyp5Gqm7zsTT7eA,341
|
|
@@ -1170,7 +1170,7 @@ mindsdb/integrations/handlers/pgvector_handler/__about__.py,sha256=f7NEmnT5v8Bhc
|
|
|
1170
1170
|
mindsdb/integrations/handlers/pgvector_handler/__init__.py,sha256=291L7daFcaNnMUEcIjs7-U-jgOTJzEvIm2FoO43S_6Q,659
|
|
1171
1171
|
mindsdb/integrations/handlers/pgvector_handler/connection_args.py,sha256=etSu8X9uvYcdG0UZP7N8NdKCywmpcMf19ZPtthZArMg,1688
|
|
1172
1172
|
mindsdb/integrations/handlers/pgvector_handler/icon.svg,sha256=BPrdgXF1gRp2IBmklyYNRpdGtbi1F6Ca78V_L4ji_LE,13760
|
|
1173
|
-
mindsdb/integrations/handlers/pgvector_handler/pgvector_handler.py,sha256=
|
|
1173
|
+
mindsdb/integrations/handlers/pgvector_handler/pgvector_handler.py,sha256=N0b-9vKbYj8e7ZYxT4nG1FUwji4fTFceCjI2T_wzqZQ,17723
|
|
1174
1174
|
mindsdb/integrations/handlers/pgvector_handler/requirements.txt,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
1175
1175
|
mindsdb/integrations/handlers/phoenix_handler/__about__.py,sha256=PGGn5y0Y7tn2FnY2Ru1N7yjr6KZb8IhfUoKFc7GZO9I,359
|
|
1176
1176
|
mindsdb/integrations/handlers/phoenix_handler/__init__.py,sha256=dguuDcpGTUdL7KHbLPv3OLY9fmvJrQj5I_CsfmuQdKk,606
|
|
@@ -1676,7 +1676,7 @@ mindsdb/integrations/handlers/zotero_handler/requirements.txt,sha256=uRY96N9ioKv
|
|
|
1676
1676
|
mindsdb/integrations/handlers/zotero_handler/zotero_handler.py,sha256=CVmTS9Cqj85xWt1RWR8BKJDh9h-dUYLXWIkYHI5ncfk,3575
|
|
1677
1677
|
mindsdb/integrations/handlers/zotero_handler/zotero_tables.py,sha256=5uTXP3fYAQ6rKgWKKVRc2x0Pn1u4rNMZUHm01ewC9RA,4861
|
|
1678
1678
|
mindsdb/integrations/libs/__init__.py,sha256=uEz-XQLAwY2nMXc5ilEPP6cWWfo5HpO8o8UfV8JELS0,99
|
|
1679
|
-
mindsdb/integrations/libs/api_handler.py,sha256=
|
|
1679
|
+
mindsdb/integrations/libs/api_handler.py,sha256=PutrH8LuiEj7SNKb6Nl3ahaC6LieaB8VlG6RQL23w0w,13504
|
|
1680
1680
|
mindsdb/integrations/libs/api_handler_exceptions.py,sha256=mw83eTmo9knpVHP1ISnudonZcBMI_Xzr77b1wXN-eu8,236
|
|
1681
1681
|
mindsdb/integrations/libs/base.py,sha256=fVs3nf98jfA9aH5O18ZlrGjhZuasYHwD9TrgF9fJ8Eo,12851
|
|
1682
1682
|
mindsdb/integrations/libs/const.py,sha256=Pbdv7K_SvOWSwANwu4FK2S0jkJYaRnVZpfx4SexxR8c,407
|
|
@@ -1704,7 +1704,7 @@ mindsdb/integrations/utilities/date_utils.py,sha256=TqCyde_jbknQnrJqYIkNwEHUg-ds
|
|
|
1704
1704
|
mindsdb/integrations/utilities/handler_utils.py,sha256=UEgEckWFDZXLcu5AbBFrqPJuFFXgejDmDRkr6622CPc,2272
|
|
1705
1705
|
mindsdb/integrations/utilities/install.py,sha256=wbg0pcIn8C8PEfjA45DmwueEZ5nX27t2YsLe1xXhC7s,5018
|
|
1706
1706
|
mindsdb/integrations/utilities/query_traversal.py,sha256=oUSsBCwDcMBr-vaUYH5I8koMPDpJDXe0ByxPD0EIG0g,9292
|
|
1707
|
-
mindsdb/integrations/utilities/sql_utils.py,sha256=
|
|
1707
|
+
mindsdb/integrations/utilities/sql_utils.py,sha256=swvTNVI6K49wjtn4kqNhII7ftkLTTTSr_BTnlVpEj8c,6704
|
|
1708
1708
|
mindsdb/integrations/utilities/test_utils.py,sha256=eplCMcVjOsrXRhIhAUhgOPIt2zNiyUV67BYnJ2lvPiE,691
|
|
1709
1709
|
mindsdb/integrations/utilities/time_series_utils.py,sha256=qWVqZaXW7gdVM3jJ6WWYt1VP4WoFmaKt7jhNU6OpMvE,8312
|
|
1710
1710
|
mindsdb/integrations/utilities/utils.py,sha256=TuIgAbuZVkCRUSgLmqJ2STZ1CxVgBGrEnajW68SsKg0,972
|
|
@@ -1734,8 +1734,8 @@ mindsdb/integrations/utilities/handlers/validation_utilities/__init__.py,sha256=
|
|
|
1734
1734
|
mindsdb/integrations/utilities/handlers/validation_utilities/parameter_validation_utilities.py,sha256=AWGzBulx0tlN8d5uVD2yGvujJHoT4ZVKybA_5y3JzTU,681
|
|
1735
1735
|
mindsdb/integrations/utilities/rag/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
1736
1736
|
mindsdb/integrations/utilities/rag/config_loader.py,sha256=3m_hdTugNxbTevU79AMNzK-tjObpj5JBvpGMBZB0Iuw,3573
|
|
1737
|
-
mindsdb/integrations/utilities/rag/rag_pipeline_builder.py,sha256=
|
|
1738
|
-
mindsdb/integrations/utilities/rag/settings.py,sha256=
|
|
1737
|
+
mindsdb/integrations/utilities/rag/rag_pipeline_builder.py,sha256=0RhyafFoQPl1aniRYcOu57aljfqKqj_p0cNb_bfOrc8,3742
|
|
1738
|
+
mindsdb/integrations/utilities/rag/settings.py,sha256=kaaWn1lMY68U0rekPyVBlUCjN_i3f19qlwsDFUfmoe8,23331
|
|
1739
1739
|
mindsdb/integrations/utilities/rag/utils.py,sha256=AAMW1gybfAntUkAPb9AYUeWZUMtZAwWaYiLJcTHNB4A,1620
|
|
1740
1740
|
mindsdb/integrations/utilities/rag/vector_store.py,sha256=EwCdCf0dXwJXKOYfqTUPWEDOPLumWl2EKQiiXzgy8XA,3782
|
|
1741
1741
|
mindsdb/integrations/utilities/rag/chains/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -1747,14 +1747,16 @@ mindsdb/integrations/utilities/rag/loaders/vector_store_loader/__init__.py,sha25
|
|
|
1747
1747
|
mindsdb/integrations/utilities/rag/loaders/vector_store_loader/pgvector.py,sha256=d3ZN0aTOm7HYzZZLtnHmnKyiwY2tS2p_qPIa_m5KoGU,2455
|
|
1748
1748
|
mindsdb/integrations/utilities/rag/loaders/vector_store_loader/vector_store_loader.py,sha256=Da8UVQeOthtzjAr6Zfem1_KoCPKfqOqj0FtdBY08CRU,2120
|
|
1749
1749
|
mindsdb/integrations/utilities/rag/pipelines/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
1750
|
-
mindsdb/integrations/utilities/rag/pipelines/rag.py,sha256=
|
|
1750
|
+
mindsdb/integrations/utilities/rag/pipelines/rag.py,sha256=BFCj361hjfYd7UsxeLsZo0jADdYmNIoviHyeCaR50po,12343
|
|
1751
1751
|
mindsdb/integrations/utilities/rag/rerankers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
1752
1752
|
mindsdb/integrations/utilities/rag/rerankers/reranker_compressor.py,sha256=WS5rEpochjp5esGCnScm0lI2Oawu-ZKDEiDFJvM1D8M,6430
|
|
1753
|
-
mindsdb/integrations/utilities/rag/retrievers/__init__.py,sha256=
|
|
1753
|
+
mindsdb/integrations/utilities/rag/retrievers/__init__.py,sha256=Kuo3AJxzHVXMxPFxGqz2AXNPzjBzyMuk2yQj9pFpOsI,128
|
|
1754
1754
|
mindsdb/integrations/utilities/rag/retrievers/auto_retriever.py,sha256=ODNXqeBuDfatGQLvKvogO0aA-A5v3Z4xbCbvO5ICvt4,3923
|
|
1755
1755
|
mindsdb/integrations/utilities/rag/retrievers/base.py,sha256=fomZCUibDLKg-g4_uoTWz6OlhRG-GzqdPPoAR6XyPtk,264
|
|
1756
|
+
mindsdb/integrations/utilities/rag/retrievers/multi_hop_retriever.py,sha256=wC2M3Vsgzs5Nu6uEuD4YQZZU9W8eW_bc7RrrqvN38mk,3319
|
|
1756
1757
|
mindsdb/integrations/utilities/rag/retrievers/multi_vector_retriever.py,sha256=D9QzIRZWQ6LrT892twdgJj287_BlVEmXRQLYQegQuVA,4383
|
|
1757
|
-
mindsdb/integrations/utilities/rag/retrievers/
|
|
1758
|
+
mindsdb/integrations/utilities/rag/retrievers/retriever_factory.py,sha256=knmGLJNEG8x4KFhUYQiCIpghR5yEEeu_tonSUMUqXAQ,2205
|
|
1759
|
+
mindsdb/integrations/utilities/rag/retrievers/sql_retriever.py,sha256=4j0IOR8t6rXaS4Sca0EEklk2c6JYO87qvD8E6T9qDlA,8482
|
|
1758
1760
|
mindsdb/integrations/utilities/rag/splitters/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
1759
1761
|
mindsdb/integrations/utilities/rag/splitters/file_splitter.py,sha256=O14E_27omTti4jsxhgTiwHtlR2LdCa9D2DiEgc7yKmc,5260
|
|
1760
1762
|
mindsdb/interfaces/__init__.py,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
|
|
@@ -1789,7 +1791,7 @@ mindsdb/interfaces/jobs/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG
|
|
|
1789
1791
|
mindsdb/interfaces/jobs/jobs_controller.py,sha256=xBleXIpGLZ_Sg3j5e7BeTRV-Hp6ELMuFuQwtVZyQ72s,18247
|
|
1790
1792
|
mindsdb/interfaces/jobs/scheduler.py,sha256=m_C-QiTExljq0ilpe4vQiQv56AIWsrtfcdo0krMYQes,3664
|
|
1791
1793
|
mindsdb/interfaces/knowledge_base/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
1792
|
-
mindsdb/interfaces/knowledge_base/controller.py,sha256=
|
|
1794
|
+
mindsdb/interfaces/knowledge_base/controller.py,sha256=aOpyBOHL0Ea5aKgF-DJHbFeY6PdiQZ6doZGPJbhlCjw,34394
|
|
1793
1795
|
mindsdb/interfaces/knowledge_base/preprocessing/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
1794
1796
|
mindsdb/interfaces/knowledge_base/preprocessing/constants.py,sha256=0sLB2GOQhh3d46WNcVPF0iTmJc01CIXJoPT99XktuMo,295
|
|
1795
1797
|
mindsdb/interfaces/knowledge_base/preprocessing/document_loader.py,sha256=Ry0KG8F6kNPAnaoKRqsGX1Oq_ukt6ZmI8fYgj_0RnvU,6342
|
|
@@ -1812,7 +1814,7 @@ mindsdb/interfaces/skills/custom/text2sql/mindsdb_sql_tool.py,sha256=CDi2v2Ym3u-
|
|
|
1812
1814
|
mindsdb/interfaces/skills/custom/text2sql/mindsdb_sql_toolkit.py,sha256=H04cXtTmJsgvk5xrHhtAKXn0y2JR1dwNC8y84UuQAik,5687
|
|
1813
1815
|
mindsdb/interfaces/storage/__init__.py,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
|
|
1814
1816
|
mindsdb/interfaces/storage/db.py,sha256=NOVpYVEbSJtjEWKoaKS7Zu-Ji-Z8dh5rnMjK5Kf1__E,19340
|
|
1815
|
-
mindsdb/interfaces/storage/fs.py,sha256=
|
|
1817
|
+
mindsdb/interfaces/storage/fs.py,sha256=4Nyo-h23UtZc2nz_LWyVzboC_e1jlU58aph1_en8MdE,21155
|
|
1816
1818
|
mindsdb/interfaces/storage/json.py,sha256=KdrmXfqVCNZ_anNpfyygcFQeywbdJMCMbaI3HFJic-U,2925
|
|
1817
1819
|
mindsdb/interfaces/storage/model_fs.py,sha256=LQHyIs3wlOEpFHceAjziA7zuQKY3N-8gt9EuSYHO8zI,8267
|
|
1818
1820
|
mindsdb/interfaces/tabs/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -1900,6 +1902,7 @@ mindsdb/utilities/otel.py,sha256=0ybLBt8kFqWj_4TXEsHumywlQ2zYUcRX3jb8OIsOanY,314
|
|
|
1900
1902
|
mindsdb/utilities/ps.py,sha256=vsY7119OJGYd_n1FXT_FuMTfUL3dVr3WiTRyASaGD00,2339
|
|
1901
1903
|
mindsdb/utilities/security.py,sha256=Mdj3c9Y2BFiEmwKY7J-yrbYdQ6oMgWENPE1XIu4tidk,1506
|
|
1902
1904
|
mindsdb/utilities/sentry.py,sha256=PMI55LbYvCi8NLmI3QgCNL1M8bymVr8J4JBTywAl1WE,2420
|
|
1905
|
+
mindsdb/utilities/starters.py,sha256=PknV9T2RdnEN7ekgt0ru_PYYT20NlFEPz3LrH8kws2w,818
|
|
1903
1906
|
mindsdb/utilities/telemetry.py,sha256=E1RAdG3g4BwUuD5rx6MpFVP1J1gEd6O4AzHTND6ql1A,1377
|
|
1904
1907
|
mindsdb/utilities/wizards.py,sha256=vlWb50BSmBomj4jMGVc-DABx88GGAaWWqZf8RxA6O-0,1708
|
|
1905
1908
|
mindsdb/utilities/hooks/__init__.py,sha256=HDPLuCxND4GUj5biGVfYeCmMZipMIyTG5WCOU3k654E,796
|
|
@@ -1915,8 +1918,8 @@ mindsdb/utilities/profiler/__init__.py,sha256=d4VXl80uSm1IotR-WwbBInPmLmACiK0Azx
|
|
|
1915
1918
|
mindsdb/utilities/profiler/profiler.py,sha256=KCUtOupkbM_nCoof9MtiuhUzDGezx4a4NsBX6vGWbPA,3936
|
|
1916
1919
|
mindsdb/utilities/render/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
1917
1920
|
mindsdb/utilities/render/sqlalchemy_render.py,sha256=ot4I-2OV81f7P5XohbFjIb7PluQ5uHPREY7ci8TjBoI,28072
|
|
1918
|
-
MindsDB-
|
|
1919
|
-
MindsDB-
|
|
1920
|
-
MindsDB-
|
|
1921
|
-
MindsDB-
|
|
1922
|
-
MindsDB-
|
|
1921
|
+
MindsDB-25.1.2.1.dist-info/LICENSE,sha256=ziqdjujs6WDn-9g3t0SISjHCBc2pLRht3gnRbQoXmIs,5804
|
|
1922
|
+
MindsDB-25.1.2.1.dist-info/METADATA,sha256=ARNb_YqJfCA1cqWDbN0TPrUQkBoSVYBeRO12Ibrlyxs,43066
|
|
1923
|
+
MindsDB-25.1.2.1.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
|
|
1924
|
+
MindsDB-25.1.2.1.dist-info/top_level.txt,sha256=10wPR96JDf3hM8aMP7Fz0lDlmClEP480zgXISJKr5jE,8
|
|
1925
|
+
MindsDB-25.1.2.1.dist-info/RECORD,,
|
mindsdb/__about__.py
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
__title__ = 'MindsDB'
|
|
2
2
|
__package_name__ = 'mindsdb'
|
|
3
|
-
__version__ = '
|
|
3
|
+
__version__ = '25.1.2.1'
|
|
4
4
|
__description__ = "MindsDB's AI SQL Server enables developers to build AI tools that need access to real-time data to perform their tasks"
|
|
5
5
|
__email__ = "jorge@mindsdb.com"
|
|
6
6
|
__author__ = 'MindsDB Inc'
|
mindsdb/__main__.py
CHANGED
|
@@ -22,13 +22,9 @@ logger.debug("Starting MindsDB...")
|
|
|
22
22
|
|
|
23
23
|
from mindsdb.__about__ import __version__ as mindsdb_version
|
|
24
24
|
from mindsdb.utilities.config import config
|
|
25
|
-
from mindsdb.
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
from mindsdb.api.postgres.start import start as start_postgres
|
|
29
|
-
from mindsdb.interfaces.tasks.task_monitor import start as start_tasks
|
|
30
|
-
from mindsdb.utilities.ml_task_queue.consumer import start as start_ml_task_queue
|
|
31
|
-
from mindsdb.interfaces.jobs.scheduler import start as start_scheduler
|
|
25
|
+
from mindsdb.utilities.starters import (
|
|
26
|
+
start_http, start_mysql, start_mongo, start_postgres, start_ml_task_queue, start_scheduler, start_tasks
|
|
27
|
+
)
|
|
32
28
|
from mindsdb.utilities.ps import is_pid_listen_port, get_child_pids
|
|
33
29
|
from mindsdb.utilities.functions import get_versions_where_predictors_become_obsolete
|
|
34
30
|
from mindsdb.interfaces.database.integrations import integration_controller
|
|
@@ -46,6 +46,7 @@ class GithubIssuesTable(APIResource):
|
|
|
46
46
|
if col.column in ('created', 'updated', 'comments'):
|
|
47
47
|
issues_kwargs['sort'] = col.column
|
|
48
48
|
issues_kwargs['direction'] = 'asc' if col.ascending else 'desc'
|
|
49
|
+
sort.applied = True
|
|
49
50
|
|
|
50
51
|
# supported only 1 column
|
|
51
52
|
break
|
|
@@ -274,6 +275,7 @@ class GithubPullRequestsTable(APIResource):
|
|
|
274
275
|
if col.column in ('created', 'updated', 'popularity'):
|
|
275
276
|
issues_kwargs['sort'] = col.column
|
|
276
277
|
issues_kwargs['direction'] = 'asc' if col.ascending else 'desc'
|
|
278
|
+
sort.applied = True
|
|
277
279
|
|
|
278
280
|
# supported only 1 column
|
|
279
281
|
break
|
|
@@ -418,6 +420,7 @@ class GithubCommitsTable(APIResource):
|
|
|
418
420
|
if col.column in ("author", "date", "message"):
|
|
419
421
|
commits_kwargs['sort'] = col.column
|
|
420
422
|
commits_kwargs['direction'] = 'asc' if col.ascending else 'desc'
|
|
423
|
+
sort.applied = True
|
|
421
424
|
|
|
422
425
|
# supported only 1 column
|
|
423
426
|
break
|
|
@@ -283,7 +283,7 @@ class PgVectorHandler(VectorStoreHandler, PostgresHandler):
|
|
|
283
283
|
# See https://docs.pgvecto.rs/use-case/hybrid-search.html#advanced-search-merge-the-results-of-full-text-search-and-vector-search.
|
|
284
284
|
#
|
|
285
285
|
# We can break down the below query as follows:
|
|
286
|
-
#
|
|
286
|
+
#
|
|
287
287
|
# Start with a CTE (Common Table Expression) called semantic_search (https://www.postgresql.org/docs/current/queries-with.html).
|
|
288
288
|
# This expression calculates rank by the defined distance function, which measures the distance between the
|
|
289
289
|
# embeddings column and the given embeddings vector. Results are ordered by this rank.
|
|
@@ -339,13 +339,16 @@ class PgVectorHandler(VectorStoreHandler, PostgresHandler):
|
|
|
339
339
|
full_search_query = f'{semantic_search_cte}{full_text_search_cte}{hybrid_select}'
|
|
340
340
|
return self.raw_query(full_search_query)
|
|
341
341
|
|
|
342
|
-
def create_table(self, table_name: str, if_not_exists=True):
|
|
342
|
+
def create_table(self, table_name: str, sparse=False, if_not_exists=True):
|
|
343
343
|
"""
|
|
344
344
|
Run a create table query on the pgvector database.
|
|
345
345
|
"""
|
|
346
346
|
table_name = self._check_table(table_name)
|
|
347
347
|
|
|
348
348
|
query = f"CREATE TABLE IF NOT EXISTS {table_name} (id text PRIMARY KEY, content text, embeddings vector, metadata jsonb)"
|
|
349
|
+
if sparse:
|
|
350
|
+
query = f"CREATE TABLE IF NOT EXISTS {table_name} (id text PRIMARY KEY, content text, embeddings sparsevec, metadata jsonb)"
|
|
351
|
+
|
|
349
352
|
self.raw_query(query)
|
|
350
353
|
|
|
351
354
|
def insert(
|
|
@@ -6,7 +6,7 @@ from mindsdb_sql_parser.ast import ASTNode, Select, Insert, Update, Delete, Star
|
|
|
6
6
|
from mindsdb_sql_parser.ast.select.identifier import Identifier
|
|
7
7
|
|
|
8
8
|
from mindsdb.integrations.utilities.sql_utils import (
|
|
9
|
-
extract_comparison_conditions, filter_dataframe,
|
|
9
|
+
extract_comparison_conditions, filter_dataframe, sort_dataframe,
|
|
10
10
|
FilterCondition, FilterOperator, SortColumn
|
|
11
11
|
)
|
|
12
12
|
from mindsdb.integrations.libs.base import BaseHandler
|
|
@@ -207,6 +207,14 @@ class APIResource(APITable):
|
|
|
207
207
|
|
|
208
208
|
result = filter_dataframe(result, filters)
|
|
209
209
|
|
|
210
|
+
if sort:
|
|
211
|
+
sort_columns = []
|
|
212
|
+
for idx, a_sort in enumerate(sort):
|
|
213
|
+
if not a_sort.applied:
|
|
214
|
+
sort_columns.append(query.order_by[idx])
|
|
215
|
+
|
|
216
|
+
result = sort_dataframe(result, sort_columns)
|
|
217
|
+
|
|
210
218
|
if limit is not None and len(result) > limit:
|
|
211
219
|
result = result[:int(limit)]
|
|
212
220
|
|
|
@@ -227,12 +227,23 @@ class LangChainRAGPipeline:
|
|
|
227
227
|
'provider': retriever_config.llm_config.provider,
|
|
228
228
|
**retriever_config.llm_config.params
|
|
229
229
|
})
|
|
230
|
+
vector_store_operator = VectorStoreOperator(
|
|
231
|
+
vector_store=config.vector_store,
|
|
232
|
+
documents=config.documents,
|
|
233
|
+
embedding_model=config.embedding_model,
|
|
234
|
+
vector_store_config=config.vector_store_config
|
|
235
|
+
)
|
|
236
|
+
vector_store_retriever = vector_store_operator.vector_store.as_retriever()
|
|
237
|
+
vector_store_retriever = cls._apply_search_kwargs(vector_store_retriever, config.search_kwargs, config.search_type)
|
|
230
238
|
retriever = SQLRetriever(
|
|
239
|
+
fallback_retriever=vector_store_retriever,
|
|
231
240
|
vector_store_handler=knowledge_base_table.get_vector_db(),
|
|
232
241
|
metadata_schemas=retriever_config.metadata_schemas,
|
|
233
242
|
examples=retriever_config.examples,
|
|
234
243
|
embeddings_model=embeddings,
|
|
235
244
|
rewrite_prompt_template=retriever_config.rewrite_prompt_template,
|
|
245
|
+
retry_prompt_template=retriever_config.query_retry_template,
|
|
246
|
+
num_retries=retriever_config.num_retries,
|
|
236
247
|
sql_prompt_template=retriever_config.sql_prompt_template,
|
|
237
248
|
query_checker_template=retriever_config.query_checker_template,
|
|
238
249
|
embeddings_table=knowledge_base_table._kb.vector_database_table,
|
|
@@ -7,6 +7,7 @@ from mindsdb.integrations.utilities.rag.settings import (
|
|
|
7
7
|
RAGPipelineModel
|
|
8
8
|
)
|
|
9
9
|
from mindsdb.integrations.utilities.rag.utils import documents_to_df
|
|
10
|
+
from mindsdb.integrations.utilities.rag.retrievers.multi_hop_retriever import MultiHopRetriever
|
|
10
11
|
from mindsdb.utilities.log import getLogger
|
|
11
12
|
from langchain_text_splitters import RecursiveCharacterTextSplitter
|
|
12
13
|
|
|
@@ -16,7 +17,8 @@ _retriever_strategies = {
|
|
|
16
17
|
RetrieverType.VECTOR_STORE: lambda config: _create_pipeline_from_vector_store(config),
|
|
17
18
|
RetrieverType.AUTO: lambda config: _create_pipeline_from_auto_retriever(config),
|
|
18
19
|
RetrieverType.MULTI: lambda config: _create_pipeline_from_multi_retriever(config),
|
|
19
|
-
RetrieverType.SQL: lambda config: _create_pipeline_from_sql_retriever(config)
|
|
20
|
+
RetrieverType.SQL: lambda config: _create_pipeline_from_sql_retriever(config),
|
|
21
|
+
RetrieverType.MULTI_HOP: lambda config: _create_pipeline_from_multi_hop_retriever(config)
|
|
20
22
|
}
|
|
21
23
|
|
|
22
24
|
|
|
@@ -53,6 +55,19 @@ def _create_pipeline_from_sql_retriever(config: RAGPipelineModel) -> LangChainRA
|
|
|
53
55
|
)
|
|
54
56
|
|
|
55
57
|
|
|
58
|
+
def _create_pipeline_from_multi_hop_retriever(config: RAGPipelineModel) -> LangChainRAGPipeline:
|
|
59
|
+
retriever = MultiHopRetriever.from_config(config)
|
|
60
|
+
return LangChainRAGPipeline(
|
|
61
|
+
retriever_runnable=retriever,
|
|
62
|
+
prompt_template=config.rag_prompt_template,
|
|
63
|
+
llm=config.llm,
|
|
64
|
+
reranker_config=config.reranker_config,
|
|
65
|
+
reranker=config.reranker,
|
|
66
|
+
vector_store_config=config.vector_store_config,
|
|
67
|
+
summarization_config=config.summarization_config
|
|
68
|
+
)
|
|
69
|
+
|
|
70
|
+
|
|
56
71
|
def _process_documents_to_df(config: RAGPipelineModel) -> pd.DataFrame:
|
|
57
72
|
return documents_to_df(config.content_column_name,
|
|
58
73
|
config.documents,
|
|
@@ -0,0 +1,85 @@
|
|
|
1
|
+
from typing import List, Optional
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
from langchain_core.callbacks.manager import CallbackManagerForRetrieverRun
|
|
5
|
+
from langchain_core.documents import Document
|
|
6
|
+
from langchain_core.language_models import BaseChatModel
|
|
7
|
+
from langchain_core.retrievers import BaseRetriever
|
|
8
|
+
from pydantic import Field, PrivateAttr
|
|
9
|
+
|
|
10
|
+
from mindsdb.integrations.utilities.rag.settings import (
|
|
11
|
+
RAGPipelineModel,
|
|
12
|
+
DEFAULT_QUESTION_REFORMULATION_TEMPLATE
|
|
13
|
+
)
|
|
14
|
+
from mindsdb.integrations.utilities.rag.retrievers.retriever_factory import create_retriever
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class MultiHopRetriever(BaseRetriever):
|
|
18
|
+
"""A retriever that implements multi-hop question reformulation strategy.
|
|
19
|
+
|
|
20
|
+
This retriever takes a base retriever and uses an LLM to generate follow-up
|
|
21
|
+
questions based on the initial results. It then retrieves documents for each
|
|
22
|
+
follow-up question and combines all results.
|
|
23
|
+
"""
|
|
24
|
+
|
|
25
|
+
base_retriever: BaseRetriever = Field(description="Base retriever to use for document lookup")
|
|
26
|
+
llm: BaseChatModel = Field(description="LLM to use for generating follow-up questions")
|
|
27
|
+
max_hops: int = Field(default=3, description="Maximum number of follow-up questions to generate")
|
|
28
|
+
reformulation_template: str = Field(
|
|
29
|
+
default=DEFAULT_QUESTION_REFORMULATION_TEMPLATE,
|
|
30
|
+
description="Template for reformulating questions"
|
|
31
|
+
)
|
|
32
|
+
|
|
33
|
+
_asked_questions: set = PrivateAttr(default_factory=set)
|
|
34
|
+
|
|
35
|
+
@classmethod
|
|
36
|
+
def from_config(cls, config: RAGPipelineModel) -> "MultiHopRetriever":
|
|
37
|
+
"""Create a MultiHopRetriever from a RAGPipelineModel config."""
|
|
38
|
+
if config.multi_hop_config is None:
|
|
39
|
+
raise ValueError("multi_hop_config must be set for MultiHopRetriever")
|
|
40
|
+
|
|
41
|
+
# Create base retriever based on type
|
|
42
|
+
base_retriever = create_retriever(config, config.multi_hop_config.base_retriever_type)
|
|
43
|
+
|
|
44
|
+
return cls(
|
|
45
|
+
base_retriever=base_retriever,
|
|
46
|
+
llm=config.llm,
|
|
47
|
+
max_hops=config.multi_hop_config.max_hops,
|
|
48
|
+
reformulation_template=config.multi_hop_config.reformulation_template
|
|
49
|
+
)
|
|
50
|
+
|
|
51
|
+
def _get_relevant_documents(
|
|
52
|
+
self, query: str, *, run_manager: Optional[CallbackManagerForRetrieverRun] = None
|
|
53
|
+
) -> List[Document]:
|
|
54
|
+
"""Get relevant documents using multi-hop retrieval."""
|
|
55
|
+
if query in self._asked_questions:
|
|
56
|
+
return []
|
|
57
|
+
|
|
58
|
+
self._asked_questions.add(query)
|
|
59
|
+
|
|
60
|
+
# Get initial documents
|
|
61
|
+
docs = self.base_retriever._get_relevant_documents(query)
|
|
62
|
+
if not docs or len(self._asked_questions) >= self.max_hops:
|
|
63
|
+
return docs
|
|
64
|
+
|
|
65
|
+
# Generate follow-up questions
|
|
66
|
+
context = "\n".join(doc.page_content for doc in docs)
|
|
67
|
+
prompt = self.reformulation_template.format(
|
|
68
|
+
question=query,
|
|
69
|
+
context=context
|
|
70
|
+
)
|
|
71
|
+
|
|
72
|
+
try:
|
|
73
|
+
follow_up_questions = json.loads(self.llm.invoke(prompt))
|
|
74
|
+
if not isinstance(follow_up_questions, list):
|
|
75
|
+
return docs
|
|
76
|
+
except (json.JSONDecodeError, TypeError):
|
|
77
|
+
return docs
|
|
78
|
+
|
|
79
|
+
# Get documents for follow-up questions
|
|
80
|
+
for question in follow_up_questions:
|
|
81
|
+
if isinstance(question, str):
|
|
82
|
+
follow_up_docs = self._get_relevant_documents(question)
|
|
83
|
+
docs.extend(follow_up_docs)
|
|
84
|
+
|
|
85
|
+
return docs
|
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
"""Factory functions for creating retrievers."""
|
|
2
|
+
|
|
3
|
+
from mindsdb.integrations.utilities.rag.settings import RAGPipelineModel, RetrieverType
|
|
4
|
+
from mindsdb.integrations.utilities.rag.vector_store import VectorStoreOperator
|
|
5
|
+
from mindsdb.integrations.utilities.rag.retrievers.auto_retriever import AutoRetriever
|
|
6
|
+
from mindsdb.integrations.utilities.rag.retrievers.sql_retriever import SQLRetriever
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def create_vector_store_retriever(config: RAGPipelineModel):
|
|
10
|
+
"""Create a vector store retriever."""
|
|
11
|
+
if getattr(config.vector_store, '_mock_return_value', None) is not None:
|
|
12
|
+
# If vector_store is mocked, return a simple mock retriever for testing
|
|
13
|
+
from unittest.mock import MagicMock
|
|
14
|
+
mock_retriever = MagicMock()
|
|
15
|
+
mock_retriever._get_relevant_documents.return_value = [
|
|
16
|
+
{"page_content": "The Wright brothers invented the airplane."}
|
|
17
|
+
]
|
|
18
|
+
return mock_retriever
|
|
19
|
+
|
|
20
|
+
vector_store_operator = VectorStoreOperator(
|
|
21
|
+
vector_store=config.vector_store,
|
|
22
|
+
documents=config.documents,
|
|
23
|
+
embedding_model=config.embedding_model,
|
|
24
|
+
vector_store_config=config.vector_store_config
|
|
25
|
+
)
|
|
26
|
+
return vector_store_operator.vector_store.as_retriever()
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
def create_auto_retriever(config: RAGPipelineModel):
|
|
30
|
+
"""Create an auto retriever."""
|
|
31
|
+
return AutoRetriever(
|
|
32
|
+
vector_store=config.vector_store,
|
|
33
|
+
documents=config.documents,
|
|
34
|
+
embedding_model=config.embedding_model
|
|
35
|
+
)
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
def create_sql_retriever(config: RAGPipelineModel):
|
|
39
|
+
"""Create a SQL retriever."""
|
|
40
|
+
return SQLRetriever(
|
|
41
|
+
sql_source=config.sql_source,
|
|
42
|
+
llm=config.llm
|
|
43
|
+
)
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
def create_retriever(config: RAGPipelineModel, retriever_type: RetrieverType = None):
|
|
47
|
+
"""Create a retriever based on type."""
|
|
48
|
+
retriever_type = retriever_type or config.retriever_type
|
|
49
|
+
|
|
50
|
+
if retriever_type == RetrieverType.VECTOR_STORE:
|
|
51
|
+
return create_vector_store_retriever(config)
|
|
52
|
+
elif retriever_type == RetrieverType.AUTO:
|
|
53
|
+
return create_auto_retriever(config)
|
|
54
|
+
elif retriever_type == RetrieverType.SQL:
|
|
55
|
+
return create_sql_retriever(config)
|
|
56
|
+
else:
|
|
57
|
+
raise ValueError(f"Unsupported retriever type: {retriever_type}")
|
|
@@ -12,6 +12,9 @@ from langchain_core.retrievers import BaseRetriever
|
|
|
12
12
|
from mindsdb.api.executor.data_types.response_type import RESPONSE_TYPE
|
|
13
13
|
from mindsdb.integrations.libs.vectordatabase_handler import DistanceFunction, VectorStoreHandler
|
|
14
14
|
from mindsdb.integrations.utilities.rag.settings import LLMExample, MetadataSchema, SearchKwargs
|
|
15
|
+
from mindsdb.utilities import log
|
|
16
|
+
|
|
17
|
+
logger = log.getLogger(__name__)
|
|
15
18
|
|
|
16
19
|
|
|
17
20
|
class SQLRetriever(BaseRetriever):
|
|
@@ -29,12 +32,15 @@ class SQLRetriever(BaseRetriever):
|
|
|
29
32
|
|
|
30
33
|
4. Actually execute the query against our vector database to retrieve documents & return them.
|
|
31
34
|
'''
|
|
35
|
+
fallback_retriever: BaseRetriever
|
|
32
36
|
vector_store_handler: VectorStoreHandler
|
|
33
37
|
metadata_schemas: Optional[List[MetadataSchema]] = None
|
|
34
38
|
examples: Optional[List[LLMExample]] = None
|
|
35
39
|
|
|
36
40
|
embeddings_model: Embeddings
|
|
37
41
|
rewrite_prompt_template: str
|
|
42
|
+
retry_prompt_template: str
|
|
43
|
+
num_retries: int
|
|
38
44
|
sql_prompt_template: str
|
|
39
45
|
query_checker_template: str
|
|
40
46
|
embeddings_table: str
|
|
@@ -120,6 +126,25 @@ Output:
|
|
|
120
126
|
query=sql_query
|
|
121
127
|
)
|
|
122
128
|
|
|
129
|
+
def _prepare_retry_query(self, query: str, error: str, run_manager: CallbackManagerForRetrieverRun) -> str:
|
|
130
|
+
sql_prompt = self._prepare_sql_prompt()
|
|
131
|
+
# Use provided schema as context for retrying failed queries.
|
|
132
|
+
schema = sql_prompt.partial_variables.get('schema', '')
|
|
133
|
+
retry_prompt = PromptTemplate(
|
|
134
|
+
input_variables=['query', 'dialect', 'error', 'embeddings_table', 'schema'],
|
|
135
|
+
template=self.retry_prompt_template
|
|
136
|
+
)
|
|
137
|
+
retry_chain = LLMChain(llm=self.llm, prompt=retry_prompt)
|
|
138
|
+
# Generate rewritten query.
|
|
139
|
+
return retry_chain.predict(
|
|
140
|
+
query=query,
|
|
141
|
+
dialect='postgres',
|
|
142
|
+
error=error,
|
|
143
|
+
embeddings_table=self.embeddings_table,
|
|
144
|
+
schema=schema,
|
|
145
|
+
callbacks=run_manager.get_child() if run_manager else None
|
|
146
|
+
)
|
|
147
|
+
|
|
123
148
|
def _get_relevant_documents(
|
|
124
149
|
self, query: str, *, run_manager: CallbackManagerForRetrieverRun
|
|
125
150
|
) -> List[Document]:
|
|
@@ -137,8 +162,22 @@ Output:
|
|
|
137
162
|
checked_sql_query_with_embeddings = checked_sql_query_with_embeddings.replace('```', '')
|
|
138
163
|
# Actually execute the similarity search with metadata filters.
|
|
139
164
|
document_response = self.vector_store_handler.native_query(checked_sql_query_with_embeddings)
|
|
140
|
-
|
|
141
|
-
|
|
165
|
+
num_retries = 0
|
|
166
|
+
while document_response.resp_type == RESPONSE_TYPE.ERROR:
|
|
167
|
+
error_msg = document_response.error_message
|
|
168
|
+
# LLMs won't always generate a working SQL query so we should have a fallback after retrying.
|
|
169
|
+
logger.info(f'SQL Retriever query {checked_sql_query} failed with error {error_msg}')
|
|
170
|
+
if num_retries >= self.num_retries:
|
|
171
|
+
logger.info('Using fallback retriever in SQL retriever.')
|
|
172
|
+
return self.fallback_retriever._get_relevant_documents(retrieval_query, run_manager)
|
|
173
|
+
query_to_retry = self._prepare_retry_query(checked_sql_query, error_msg, run_manager)
|
|
174
|
+
query_to_retry_with_embeddings = query_to_retry.format(embeddings=str(embedded_query))
|
|
175
|
+
# Handle LLM output that has the ```sql delimiter possibly.
|
|
176
|
+
query_to_retry_with_embeddings = query_to_retry_with_embeddings.replace('```sql', '')
|
|
177
|
+
query_to_retry_with_embeddings = query_to_retry_with_embeddings.replace('```', '')
|
|
178
|
+
document_response = self.vector_store_handler.native_query(query_to_retry_with_embeddings)
|
|
179
|
+
num_retries += 1
|
|
180
|
+
|
|
142
181
|
document_df = document_response.data_frame
|
|
143
182
|
retrieved_documents = []
|
|
144
183
|
for _, document_row in document_df.iterrows():
|