langflow-base-nightly 0.5.0.dev30__py3-none-any.whl → 0.5.0.dev32__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (148)
  1. langflow/__main__.py +130 -30
  2. langflow/api/router.py +2 -0
  3. langflow/api/v1/__init__.py +2 -0
  4. langflow/api/v1/knowledge_bases.py +437 -0
  5. langflow/base/data/kb_utils.py +104 -0
  6. langflow/components/data/__init__.py +4 -0
  7. langflow/components/data/kb_ingest.py +585 -0
  8. langflow/components/data/kb_retrieval.py +254 -0
  9. langflow/components/processing/save_file.py +1 -1
  10. langflow/frontend/assets/{SlackIcon-D2PxMQjX.js → SlackIcon-Bikuxo8x.js} +1 -1
  11. langflow/frontend/assets/{Wikipedia-BNM0lBPs.js → Wikipedia-B6aCFf5-.js} +1 -1
  12. langflow/frontend/assets/{Wolfram-COQyGyeC.js → Wolfram-CekL_M-a.js} +1 -1
  13. langflow/frontend/assets/{index-CTpfN0Cy.js → index-09CVJwsY.js} +1 -1
  14. langflow/frontend/assets/{index-DWUG3nTC.js → index-1MEYR1La.js} +1 -1
  15. langflow/frontend/assets/{index-Ds9y6kEK.js → index-2vQdFIK_.js} +1 -1
  16. langflow/frontend/assets/{index-DRdKSzTn.js → index-4Tl3Nxdo.js} +1 -1
  17. langflow/frontend/assets/{index-O_vPh7iD.js → index-5G402gB8.js} +1 -1
  18. langflow/frontend/assets/{index-D15h4ir2.js → index-5hW8VleF.js} +1 -1
  19. langflow/frontend/assets/{index-BydnMWnM.js → index-6GWpsedd.js} +1 -1
  20. langflow/frontend/assets/{index-4vIU43o6.js → index-7x3wNZ-4.js} +1 -1
  21. langflow/frontend/assets/{index-DrFpyu9Z.js → index-9gkURvG2.js} +1 -1
  22. langflow/frontend/assets/{index-DRe5h2N_.js → index-AOX7bbjJ.js} +1 -1
  23. langflow/frontend/assets/{index-fJyq3ZWN.js → index-B20KmxhS.js} +1 -1
  24. langflow/frontend/assets/{index-D_sHnnuS.js → index-B2EmwqKj.js} +1 -1
  25. langflow/frontend/assets/{index-DEc_2ba8.js → index-B4AtFbkN.js} +1 -1
  26. langflow/frontend/assets/{index-D_zQiboE.js → index-B4xLpgbM.js} +1 -1
  27. langflow/frontend/assets/{index-Db8Xgs-K.js → index-B9KRIJFi.js} +1 -1
  28. langflow/frontend/assets/{index-BzCZNz2f.js → index-B9uOBe6Y.js} +1 -1
  29. langflow/frontend/assets/{index-pFTvwRsJ.js → index-BDmbsLY2.js} +1 -1
  30. langflow/frontend/assets/{index-CGef2axA.js → index-BIKbxmIh.js} +1 -1
  31. langflow/frontend/assets/{index-BTl_mLju.js → index-BIjUtp6d.js} +1 -1
  32. langflow/frontend/assets/{index-Jze67eTW.js → index-BJIsQS8D.js} +1 -1
  33. langflow/frontend/assets/{index-DV-gdr7l.js → index-BO4fl1uU.js} +1 -1
  34. langflow/frontend/assets/{index-BUVmswbg.js → index-BRE8A4Q_.js} +1 -1
  35. langflow/frontend/assets/{index-CTzWsu8S.js → index-BRNhftot.js} +1 -1
  36. langflow/frontend/assets/{index-DFYBo38q.js → index-BRizlHaN.js} +1 -1
  37. langflow/frontend/assets/{index-DbPP5vss.js → index-BRwkzs92.js} +1 -1
  38. langflow/frontend/assets/{index-BzE7oL1n.js → index-BZCt_UnJ.js} +1 -1
  39. langflow/frontend/assets/{index-BhRSkpxu.js → index-B_ytx_iA.js} +1 -1
  40. langflow/frontend/assets/{index-ByCunkn4.js → index-BcqeL_f4.js} +1 -1
  41. langflow/frontend/assets/{index-CAAZbdRp.js → index-Bgd7yLoW.js} +1 -1
  42. langflow/frontend/assets/{index-DpDbxNdQ.js → index-BlRTHXW5.js} +1 -1
  43. langflow/frontend/assets/{index-jXSPQ_JS.js → index-BllNr21U.js} +1 -1
  44. langflow/frontend/assets/{index-fpMcQS2L.js → index-Bm7a2vMS.js} +1 -1
  45. langflow/frontend/assets/{index-BFQzmLDT.js → index-Bn4HAVDG.js} +1 -1
  46. langflow/frontend/assets/{index-D8EpAMC3.js → index-BwlYjc56.js} +1 -1
  47. langflow/frontend/assets/{index-BcCN9mpu.js → index-BzCjyHto.js} +1 -1
  48. langflow/frontend/assets/{index-D6-jZ4sc.js → index-C3RZz8WE.js} +1 -1
  49. langflow/frontend/assets/{index-D66JmFlL.js → index-C69gdJqw.js} +1 -1
  50. langflow/frontend/assets/{index-pYD0BTGu.js → index-C6P0vvSP.js} +1 -1
  51. langflow/frontend/assets/{index-CIjw_ZkP.js → index-C7wDSVVH.js} +1 -1
  52. langflow/frontend/assets/{index-BCTEK38J.js → index-CAzSTGAM.js} +1 -1
  53. langflow/frontend/assets/{index-8FjgS_Vj.js → index-CEn_71Wk.js} +1 -1
  54. langflow/frontend/assets/{index-BFiCUM5l.js → index-CGVDXKtN.js} +1 -1
  55. langflow/frontend/assets/{index-BIH2K0v8.js → index-CIYzjH2y.js} +1 -1
  56. langflow/frontend/assets/{index-gM8j2Wvk.js → index-COqjpsdy.js} +1 -1
  57. langflow/frontend/assets/{index-2q8IFBNP.js → index-CP0tFKwN.js} +1 -1
  58. langflow/frontend/assets/{index-CXpZa4H9.js → index-CPIdMJkX.js} +1 -1
  59. langflow/frontend/assets/{index-B-YjnRWx.js → index-CSRizl2S.js} +1 -1
  60. langflow/frontend/assets/{index-DFo0yfS5.js → index-CUe1ivTn.js} +1 -1
  61. langflow/frontend/assets/{index-C2x5hzgY.js → index-CVphnxXi.js} +1 -1
  62. langflow/frontend/assets/{index-Bz3QnhLZ.js → index-CY6LUi4V.js} +1 -1
  63. langflow/frontend/assets/{index-Cq6gk34q.js → index-C_2G2ZqJ.js} +1 -1
  64. langflow/frontend/assets/{index-CSXUVElo.js → index-C_K6Tof7.js} +1 -1
  65. langflow/frontend/assets/{index-1D7jZ8vz.js → index-C_UkF-RJ.js} +1 -1
  66. langflow/frontend/assets/{index-BVGZcHHC.js → index-Cbwk3f-p.js} +1 -1
  67. langflow/frontend/assets/{index-kiqvo0Zi.js → index-CdwjD4IX.js} +1 -1
  68. langflow/frontend/assets/{index-BNy3Al2s.js → index-CgbINWS8.js} +1 -1
  69. langflow/frontend/assets/{index-BXJpd9hg.js → index-CglSqvB5.js} +1 -1
  70. langflow/frontend/assets/{index-D9CF_54p.js → index-CmiRgF_-.js} +1 -1
  71. langflow/frontend/assets/{index-ez1EW657.js → index-Cp7Pmn03.js} +1 -1
  72. langflow/frontend/assets/{index-aypzjPzG.js → index-Cq30cQcP.js} +1 -1
  73. langflow/frontend/assets/index-CqS7zir1.css +1 -0
  74. langflow/frontend/assets/{index-DKv0y9Dp.js → index-Cr2oy5K2.js} +1 -1
  75. langflow/frontend/assets/{index-DrfwVxtD.js → index-Crq_yhkG.js} +1 -1
  76. langflow/frontend/assets/{index-CzJzRS6i.js → index-Cs_jt3dj.js} +1 -1
  77. langflow/frontend/assets/{index-DO0mS8FQ.js → index-Cy-ZEfWh.js} +1 -1
  78. langflow/frontend/assets/{index-Q0bwuTZY.js → index-Cyk3aCmP.js} +1 -1
  79. langflow/frontend/assets/{index-DToZROdu.js → index-D-HTZ68O.js} +1 -1
  80. langflow/frontend/assets/{index-C0AEZF1v.js → index-D1RgjMON.js} +1 -1
  81. langflow/frontend/assets/{index-DilRRF2S.js → index-D29n5mus.js} +1 -1
  82. langflow/frontend/assets/{index-CKLOrtrx.js → index-D2nHdRne.js} +1 -1
  83. langflow/frontend/assets/{index-sfFDGjjd.js → index-D7Vx6mgS.js} +1 -1
  84. langflow/frontend/assets/{index-BAHhLqW9.js → index-D7nFs6oq.js} +1 -1
  85. langflow/frontend/assets/{index-C7jY4x98.js → index-DAJafn16.js} +1 -1
  86. langflow/frontend/assets/{index-BefwTGbP.js → index-DDcpxWU4.js} +1 -1
  87. langflow/frontend/assets/{index-CTZ9iXFr.js → index-DEuXrfXH.js} +1 -1
  88. langflow/frontend/assets/{index-DFfr0xSt.js → index-DF0oWRdd.js} +1 -1
  89. langflow/frontend/assets/{index-Bh5pQAZC.js → index-DI0zAExi.js} +1 -1
  90. langflow/frontend/assets/{index-CG-Suo0F.js → index-DJs6FoYC.js} +1 -1
  91. langflow/frontend/assets/{index-dvTTQhKz.js → index-DNS4La1f.js} +1 -1
  92. langflow/frontend/assets/{index-nLDaeeZg.js → index-DOI0ceS-.js} +1 -1
  93. langflow/frontend/assets/{index-DakdEtbq.js → index-DOb9c2bf.js} +1 -1
  94. langflow/frontend/assets/{index-CEVnRp4_.js → index-DS4F_Phe.js} +1 -1
  95. langflow/frontend/assets/{index-DGRg2M1l.js → index-DTJX3yQa.js} +1 -1
  96. langflow/frontend/assets/{index-BjAsd-Vo.js → index-DVV_etfW.js} +1 -1
  97. langflow/frontend/assets/{index-BrIuZD2A.js → index-DX_InNVT.js} +1 -1
  98. langflow/frontend/assets/{index-jG-zLXRN.js → index-DbmqjLy6.js} +1 -1
  99. langflow/frontend/assets/{index-DSvOFGJR.js → index-Dc0p1Oxl.js} +1 -1
  100. langflow/frontend/assets/{index-87GFtXu5.js → index-DkJCCraf.js} +1 -1
  101. langflow/frontend/assets/{index-BXidWkLM.js → index-DlMAYATX.js} +1 -1
  102. langflow/frontend/assets/{index-sbTxhltT.js → index-DmaQAn3K.js} +1 -1
  103. langflow/frontend/assets/{index-DkC5vMvx.js → index-DmvjdU1N.js} +1 -1
  104. langflow/frontend/assets/{index-CSUglByd.js → index-DnusMCK1.js} +1 -1
  105. langflow/frontend/assets/{index-DZOTHXs0.js → index-DoFlaGDx.js} +1 -1
  106. langflow/frontend/assets/{index-CZkMjaa8.js → index-DqDQk0Cu.js} +1 -1
  107. langflow/frontend/assets/{index-lc10GnwG.js → index-DrvRK4_i.js} +1 -1
  108. langflow/frontend/assets/{index-BNm-yAYc.js → index-DtCsjX48.js} +1 -1
  109. langflow/frontend/assets/{index-BeLnhfG-.js → index-Dy7ehgeV.js} +1 -1
  110. langflow/frontend/assets/{index-RGG9hk9J.js → index-Dz0r9Idb.js} +1 -1
  111. langflow/frontend/assets/{index-Bcq2yA-p.js → index-DzDNhMMW.js} +1 -1
  112. langflow/frontend/assets/{index-P3f-GeAm.js → index-FYcoJPMP.js} +1 -1
  113. langflow/frontend/assets/{index-DQwvl_Rp.js → index-Iamzh9ZT.js} +1 -1
  114. langflow/frontend/assets/{index-Cy6n8tA9.js → index-J0pvFqLk.js} +1 -1
  115. langflow/frontend/assets/{index-D1XTMye3.js → index-J98sU-1p.js} +1 -1
  116. langflow/frontend/assets/{index-BZ0rL0tK.js → index-JHCxbvlW.js} +1 -1
  117. langflow/frontend/assets/{index-DmSH63k1.js → index-KnS52ylc.js} +1 -1
  118. langflow/frontend/assets/{index-WGZ88ShH.js → index-L7FKc9QN.js} +1 -1
  119. langflow/frontend/assets/{index-BIoFnUtx.js → index-RveG4dl9.js} +1 -1
  120. langflow/frontend/assets/{index-BDdkPrzu.js → index-T2jJOG85.js} +1 -1
  121. langflow/frontend/assets/{index-2839k6WO.js → index-TRyDa01A.js} +1 -1
  122. langflow/frontend/assets/{index-DvOdMz35.js → index-U7J1YiWE.js} +1 -1
  123. langflow/frontend/assets/{index-DzUx1-Bl.js → index-UI2ws3qp.js} +1984 -1984
  124. langflow/frontend/assets/{index-8Fx5I2fx.js → index-VO-pk-Hg.js} +1 -1
  125. langflow/frontend/assets/{index-e-RKmhti.js → index-_3qag0I4.js} +1 -1
  126. langflow/frontend/assets/{index-X67tRPXo.js → index-dfaj9-hY.js} +1 -1
  127. langflow/frontend/assets/{index-CHexGuNQ.js → index-eJwu5YEi.js} +1 -1
  128. langflow/frontend/assets/{index-Dz5YIK1W.js → index-in188l0A.js} +1 -1
  129. langflow/frontend/assets/{index-CTwkLLMr.js → index-pkOi9P45.js} +1 -1
  130. langflow/frontend/assets/{index-D6BaTmee.js → index-qXcoVIRo.js} +1 -1
  131. langflow/frontend/assets/{index-euS8RcNY.js → index-xVx59Op-.js} +1 -1
  132. langflow/frontend/assets/{index-C4WueQ4k.js → index-yIh6-LZT.js} +1 -1
  133. langflow/frontend/assets/lazyIconImports-kvf_Kak2.js +2 -0
  134. langflow/frontend/assets/{use-post-add-user-CA-_peAV.js → use-post-add-user-Bt6vZvvT.js} +1 -1
  135. langflow/frontend/index.html +2 -2
  136. langflow/initial_setup/starter_projects/Knowledge Ingestion.json +1052 -0
  137. langflow/initial_setup/starter_projects/Knowledge Retrieval.json +709 -0
  138. langflow/initial_setup/starter_projects/News Aggregator.json +4 -4
  139. langflow/services/database/models/user/crud.py +7 -0
  140. langflow/services/settings/auth.py +14 -1
  141. langflow/services/settings/base.py +3 -0
  142. langflow/services/utils.py +8 -3
  143. {langflow_base_nightly-0.5.0.dev30.dist-info → langflow_base_nightly-0.5.0.dev32.dist-info}/METADATA +2 -1
  144. {langflow_base_nightly-0.5.0.dev30.dist-info → langflow_base_nightly-0.5.0.dev32.dist-info}/RECORD +146 -140
  145. langflow/frontend/assets/index-DIcdzk44.css +0 -1
  146. langflow/frontend/assets/lazyIconImports-lnczjBhY.js +0 -2
  147. {langflow_base_nightly-0.5.0.dev30.dist-info → langflow_base_nightly-0.5.0.dev32.dist-info}/WHEEL +0 -0
  148. {langflow_base_nightly-0.5.0.dev30.dist-info → langflow_base_nightly-0.5.0.dev32.dist-info}/entry_points.txt +0 -0
langflow/__main__.py CHANGED
@@ -15,7 +15,9 @@ import click
15
15
  import httpx
16
16
  import typer
17
17
  from dotenv import load_dotenv
18
+ from fastapi import HTTPException
18
19
  from httpx import HTTPError
20
+ from jose import JWTError
19
21
  from multiprocess import cpu_count
20
22
  from multiprocess.context import Process
21
23
  from packaging import version as pkg_version
@@ -29,9 +31,9 @@ from langflow.cli.progress import create_langflow_progress
29
31
  from langflow.initial_setup.setup import get_or_create_default_folder
30
32
  from langflow.logging.logger import configure, logger
31
33
  from langflow.main import setup_app
32
- from langflow.services.database.utils import session_getter
34
+ from langflow.services.auth.utils import check_key, get_current_user_by_jwt
33
35
  from langflow.services.deps import get_db_service, get_settings_service, session_scope
34
- from langflow.services.settings.constants import DEFAULT_SUPERUSER
36
+ from langflow.services.settings.constants import DEFAULT_SUPERUSER, DEFAULT_SUPERUSER_PASSWORD
35
37
  from langflow.services.utils import initialize_services
36
38
  from langflow.utils.version import fetch_latest_version, get_version_info
37
39
  from langflow.utils.version import is_pre_release as langflow_is_pre_release
@@ -632,41 +634,138 @@ def print_banner(host: str, port: int, protocol: str) -> None:
632
634
 
633
635
  @app.command()
634
636
  def superuser(
635
- username: str = typer.Option(..., prompt=True, help="Username for the superuser."),
636
- password: str = typer.Option(..., prompt=True, hide_input=True, help="Password for the superuser."),
637
+ username: str = typer.Option(
638
+ None, help="Username for the superuser. Defaults to 'langflow' when AUTO_LOGIN is enabled."
639
+ ),
640
+ password: str = typer.Option(
641
+ None, help="Password for the superuser. Defaults to 'langflow' when AUTO_LOGIN is enabled."
642
+ ),
637
643
  log_level: str = typer.Option("error", help="Logging level.", envvar="LANGFLOW_LOG_LEVEL"),
644
+ auth_token: str = typer.Option(
645
+ None, help="Authentication token of existing superuser.", envvar="LANGFLOW_SUPERUSER_TOKEN"
646
+ ),
638
647
  ) -> None:
639
- """Create a superuser."""
648
+ """Create a superuser.
649
+
650
+ When AUTO_LOGIN is enabled, uses default credentials.
651
+ In production mode, requires authentication.
652
+ """
640
653
  configure(log_level=log_level)
641
- db_service = get_db_service()
642
654
 
643
- async def _create_superuser():
644
- await initialize_services()
645
- async with session_getter(db_service) as session:
646
- from langflow.services.auth.utils import create_super_user
647
-
648
- if await create_super_user(db=session, username=username, password=password):
649
- # Verify that the superuser was created
650
- from langflow.services.database.models.user.model import User
651
-
652
- stmt = select(User).where(User.username == username)
653
- user: User = (await session.exec(stmt)).first()
654
- if user is None or not user.is_superuser:
655
- typer.echo("Superuser creation failed.")
656
- return
657
- # Now create the first folder for the user
658
- result = await get_or_create_default_folder(session, user.id)
659
- if result:
660
- typer.echo("Default folder created successfully.")
661
- else:
662
- msg = "Could not create default folder."
663
- raise RuntimeError(msg)
664
- typer.echo("Superuser created successfully.")
655
+ asyncio.run(_create_superuser(username, password, auth_token))
665
656
 
666
- else:
657
+
658
+ async def _create_superuser(username: str, password: str, auth_token: str | None):
659
+ """Create a superuser."""
660
+ await initialize_services()
661
+
662
+ settings_service = get_settings_service()
663
+ # Check if superuser creation via CLI is enabled
664
+ if not settings_service.auth_settings.ENABLE_SUPERUSER_CLI:
665
+ typer.echo("Error: Superuser creation via CLI is disabled.")
666
+ typer.echo("Set LANGFLOW_ENABLE_SUPERUSER_CLI=true to enable this feature.")
667
+ raise typer.Exit(1)
668
+
669
+ if settings_service.auth_settings.AUTO_LOGIN:
670
+ # Force default credentials for AUTO_LOGIN mode
671
+ username = DEFAULT_SUPERUSER
672
+ password = DEFAULT_SUPERUSER_PASSWORD
673
+ else:
674
+ # Production mode - prompt for credentials if not provided
675
+ if not username:
676
+ username = typer.prompt("Username")
677
+ if not password:
678
+ password = typer.prompt("Password", hide_input=True)
679
+
680
+ from langflow.services.database.models.user.crud import get_all_superusers
681
+
682
+ existing_superusers = []
683
+ async with session_scope() as session:
684
+ # Note that the default superuser is created by the initialize_services() function,
685
+ # but leaving this check here in case we change that behavior
686
+ existing_superusers = await get_all_superusers(session)
687
+ is_first_setup = len(existing_superusers) == 0
688
+
689
+ # If AUTO_LOGIN is true, only allow default superuser creation
690
+ if settings_service.auth_settings.AUTO_LOGIN:
691
+ if not is_first_setup:
692
+ typer.echo("Error: Cannot create additional superusers when AUTO_LOGIN is enabled.")
693
+ typer.echo("AUTO_LOGIN mode is for development with only the default superuser.")
694
+ typer.echo("To create additional superusers:")
695
+ typer.echo("1. Set LANGFLOW_AUTO_LOGIN=false")
696
+ typer.echo("2. Run this command again with --auth-token")
697
+ raise typer.Exit(1)
698
+
699
+ typer.echo(f"AUTO_LOGIN enabled. Creating default superuser '{username}'...")
700
+ typer.echo(f"Note: Default credentials are {DEFAULT_SUPERUSER}/{DEFAULT_SUPERUSER_PASSWORD}")
701
+ # AUTO_LOGIN is false - production mode
702
+ elif is_first_setup:
703
+ typer.echo("No superusers found. Creating first superuser...")
704
+ else:
705
+ # Authentication is required in production mode
706
+ if not auth_token:
707
+ typer.echo("Error: Creating a superuser requires authentication.")
708
+ typer.echo("Please provide --auth-token with a valid superuser API key or JWT token.")
709
+ typer.echo("To get a token, use: `uv run langflow api_key`")
710
+ raise typer.Exit(1)
711
+
712
+ # Validate the auth token
713
+ try:
714
+ auth_user = None
715
+ async with session_scope() as session:
716
+ # Try JWT first
717
+ user = None
718
+ try:
719
+ user = await get_current_user_by_jwt(auth_token, session)
720
+ except (JWTError, HTTPException):
721
+ # Try API key
722
+ api_key_result = await check_key(session, auth_token)
723
+ if api_key_result and hasattr(api_key_result, "is_superuser"):
724
+ user = api_key_result
725
+ auth_user = user
726
+
727
+ if not auth_user or not auth_user.is_superuser:
728
+ typer.echo(
729
+ "Error: Invalid token or insufficient privileges. Only superusers can create other superusers."
730
+ )
731
+ raise typer.Exit(1)
732
+ except typer.Exit:
733
+ raise # Re-raise typer.Exit without wrapping
734
+ except Exception as e: # noqa: BLE001
735
+ typer.echo(f"Error: Authentication failed - {e!s}")
736
+ raise typer.Exit(1) from None
737
+
738
+ # Auth complete, create the superuser
739
+ async with session_scope() as session:
740
+ from langflow.services.auth.utils import create_super_user
741
+
742
+ if await create_super_user(db=session, username=username, password=password):
743
+ # Verify that the superuser was created
744
+ from langflow.services.database.models.user.model import User
745
+
746
+ stmt = select(User).where(User.username == username)
747
+ created_user: User = (await session.exec(stmt)).first()
748
+ if created_user is None or not created_user.is_superuser:
667
749
  typer.echo("Superuser creation failed.")
750
+ return
751
+ # Now create the first folder for the user
752
+ result = await get_or_create_default_folder(session, created_user.id)
753
+ if result:
754
+ typer.echo("Default folder created successfully.")
755
+ else:
756
+ msg = "Could not create default folder."
757
+ raise RuntimeError(msg)
668
758
 
669
- asyncio.run(_create_superuser())
759
+ # Log the superuser creation for audit purposes
760
+ logger.warning(
761
+ f"SECURITY AUDIT: New superuser '{username}' created via CLI command"
762
+ + (" by authenticated user" if auth_token else " (first-time setup)")
763
+ )
764
+ typer.echo("Superuser created successfully.")
765
+
766
+ else:
767
+ logger.error(f"SECURITY AUDIT: Failed attempt to create superuser '{username}' via CLI")
768
+ typer.echo("Superuser creation failed.")
670
769
 
671
770
 
672
771
  # command to copy the langflow database from the cache to the current directory
@@ -749,6 +848,7 @@ def api_key(
749
848
  settings_service = get_settings_service()
750
849
  auth_settings = settings_service.auth_settings
751
850
  if not auth_settings.AUTO_LOGIN:
851
+ # TODO: Allow non-auto-login users to create API keys via CLI
752
852
  typer.echo("Auto login is disabled. API keys cannot be created through the CLI.")
753
853
  return None
754
854
 
langflow/api/router.py CHANGED
@@ -8,6 +8,7 @@ from langflow.api.v1 import (
8
8
  files_router,
9
9
  flows_router,
10
10
  folders_router,
11
+ knowledge_bases_router,
11
12
  login_router,
12
13
  mcp_projects_router,
13
14
  mcp_router,
@@ -45,6 +46,7 @@ router_v1.include_router(monitor_router)
45
46
  router_v1.include_router(folders_router)
46
47
  router_v1.include_router(projects_router)
47
48
  router_v1.include_router(starter_projects_router)
49
+ router_v1.include_router(knowledge_bases_router)
48
50
  router_v1.include_router(mcp_router)
49
51
  router_v1.include_router(voice_mode_router)
50
52
  router_v1.include_router(mcp_projects_router)
@@ -4,6 +4,7 @@ from langflow.api.v1.endpoints import router as endpoints_router
4
4
  from langflow.api.v1.files import router as files_router
5
5
  from langflow.api.v1.flows import router as flows_router
6
6
  from langflow.api.v1.folders import router as folders_router
7
+ from langflow.api.v1.knowledge_bases import router as knowledge_bases_router
7
8
  from langflow.api.v1.login import router as login_router
8
9
  from langflow.api.v1.mcp import router as mcp_router
9
10
  from langflow.api.v1.mcp_projects import router as mcp_projects_router
@@ -23,6 +24,7 @@ __all__ = [
23
24
  "files_router",
24
25
  "flows_router",
25
26
  "folders_router",
27
+ "knowledge_bases_router",
26
28
  "login_router",
27
29
  "mcp_projects_router",
28
30
  "mcp_router",
@@ -0,0 +1,437 @@
1
+ import json
2
+ import shutil
3
+ from http import HTTPStatus
4
+ from pathlib import Path
5
+
6
+ import pandas as pd
7
+ from fastapi import APIRouter, HTTPException
8
+ from langchain_chroma import Chroma
9
+ from loguru import logger
10
+ from pydantic import BaseModel
11
+
12
+ from langflow.services.deps import get_settings_service
13
+
14
# Every route declared on this router uses the "/knowledge_bases" prefix.
router = APIRouter(tags=["Knowledge Bases"], prefix="/knowledge_bases")


# Resolve the storage root once, at import time. An unset directory aborts the import
# with ValueError rather than failing later inside a request handler.
settings = get_settings_service().settings
knowledge_directory = settings.knowledge_bases_dir
if not knowledge_directory:
    msg = "Knowledge bases directory is not set in the settings."
    raise ValueError(msg)
# expanduser() lets the configured value start with "~".
KNOWLEDGE_BASES_DIR = Path(knowledge_directory).expanduser()
23
+
24
+
25
class KnowledgeBaseInfo(BaseModel):
    """Summary of a single knowledge base, as returned by the knowledge-base endpoints."""

    # Directory name of the knowledge base under the knowledge-bases root.
    id: str
    # Display name: the directory name with "_" and "-" replaced by spaces, title-cased.
    name: str
    # Embedding provider/model labels; "Unknown" when nothing could be detected.
    embedding_provider: str | None = "Unknown"
    embedding_model: str | None = "Unknown"
    # Total size in bytes of the files in the knowledge base directory.
    size: int = 0
    # Word and character totals over the analysed text columns.
    words: int = 0
    characters: int = 0
    # Number of stored chunks, and characters per chunk rounded to one decimal.
    chunks: int = 0
    avg_chunk_size: float = 0.0
35
+
36
+
37
class BulkDeleteRequest(BaseModel):
    """Request payload listing several knowledge bases by name.

    NOTE(review): presumably consumed by a bulk-delete endpoint defined further down the
    module - confirm against that handler.
    """

    # Names of the knowledge bases the request refers to.
    kb_names: list[str]
39
+
40
+
41
def get_kb_root_path() -> Path:
    """Return the root directory under which every knowledge base is stored.

    The value is the module-level ``KNOWLEDGE_BASES_DIR`` resolved at import time.
    """
    return KNOWLEDGE_BASES_DIR
44
+
45
+
46
def get_directory_size(path: Path) -> int:
    """Calculate the total size of all files in a directory tree.

    This is a best-effort measurement: entries that cannot be inspected are skipped
    instead of aborting the walk, so a single unreadable or concurrently deleted file
    no longer discards the sizes of every file visited after it.

    Args:
        path: Directory to measure (searched recursively).

    Returns:
        Sum of ``st_size`` in bytes over every regular file found; 0 when the directory
        is missing, empty, or cannot be traversed.
    """
    total_size = 0
    try:
        for file_path in path.rglob("*"):
            try:
                if file_path.is_file():
                    total_size += file_path.stat().st_size
            except OSError:
                # PermissionError is an OSError subclass; skip just this entry.
                continue
    except OSError:
        # The traversal itself failed; report what was accumulated so far.
        pass
    return total_size
56
+
57
+
58
+ def detect_embedding_provider(kb_path: Path) -> str:
59
+ """Detect the embedding provider from config files and directory structure."""
60
+ # Provider patterns to check for
61
+ provider_patterns = {
62
+ "OpenAI": ["openai", "text-embedding-ada", "text-embedding-3"],
63
+ "HuggingFace": ["sentence-transformers", "huggingface", "bert-"],
64
+ "Cohere": ["cohere", "embed-english", "embed-multilingual"],
65
+ "Google": ["palm", "gecko", "google"],
66
+ "Chroma": ["chroma"],
67
+ }
68
+
69
+ # Check JSON config files for provider information
70
+ for config_file in kb_path.glob("*.json"):
71
+ try:
72
+ with config_file.open("r", encoding="utf-8") as f:
73
+ config_data = json.load(f)
74
+ if not isinstance(config_data, dict):
75
+ continue
76
+
77
+ config_str = json.dumps(config_data).lower()
78
+
79
+ # Check for explicit provider fields first
80
+ provider_fields = ["embedding_provider", "provider", "embedding_model_provider"]
81
+ for field in provider_fields:
82
+ if field in config_data:
83
+ provider_value = str(config_data[field]).lower()
84
+ for provider, patterns in provider_patterns.items():
85
+ if any(pattern in provider_value for pattern in patterns):
86
+ return provider
87
+
88
+ # Check for model name patterns
89
+ for provider, patterns in provider_patterns.items():
90
+ if any(pattern in config_str for pattern in patterns):
91
+ return provider
92
+
93
+ except (OSError, json.JSONDecodeError) as _:
94
+ logger.exception("Error reading config file '%s'", config_file)
95
+ continue
96
+
97
+ # Fallback to directory structure
98
+ if (kb_path / "chroma").exists():
99
+ return "Chroma"
100
+ if (kb_path / "vectors.npy").exists():
101
+ return "Local"
102
+
103
+ return "Unknown"
104
+
105
+
106
+ def detect_embedding_model(kb_path: Path) -> str:
107
+ """Detect the embedding model from config files."""
108
+ # First check the embedding metadata file (most accurate)
109
+ metadata_file = kb_path / "embedding_metadata.json"
110
+ if metadata_file.exists():
111
+ try:
112
+ with metadata_file.open("r", encoding="utf-8") as f:
113
+ metadata = json.load(f)
114
+ if isinstance(metadata, dict) and "embedding_model" in metadata:
115
+ # Check for embedding model field
116
+ model_value = str(metadata.get("embedding_model", "unknown"))
117
+ if model_value and model_value.lower() != "unknown":
118
+ return model_value
119
+ except (OSError, json.JSONDecodeError) as _:
120
+ logger.exception("Error reading embedding metadata file '%s'", metadata_file)
121
+
122
+ # Check other JSON config files for model information
123
+ for config_file in kb_path.glob("*.json"):
124
+ # Skip the embedding metadata file since we already checked it
125
+ if config_file.name == "embedding_metadata.json":
126
+ continue
127
+
128
+ try:
129
+ with config_file.open("r", encoding="utf-8") as f:
130
+ config_data = json.load(f)
131
+ if not isinstance(config_data, dict):
132
+ continue
133
+
134
+ # Check for explicit model fields first and return the actual model name
135
+ model_fields = ["embedding_model", "model", "embedding_model_name", "model_name"]
136
+ for field in model_fields:
137
+ if field in config_data:
138
+ model_value = str(config_data[field])
139
+ if model_value and model_value.lower() != "unknown":
140
+ return model_value
141
+
142
+ # Check for OpenAI specific model names
143
+ if "openai" in json.dumps(config_data).lower():
144
+ openai_models = ["text-embedding-ada-002", "text-embedding-3-small", "text-embedding-3-large"]
145
+ config_str = json.dumps(config_data).lower()
146
+ for model in openai_models:
147
+ if model in config_str:
148
+ return model
149
+
150
+ # Check for HuggingFace model names (usually in model field)
151
+ if "model" in config_data:
152
+ model_name = str(config_data["model"])
153
+ # Common HuggingFace embedding models
154
+ hf_patterns = ["sentence-transformers", "all-MiniLM", "all-mpnet", "multi-qa"]
155
+ if any(pattern in model_name for pattern in hf_patterns):
156
+ return model_name
157
+
158
+ except (OSError, json.JSONDecodeError) as _:
159
+ logger.exception("Error reading config file '%s'", config_file)
160
+ continue
161
+
162
+ return "Unknown"
163
+
164
+
165
+ def get_text_columns(df: pd.DataFrame, schema_data: list | None = None) -> list[str]:
166
+ """Get the text columns to analyze for word/character counts."""
167
+ # First try schema-defined text columns
168
+ if schema_data:
169
+ text_columns = [
170
+ col["column_name"]
171
+ for col in schema_data
172
+ if col.get("vectorize", False) and col.get("data_type") == "string"
173
+ ]
174
+ if text_columns:
175
+ return [col for col in text_columns if col in df.columns]
176
+
177
+ # Fallback to common text column names
178
+ common_names = ["text", "content", "document", "chunk"]
179
+ text_columns = [col for col in df.columns if col.lower() in common_names]
180
+ if text_columns:
181
+ return text_columns
182
+
183
+ # Last resort: all string columns
184
+ return [col for col in df.columns if df[col].dtype == "object"]
185
+
186
+
187
def calculate_text_metrics(df: pd.DataFrame, text_columns: list[str]) -> tuple[int, int]:
    """Calculate total words and characters from text columns.

    Missing values (None/NaN) contribute nothing to either total. Words are
    whitespace-separated tokens.

    Args:
        df: Data holding the text.
        text_columns: Columns to measure; names absent from ``df`` are skipped.

    Returns:
        ``(total_words, total_characters)`` summed over all requested columns.
    """
    total_words = 0
    total_characters = 0

    for col in text_columns:
        if col not in df.columns:
            continue

        # Drop missing values BEFORE stringifying: astype(str) would otherwise turn them
        # into the literal text "None"/"nan" and they would be counted as real content.
        text_series = df[col].dropna().astype(str)
        total_characters += text_series.str.len().sum()
        total_words += text_series.str.split().str.len().sum()

    return int(total_words), int(total_characters)
201
+
202
+
203
def get_kb_metadata(kb_path: Path) -> dict:
    """Extract metadata from a knowledge base directory.

    Provider and model come from ``embedding_metadata.json`` when present, otherwise from
    ``detect_embedding_provider`` / ``detect_embedding_model``. Chunk, word and character
    statistics are computed from the documents of the Chroma collection persisted in
    ``kb_path`` (the collection name is the directory name).

    Args:
        kb_path: Directory of a single knowledge base.

    Returns:
        Dict with keys ``chunks``, ``words``, ``characters``, ``avg_chunk_size``,
        ``embedding_provider`` and ``embedding_model``. Values keep their defaults
        (0 / 0.0 / "Unknown") for anything that could not be determined; OSError,
        ValueError and TypeError are logged rather than raised.
    """
    metadata: dict[str, float | int | str] = {
        "chunks": 0,
        "words": 0,
        "characters": 0,
        "avg_chunk_size": 0.0,
        "embedding_provider": "Unknown",
        "embedding_model": "Unknown",
    }

    try:
        # First check embedding metadata file for accurate provider and model info
        metadata_file = kb_path / "embedding_metadata.json"
        if metadata_file.exists():
            try:
                with metadata_file.open("r", encoding="utf-8") as f:
                    embedding_metadata = json.load(f)
                if isinstance(embedding_metadata, dict):
                    if "embedding_provider" in embedding_metadata:
                        metadata["embedding_provider"] = embedding_metadata["embedding_provider"]
                    if "embedding_model" in embedding_metadata:
                        metadata["embedding_model"] = embedding_metadata["embedding_model"]
            except (OSError, ValueError):
                # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
                # Loguru formats with str.format, so the placeholder must be "{}".
                logger.exception("Error reading embedding metadata file '{}'", metadata_file)

        # Fallback to detection if not found in metadata file
        if metadata["embedding_provider"] == "Unknown":
            metadata["embedding_provider"] = detect_embedding_provider(kb_path)
        if metadata["embedding_model"] == "Unknown":
            metadata["embedding_model"] = detect_embedding_model(kb_path)

        # Read schema for text column information; anything but a list is ignored
        schema_data = None
        schema_file = kb_path / "schema.json"
        if schema_file.exists():
            try:
                with schema_file.open("r", encoding="utf-8") as f:
                    schema_data = json.load(f)
                if not isinstance(schema_data, list):
                    schema_data = None
            except (ValueError, TypeError, OSError):
                logger.exception("Error reading schema file '{}'", schema_file)

        # Create vector store
        chroma = Chroma(
            persist_directory=str(kb_path),
            collection_name=kb_path.name,
        )

        # Access the raw collection (private attribute of the langchain wrapper)
        collection = chroma._collection

        # Fetch all documents and metadata
        results = collection.get(include=["documents", "metadatas"])

        # Convert to pandas DataFrame
        source_chunks = pd.DataFrame(
            {
                "document": results["documents"],
                "metadata": results["metadatas"],
            }
        )

        # Process the source data for metadata
        try:
            chunk_count = len(source_chunks)
            metadata["chunks"] = chunk_count

            # Get text columns and calculate metrics
            text_columns = get_text_columns(source_chunks, schema_data)
            if text_columns:
                words, characters = calculate_text_metrics(source_chunks, text_columns)
                metadata["words"] = words
                metadata["characters"] = characters

                # Average chunk size is only defined when text was measured, so it is
                # computed here where `characters` is guaranteed to be bound.
                if chunk_count > 0:
                    metadata["avg_chunk_size"] = round(int(characters) / chunk_count, 1)

        except (OSError, ValueError, TypeError):
            logger.exception("Error processing Chroma DB '{}'", kb_path.name)

    except (OSError, ValueError, TypeError):
        logger.exception("Error processing knowledge base directory '{}'", kb_path)

    return metadata
289
+
290
+
291
@router.get("", status_code=HTTPStatus.OK)
@router.get("/", status_code=HTTPStatus.OK)
async def list_knowledge_bases() -> list[KnowledgeBaseInfo]:
    """Return every knowledge base found under the KB root, sorted by display name.

    Each non-hidden sub-directory of the KB root is treated as one knowledge
    base. Its on-disk size comes from ``get_directory_size`` and the remaining
    fields from ``get_kb_metadata``.

    Returns:
        A list of ``KnowledgeBaseInfo``; empty when the KB root does not exist.

    Raises:
        HTTPException: 500 when anything other than a per-directory
            ``OSError`` goes wrong while building the listing.
    """
    try:
        root = get_kb_root_path()
        if not root.exists():
            return []

        infos = []
        for entry in root.iterdir():
            # Only visible directories count as knowledge bases.
            is_visible_dir = entry.is_dir() and not entry.name.startswith(".")
            if not is_visible_dir:
                continue

            try:
                dir_size = get_directory_size(entry)
                meta = get_kb_metadata(entry)
                display_name = entry.name.replace("_", " ").replace("-", " ").title()
                infos.append(
                    KnowledgeBaseInfo(
                        id=entry.name,
                        name=display_name,
                        embedding_provider=meta["embedding_provider"],
                        embedding_model=meta["embedding_model"],
                        size=dir_size,
                        words=meta["words"],
                        characters=meta["characters"],
                        chunks=meta["chunks"],
                        avg_chunk_size=meta["avg_chunk_size"],
                    )
                )
            except OSError as _:
                # An unreadable directory is logged and left out of the listing
                # instead of failing the whole request.
                logger.exception("Error reading knowledge base directory '%s'", entry)

        # Alphabetical by display name.
        infos.sort(key=lambda info: info.name)

    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Error listing knowledge bases: {e!s}") from e
    else:
        return infos
340
+
341
+
342
@router.get("/{kb_name}", status_code=HTTPStatus.OK)
async def get_knowledge_base(kb_name: str) -> KnowledgeBaseInfo:
    """Get detailed information about a specific knowledge base.

    Args:
        kb_name: Directory name of the knowledge base under the KB root.

    Returns:
        A ``KnowledgeBaseInfo`` built from the directory size
        (``get_directory_size``) and the metadata from ``get_kb_metadata``.

    Raises:
        HTTPException: 404 when ``kb_name`` does not name a directory located
            directly under the KB root; 500 for any other failure.
    """
    try:
        kb_root_path = get_kb_root_path()
        kb_path = kb_root_path / kb_name

        # ``kb_name`` is caller-controlled. Accept it only when it is a single
        # path component naming a direct child of the root, so values such as
        # "..", "a/b" or an absolute path cannot point outside the KB root.
        is_direct_child = (
            kb_name not in {".", ".."} and kb_path.name == kb_name and kb_path.parent == kb_root_path
        )
        if not is_direct_child or not kb_path.is_dir():
            raise HTTPException(status_code=404, detail=f"Knowledge base '{kb_name}' not found")

        # Get size of the directory
        size = get_directory_size(kb_path)

        # Get metadata from KB files
        metadata = get_kb_metadata(kb_path)

        return KnowledgeBaseInfo(
            id=kb_name,
            name=kb_name.replace("_", " ").replace("-", " ").title(),
            embedding_provider=metadata["embedding_provider"],
            embedding_model=metadata["embedding_model"],
            size=size,
            words=metadata["words"],
            characters=metadata["characters"],
            chunks=metadata["chunks"],
            avg_chunk_size=metadata["avg_chunk_size"],
        )

    except HTTPException:
        # Keep the deliberate 404 instead of wrapping it in a 500.
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Error getting knowledge base '{kb_name}': {e!s}") from e
374
+
375
+
376
@router.delete("/{kb_name}", status_code=HTTPStatus.OK)
async def delete_knowledge_base(kb_name: str) -> dict[str, str]:
    """Delete a specific knowledge base.

    Args:
        kb_name: Directory name of the knowledge base under the KB root.

    Returns:
        A dict with a single ``"message"`` entry confirming the deletion.

    Raises:
        HTTPException: 404 when ``kb_name`` does not name a directory located
            directly under the KB root; 500 when the deletion itself fails.
    """
    try:
        kb_root_path = get_kb_root_path()
        kb_path = kb_root_path / kb_name

        # ``kb_name`` is caller-controlled and feeds a recursive delete. Accept
        # it only when it is a single path component naming a direct child of
        # the root, so values such as "..", "a/b" or an absolute path can never
        # make ``rmtree`` operate outside the KB root.
        is_direct_child = (
            kb_name not in {".", ".."} and kb_path.name == kb_name and kb_path.parent == kb_root_path
        )
        if not is_direct_child or not kb_path.is_dir():
            raise HTTPException(status_code=404, detail=f"Knowledge base '{kb_name}' not found")

        # Delete the entire knowledge base directory
        shutil.rmtree(kb_path)

    except HTTPException:
        # Keep the deliberate 404 instead of wrapping it in a 500.
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Error deleting knowledge base '{kb_name}': {e!s}") from e
    else:
        return {"message": f"Knowledge base '{kb_name}' deleted successfully"}
395
+
396
+
397
@router.delete("", status_code=HTTPStatus.OK)
@router.delete("/", status_code=HTTPStatus.OK)
async def delete_knowledge_bases_bulk(request: BulkDeleteRequest) -> dict[str, object]:
    """Delete multiple knowledge bases.

    Deletion is best-effort: a name that cannot be deleted does not stop the
    remaining names from being processed.

    Args:
        request: Request body whose ``kb_names`` lists the knowledge base
            directory names to delete.

    Returns:
        A dict with ``"message"`` and ``"deleted_count"``, plus
        ``"not_found"`` (comma-separated names that do not exist directly
        under the KB root) and ``"failed"`` (comma-separated names whose
        deletion raised an ``OSError``) when those lists are non-empty.

    Raises:
        HTTPException: 404 when nothing was deleted and at least one name was
            not found; 500 for any unexpected failure.
    """
    try:
        kb_root_path = get_kb_root_path()
        deleted_count = 0
        not_found_kbs = []
        failed_kbs = []

        for kb_name in request.kb_names:
            kb_path = kb_root_path / kb_name

            # Names come from the request body and feed a recursive delete.
            # Accept a name only when it is a single path component naming a
            # direct child of the root; "..", "a/b" or an absolute path are
            # reported as not found instead of being passed to ``rmtree``.
            is_direct_child = (
                kb_name not in {".", ".."} and kb_path.name == kb_name and kb_path.parent == kb_root_path
            )
            if not is_direct_child or not kb_path.is_dir():
                not_found_kbs.append(kb_name)
                continue

            try:
                # Delete the entire knowledge base directory
                shutil.rmtree(kb_path)
            except OSError as e:  # PermissionError is a subclass of OSError
                logger.exception("Error deleting knowledge base '%s': %s", kb_name, e)
                # Record the failure and continue with the other deletions.
                failed_kbs.append(kb_name)
            else:
                deleted_count += 1

        if not_found_kbs and deleted_count == 0:
            raise HTTPException(status_code=404, detail=f"Knowledge bases not found: {', '.join(not_found_kbs)}")

        result: dict[str, object] = {
            "message": f"Successfully deleted {deleted_count} knowledge base(s)",
            "deleted_count": deleted_count,
        }

        if not_found_kbs:
            result["not_found"] = ", ".join(not_found_kbs)

        if failed_kbs:
            # Surface deletions that were attempted but failed, so a 200
            # response does not hide them from the caller.
            result["failed"] = ", ".join(failed_kbs)

    except HTTPException:
        # Keep the deliberate 404 instead of wrapping it in a 500.
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Error deleting knowledge bases: {e!s}") from e
    else:
        return result