langflow-base-nightly 0.5.0.dev34__py3-none-any.whl → 0.5.0.dev35__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- langflow/alembic/versions/1cb603706752_modify_uniqueness_constraint_on_file_.py +279 -0
- langflow/components/agents/mcp_component.py +21 -4
- langflow/components/data/kb_ingest.py +15 -16
- langflow/components/processing/save_file.py +31 -4
- langflow/initial_setup/starter_projects/Knowledge Ingestion.json +2 -2
- langflow/initial_setup/starter_projects/News Aggregator.json +19 -2
- langflow/initial_setup/starter_projects/Nvidia Remix.json +19 -2
- langflow/services/database/models/file/model.py +4 -2
- {langflow_base_nightly-0.5.0.dev34.dist-info → langflow_base_nightly-0.5.0.dev35.dist-info}/METADATA +1 -1
- {langflow_base_nightly-0.5.0.dev34.dist-info → langflow_base_nightly-0.5.0.dev35.dist-info}/RECORD +12 -11
- {langflow_base_nightly-0.5.0.dev34.dist-info → langflow_base_nightly-0.5.0.dev35.dist-info}/WHEEL +0 -0
- {langflow_base_nightly-0.5.0.dev34.dist-info → langflow_base_nightly-0.5.0.dev35.dist-info}/entry_points.txt +0 -0
langflow/alembic/versions/1cb603706752_modify_uniqueness_constraint_on_file_.py
@@ -0,0 +1,279 @@
+"""Modify uniqueness constraint on file names
+
+Revision ID: 1cb603706752
+Revises: 3162e83e485f
+Create Date: 2025-07-24 07:02:14.896583
+"""
+
+from __future__ import annotations
+
+import logging
+import re
+import time
+from typing import Sequence, Union, Iterable, Optional, Set, Tuple
+
+from alembic import op
+import sqlalchemy as sa
+from sqlalchemy import inspect
+
+# revision identifiers, used by Alembic.
+revision: str = "1cb603706752"
+down_revision: Union[str, None] = "3162e83e485f"
+branch_labels: Union[str, Sequence[str], None] = None
+depends_on: Union[str, Sequence[str], None] = None
+
+logger = logging.getLogger(__name__)
+
+# Behavior constants
+DUPLICATE_SUFFIX_START = 2  # first suffix to use, e.g., "name_2.ext"
+BATCH_SIZE = 1000  # Process duplicates in batches for large datasets
+
+
+def _get_unique_constraints_by_columns(
+    inspector, table: str, expected_cols: Iterable[str]
+) -> Optional[str]:
+    """Return the name of a unique constraint that matches the exact set of expected columns."""
+    expected = set(expected_cols)
+    for c in inspector.get_unique_constraints(table):
+        cols = set(c.get("column_names") or [])
+        if cols == expected:
+            return c.get("name")
+    return None
+
+
+def _split_base_ext(name: str) -> Tuple[str, str]:
+    """Split a filename into (base, ext) where ext does not include the leading dot; ext may be ''."""
+    if "." in name:
+        base, ext = name.rsplit(".", 1)
+        return base, ext
+    return name, ""
+
+
+def _escape_like(s: str) -> str:
+    # escape backslash first, then SQL LIKE wildcards
+    return s.replace("\\", "\\\\").replace("%", r"\%").replace("_", r"\_")
+
+
+def _like_for_suffixes(base: str, ext: str) -> str:
+    eb = _escape_like(base)
+    if ext:
+        ex = ext.replace("%", r"\%").replace("_", r"\_")
+        return f"{eb}\\_%." + ex  # literal underscore
+    else:
+        return f"{eb}\\_%"
+
+
+def _next_available_name(conn, user_id: str, base_name: str) -> str:
+    """
+    Compute the next available non-conflicting name for a given user.
+    Handles names with or without extensions and existing _N suffixes.
+    """
+    base, ext = _split_base_ext(base_name)
+
+    # Load all sibling names once
+    rows = conn.execute(
+        sa.text("""
+            SELECT name
+            FROM file
+            WHERE user_id = :uid
+              AND (name = :base_name OR name LIKE :like ESCAPE '\\')
+        """),
+        {"uid": user_id, "base_name": base_name, "like": _like_for_suffixes(base, ext)},
+    ).scalars().all()
+
+    taken: Set[str] = set(rows)
+
+    # Pattern to detect base_N(.ext) and capture N
+    if ext:
+        rx = re.compile(rf"^{re.escape(base)}_(\d+)\.{re.escape(ext)}$")
+    else:
+        rx = re.compile(rf"^{re.escape(base)}_(\d+)$")
+
+    max_n = 1
+    for n in rows:
+        m = rx.match(n)
+        if m:
+            max_n = max(max_n, int(m.group(1)))
+
+    n = max(max_n + 1, DUPLICATE_SUFFIX_START)
+    while True:
+        candidate = f"{base}_{n}.{ext}" if ext else f"{base}_{n}"
+        if candidate not in taken:
+            return candidate
+        n += 1
+
+
+def _handle_duplicates_before_upgrade(conn) -> None:
+    """
+    Ensure (user_id, name) is unique by renaming older duplicates before adding the composite unique constraint.
+    Keeps the most recently updated/created/id-highest record; renames the rest with _N suffix.
+    """
+    logger.info("Scanning for duplicate file names per user...")
+    duplicates = conn.execute(
+        sa.text(
+            """
+            SELECT user_id, name, COUNT(*) AS cnt
+            FROM file
+            GROUP BY user_id, name
+            HAVING COUNT(*) > 1
+            """
+        )
+    ).fetchall()
+
+    if not duplicates:
+        logger.info("No duplicates found.")
+        return
+
+    logger.info("Found %d duplicate sets. Resolving...", len(duplicates))
+
+    # Add progress indicator for large datasets
+    if len(duplicates) > 100:
+        logger.info("Large number of duplicates detected. This may take several minutes...")
+
+    # Wrap in a nested transaction so we fail cleanly on any error
+    with conn.begin_nested():
+        # Process duplicates in batches for better performance on large datasets
+        for batch_start in range(0, len(duplicates), BATCH_SIZE):
+            batch_end = min(batch_start + BATCH_SIZE, len(duplicates))
+            batch = duplicates[batch_start:batch_end]
+
+            if len(duplicates) > BATCH_SIZE:
+                logger.info("Processing batch %d-%d of %d duplicate sets...",
+                            batch_start + 1, batch_end, len(duplicates))
+
+            for user_id, name, cnt in batch:
+                logger.debug("Resolving duplicates for user=%s, name=%r (count=%s)", user_id, name, cnt)
+
+                file_ids = conn.execute(
+                    sa.text(
+                        """
+                        SELECT id
+                        FROM file
+                        WHERE user_id = :uid AND name = :name
+                        ORDER BY updated_at DESC, created_at DESC, id DESC
+                        """
+                    ),
+                    {"uid": user_id, "name": name},
+                ).scalars().all()
+
+                # Keep the first (most recent), rename the rest
+                for file_id in file_ids[1:]:
+                    new_name = _next_available_name(conn, user_id, name)
+                    conn.execute(
+                        sa.text("UPDATE file SET name = :new_name WHERE id = :fid"),
+                        {"new_name": new_name, "fid": file_id},
+                    )
+                    logger.debug("Renamed id=%s: %r -> %r", file_id, name, new_name)
+
+            # Progress update for large batches
+            if len(duplicates) > BATCH_SIZE and batch_end < len(duplicates):
+                logger.info("Completed %d of %d duplicate sets (%.1f%%)",
+                            batch_end, len(duplicates), (batch_end / len(duplicates)) * 100)
+
+    logger.info("Duplicate resolution completed.")
+
+
+def upgrade() -> None:
+    start_time = time.time()
+    logger.info("Starting upgrade: adding composite unique (name, user_id) on file")
+
+    conn = op.get_bind()
+    inspector = inspect(conn)
+
+    # 1) Resolve pre-existing duplicates so the new unique can be created
+    duplicate_start = time.time()
+    _handle_duplicates_before_upgrade(conn)
+    duplicate_duration = time.time() - duplicate_start
+
+    if duplicate_duration > 1.0:  # Only log if it took more than 1 second
+        logger.info("Duplicate resolution completed in %.2f seconds", duplicate_duration)
+
+    # 2) Detect existing single-column unique on name (if any)
+    inspector = inspect(conn)  # refresh inspector
+    single_name_uc = _get_unique_constraints_by_columns(inspector, "file", {"name"})
+    composite_uc = _get_unique_constraints_by_columns(inspector, "file", {"name", "user_id"})
+
+    # 3) Use a unified, reflection-based batch_alter_table for both Postgres and SQLite.
+    #    recreate="always" ensures a safe table rebuild on SQLite and a standard alter on Postgres.
+    constraint_start = time.time()
+    with op.batch_alter_table("file", recreate="always") as batch_op:
+        # Drop old single-column unique if present
+        if single_name_uc:
+            logger.info("Dropping existing single-column unique: %s", single_name_uc)
+            batch_op.drop_constraint(single_name_uc, type_="unique")
+
+        # Create composite unique if not already present
+        if not composite_uc:
+            logger.info("Creating composite unique: file_name_user_id_key on (name, user_id)")
+            batch_op.create_unique_constraint("file_name_user_id_key", ["name", "user_id"])
+        else:
+            logger.info("Composite unique already present: %s", composite_uc)
+
+    constraint_duration = time.time() - constraint_start
+    if constraint_duration > 1.0:  # Only log if it took more than 1 second
+        logger.info("Constraint operations completed in %.2f seconds", constraint_duration)
+
+    total_duration = time.time() - start_time
+    logger.info("Upgrade completed successfully in %.2f seconds", total_duration)
+
+
+def downgrade() -> None:
+    start_time = time.time()
+    logger.info("Starting downgrade: reverting to single-column unique on (name)")
+
+    conn = op.get_bind()
+    inspector = inspect(conn)
+
+    # 1) Ensure no cross-user duplicates on name (since we'll enforce global uniqueness on name)
+    logger.info("Checking for cross-user duplicate names prior to downgrade...")
+    validation_start = time.time()
+
+    dup_names = conn.execute(
+        sa.text(
+            """
+            SELECT name, COUNT(*) AS cnt
+            FROM file
+            GROUP BY name
+            HAVING COUNT(*) > 1
+            """
+        )
+    ).fetchall()
+
+    validation_duration = time.time() - validation_start
+    if validation_duration > 1.0:  # Only log if it took more than 1 second
+        logger.info("Validation completed in %.2f seconds", validation_duration)
+
+    if dup_names:
+        examples = [row[0] for row in dup_names[:10]]
+        raise RuntimeError(
+            "Downgrade aborted: duplicate names exist across users. "
+            f"Examples: {examples}{'...' if len(dup_names) > 10 else ''}. "
+            "Rename conflicting files before downgrading."
+        )
+
+    # 2) Detect constraints
+    inspector = inspect(conn)  # refresh
+    composite_uc = _get_unique_constraints_by_columns(inspector, "file", {"name", "user_id"})
+    single_name_uc = _get_unique_constraints_by_columns(inspector, "file", {"name"})
+
+    # 3) Perform alteration using batch with reflect to preserve other objects
+    constraint_start = time.time()
+    with op.batch_alter_table("file", recreate="always") as batch_op:
+        if composite_uc:
+            logger.info("Dropping composite unique: %s", composite_uc)
+            batch_op.drop_constraint(composite_uc, type_="unique")
+        else:
+            logger.info("No composite unique found to drop.")
+
+        if not single_name_uc:
+            logger.info("Creating single-column unique: file_name_key on (name)")
+            batch_op.create_unique_constraint("file_name_key", ["name"])
+        else:
+            logger.info("Single-column unique already present: %s", single_name_uc)
+
+    constraint_duration = time.time() - constraint_start
+    if constraint_duration > 1.0:  # Only log if it took more than 1 second
+        logger.info("Constraint operations completed in %.2f seconds", constraint_duration)
+
+    total_duration = time.time() - start_time
+    logger.info("Downgrade completed successfully in %.2f seconds", total_duration)
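The migration's duplicate-resolution pass keeps the most recently updated row per (user_id, name) and renames the rest with an `_N` suffix. A minimal standalone sketch of that suffix selection, assuming pure Python with no database access (the `next_available_name` helper and the sample file names below are illustrative, not part of the package):

```python
import re

DUPLICATE_SUFFIX_START = 2  # first suffix tried, e.g. "name_2.ext"


def next_available_name(existing: set[str], base_name: str) -> str:
    """Return the next free 'base_N(.ext)' name, mirroring the migration's suffix logic."""
    if "." in base_name:
        base, ext = base_name.rsplit(".", 1)
        rx = re.compile(rf"^{re.escape(base)}_(\d+)\.{re.escape(ext)}$")
    else:
        base, ext = base_name, ""
        rx = re.compile(rf"^{re.escape(base)}_(\d+)$")

    # Highest suffix already taken among the sibling names.
    max_n = max((int(m.group(1)) for name in existing if (m := rx.match(name))), default=1)

    n = max(max_n + 1, DUPLICATE_SUFFIX_START)
    while True:
        candidate = f"{base}_{n}.{ext}" if ext else f"{base}_{n}"
        if candidate not in existing:
            return candidate
        n += 1


if __name__ == "__main__":
    names = {"report.csv", "report_2.csv", "report_7.csv"}
    first = next_available_name(names, "report.csv")
    print(first)                                               # report_8.csv
    print(next_available_name(names | {first}, "report.csv"))  # report_9.csv
```

Starting at `_2` and skipping past the highest existing suffix mirrors the migration's `DUPLICATE_SUFFIX_START` and `max_n` handling.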
langflow/components/agents/mcp_component.py
@@ -16,14 +16,14 @@ from langflow.base.mcp.util import (
 )
 from langflow.custom.custom_component.component_with_cache import ComponentWithCache
 from langflow.inputs.inputs import InputTypes  # noqa: TC001
-from langflow.io import DropdownInput, McpInput, MessageTextInput, Output
+from langflow.io import DropdownInput, McpInput, MessageTextInput, Output, SecretStrInput
 from langflow.io.schema import flatten_schema, schema_to_langflow_inputs
 from langflow.logging import logger
 from langflow.schema.dataframe import DataFrame
 from langflow.schema.message import Message
-from langflow.services.auth.utils import create_user_longterm_token
 
 # Import get_server from the backend API
+from langflow.services.auth.utils import create_user_longterm_token, get_current_user
 from langflow.services.database.models.user.crud import get_user_by_id
 from langflow.services.deps import get_session, get_settings_service, get_storage_service
 
@@ -96,6 +96,13 @@ class MCPToolsComponent(ComponentWithCache):
             show=False,
             tool_mode=False,
         ),
+        SecretStrInput(
+            name="api_key",
+            display_name="Langflow API Key",
+            info="Langflow API key for authentication when fetching MCP servers and tools.",
+            required=False,
+            advanced=True,
+        ),
     ]
 
     outputs = [
@@ -155,8 +162,18 @@ class MCPToolsComponent(ComponentWithCache):
 
         try:
             async for db in get_session():
-                user_id, _ = await create_user_longterm_token(db)
-                current_user = await get_user_by_id(db, user_id)
+                # TODO: In 1.6, this may need to be removed or adjusted
+                # Try to get the super user token, if possible
+                if self.api_key:
+                    current_user = await get_current_user(
+                        token=None,
+                        query_param=self.api_key,
+                        header_param=None,
+                        db=db,
+                    )
+                else:
+                    user_id, _ = await create_user_longterm_token(db)
+                    current_user = await get_user_by_id(db, user_id)
 
                 # Try to get server config from DB/API
                 server_config = await get_server(
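The hunk above makes the MCP component authenticate with an explicit Langflow API key when one is supplied and fall back to the long-term superuser token otherwise. A schematic sketch of that fallback, with stand-in coroutines in place of `get_current_user`, `create_user_longterm_token`, and `get_user_by_id` (all names below are hypothetical stubs, not the package's API):

```python
import asyncio
from dataclasses import dataclass


@dataclass
class User:
    id: str
    username: str


async def resolve_user_by_api_key(api_key: str) -> User:
    # Stand-in for get_current_user(query_param=api_key, ...) against the session DB.
    return User(id="api-key-user", username="api_key_owner")


async def resolve_superuser() -> User:
    # Stand-in for create_user_longterm_token(db) followed by get_user_by_id(db, user_id).
    return User(id="superuser", username="langflow")


async def resolve_current_user(api_key: str | None) -> User:
    # Prefer the explicit API key; otherwise fall back to the long-term superuser token.
    if api_key:
        return await resolve_user_by_api_key(api_key)
    return await resolve_superuser()


if __name__ == "__main__":
    print(asyncio.run(resolve_current_user(None)))           # falls back to the superuser path
    print(asyncio.run(resolve_current_user("sk-example")))   # uses the provided key
```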
langflow/components/data/kb_ingest.py
@@ -139,8 +139,8 @@ class KBIngestionComponent(Component):
                 {
                     "column_name": "text",
                     "vectorize": True,
-                    "identifier": False,
-                }
+                    "identifier": True,
+                },
             ],
         ),
         IntInput(
@@ -187,9 +187,8 @@ class KBIngestionComponent(Component):
         df_columns = set(df_source.columns)
         for config in config_list:
             col_name = config.get("column_name")
-            if col_name not in df_columns and not self.silent_errors:
+            if col_name not in df_columns:
                 msg = f"Column '{col_name}' not found in DataFrame. Available columns: {sorted(df_columns)}"
-                self.log(f"Warning: {msg}")
                 raise ValueError(msg)
 
         return config_list
@@ -295,9 +294,7 @@ class KBIngestionComponent(Component):
             if not cfg_path.exists():
                 cfg_path.write_text(json.dumps(config_list, indent=2))
 
-        except Exception as e:
-            if not self.silent_errors:
-                raise
+        except (OSError, TypeError, ValueError) as e:
             self.log(f"Error saving KB files: {e}")
 
     def _build_column_metadata(self, config_list: list[dict[str, Any]], df_source: pd.DataFrame) -> dict[str, Any]:
@@ -367,9 +364,7 @@ class KBIngestionComponent(Component):
                 chroma.add_documents(documents)
                 self.log(f"Added {len(documents)} documents to vector store '{self.knowledge_base}'")
 
-        except Exception as e:
-            if not self.silent_errors:
-                raise
+        except (OSError, ValueError, RuntimeError) as e:
             self.log(f"Error creating vector store: {e}")
 
     def _convert_df_to_data_objects(self, df_source: pd.DataFrame, config_list: list[dict[str, Any]]) -> list[Data]:
@@ -407,16 +402,22 @@ class KBIngestionComponent(Component):
 
         # Convert each row to a Data object
        for _, row in df_source.iterrows():
-            # Build content text from vectorized columns using list comprehension
-            content_parts = [str(row[col]) for col in content_cols if col in row and pd.notna(row[col])]
+            # Build content text from identifier columns using list comprehension
+            identifier_parts = [str(row[col]) for col in content_cols if col in row and pd.notna(row[col])]
 
-            page_content = " ".join(content_parts)
+            # Join all parts into a single string
+            page_content = " ".join(identifier_parts)
 
             # Build metadata from NON-vectorized columns only (simple key-value pairs)
             data_dict = {
                 "text": page_content,  # Main content for vectorization
             }
 
+            # Add identifier columns if they exist
+            if identifier_cols:
+                identifier_parts = [str(row[col]) for col in identifier_cols if col in row and pd.notna(row[col])]
+                page_content = " ".join(identifier_parts)
+
             # Add metadata columns as simple key-value pairs
             for col in df_source.columns:
                 if col not in content_cols and col in row and pd.notna(row[col]):
@@ -526,9 +527,7 @@ class KBIngestionComponent(Component):
 
             return Data(data=meta)
 
-        except Exception as e:
-            if not self.silent_errors:
-                raise
+        except (OSError, ValueError, RuntimeError, KeyError) as e:
             self.log(f"Error in KB ingestion: {e}")
             self.status = f"❌ KB ingestion failed: {e}"
             return Data(data={"error": str(e), "kb_name": self.knowledge_base})
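The kb_ingest hunks above rework how `_convert_df_to_data_objects` assembles row text and narrow the broad `except Exception` blocks to specific exception types. The surrounding row-conversion pattern (join the configured content columns, keep the remaining columns as string metadata, hash the content for a stable `_id`) is easier to see in isolation; a self-contained sketch with an invented sample frame and column names:

```python
import hashlib

import pandas as pd

# Columns marked "vectorize" become the page content; everything else becomes metadata.
content_cols = ["text"]

df = pd.DataFrame(
    {
        "text": ["first chunk", "second chunk"],
        "source": ["a.md", "b.md"],
        "page": [1, None],  # missing values are skipped via pd.notna
    }
)

records = []
for _, row in df.iterrows():
    parts = [str(row[col]) for col in content_cols if col in row and pd.notna(row[col])]
    page_content = " ".join(parts)

    data = {"text": page_content}
    for col in df.columns:
        if col not in content_cols and pd.notna(row[col]):
            data[col] = str(row[col])  # vector-store metadata kept as simple string values

    # Content hash doubles as a stable _id for duplicate detection.
    data["_id"] = hashlib.sha256(page_content.encode()).hexdigest()
    records.append(data)

print(records[0]["_id"][:12], records[1])
```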
langflow/components/processing/save_file.py
@@ -1,6 +1,7 @@
 import json
 from collections.abc import AsyncIterator, Iterator
 from pathlib import Path
+from typing import TYPE_CHECKING
 
 import orjson
 import pandas as pd
@@ -9,13 +10,16 @@ from fastapi.encoders import jsonable_encoder
 
 from langflow.api.v2.files import upload_user_file
 from langflow.custom import Component
-from langflow.io import DropdownInput, HandleInput, StrInput
+from langflow.io import DropdownInput, HandleInput, SecretStrInput, StrInput
 from langflow.schema import Data, DataFrame, Message
-from langflow.services.auth.utils import create_user_longterm_token
+from langflow.services.auth.utils import create_user_longterm_token, get_current_user
 from langflow.services.database.models.user.crud import get_user_by_id
 from langflow.services.deps import get_session, get_settings_service, get_storage_service
 from langflow.template.field.base import Output
 
+if TYPE_CHECKING:
+    from langflow.services.database.models.user.model import User
+
 
 class SaveToFileComponent(Component):
     display_name = "Save File"
@@ -51,6 +55,13 @@ class SaveToFileComponent(Component):
             value="",
             advanced=True,
         ),
+        SecretStrInput(
+            name="api_key",
+            display_name="Langflow API Key",
+            info="Langflow API key for authentication when saving the file.",
+            required=False,
+            advanced=True,
+        ),
     ]
 
     outputs = [Output(display_name="File Path", name="message", method="save_to_file")]
@@ -138,8 +149,24 @@
 
         with file_path.open("rb") as f:
             async for db in get_session():
-                user_id, _ = await create_user_longterm_token(db)
-                current_user = await get_user_by_id(db, user_id)
+                # TODO: In 1.6, this may need to be removed or adjusted
+                # Try to get the super user token, if possible
+                current_user: User | None = None
+                if self.api_key:
+                    current_user = await get_current_user(
+                        token="",
+                        query_param=self.api_key,
+                        header_param="",
+                        db=db,
+                    )
+                else:
+                    user_id, _ = await create_user_longterm_token(db)
+                    current_user = await get_user_by_id(db, user_id)
+
+                # Fail if the user is not found
+                if not current_user:
+                    msg = "User not found. Please provide a valid API key or ensure the user exists."
+                    raise ValueError(msg)
 
                 await upload_user_file(
                     file=UploadFile(filename=file_path.name, file=f, size=file_path.stat().st_size),
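save_file.py now imports `User` only under `TYPE_CHECKING`, so the `current_user: User | None` annotation works without a runtime import. A small illustration of that guard, using `sqlalchemy.orm.Session` purely as an example of a typing-only dependency (not something the component itself imports):

```python
from __future__ import annotations

from typing import TYPE_CHECKING

if TYPE_CHECKING:
    # Evaluated only by type checkers; never imported at runtime.
    from sqlalchemy.orm import Session


def describe(session: Session | None) -> str:
    # With `from __future__ import annotations` the hint stays a string at runtime,
    # so calling this function does not require sqlalchemy to be installed.
    return "no session" if session is None else "session attached"


if __name__ == "__main__":
    print(describe(None))
```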
langflow/initial_setup/starter_projects/Knowledge Ingestion.json
@@ -702,7 +702,7 @@
 "last_updated": "2025-08-13T19:45:49.122Z",
 "legacy": false,
 "metadata": {
-"code_hash": "
+"code_hash": "e1ebcd66ecbc",
 "module": "langflow.components.data.kb_ingest.KBIngestionComponent"
 },
 "minimized": false,
@@ -795,7 +795,7 @@
 "show": true,
 "title_case": false,
 "type": "code",
-
"value": "from __future__ import annotations\n\nimport hashlib\nimport json\nimport re\nimport uuid\nfrom dataclasses import asdict, dataclass, field\nfrom datetime import datetime, timezone\nfrom pathlib import Path\nfrom typing import Any\n\nimport pandas as pd\nfrom cryptography.fernet import InvalidToken\nfrom langchain_chroma import Chroma\nfrom loguru import logger\n\nfrom langflow.base.models.openai_constants import OPENAI_EMBEDDING_MODEL_NAMES\nfrom langflow.custom import Component\nfrom langflow.io import BoolInput, DataFrameInput, DropdownInput, IntInput, Output, SecretStrInput, StrInput, TableInput\nfrom langflow.schema.data import Data\nfrom langflow.schema.dotdict import dotdict # noqa: TC001\nfrom langflow.schema.table import EditMode\nfrom langflow.services.auth.utils import decrypt_api_key, encrypt_api_key\nfrom langflow.services.deps import get_settings_service\n\nHUGGINGFACE_MODEL_NAMES = [\"sentence-transformers/all-MiniLM-L6-v2\", \"sentence-transformers/all-mpnet-base-v2\"]\nCOHERE_MODEL_NAMES = [\"embed-english-v3.0\", \"embed-multilingual-v3.0\"]\n\nsettings = get_settings_service().settings\nknowledge_directory = settings.knowledge_bases_dir\nif not knowledge_directory:\n msg = \"Knowledge bases directory is not set in the settings.\"\n raise ValueError(msg)\nKNOWLEDGE_BASES_ROOT_PATH = Path(knowledge_directory).expanduser()\n\n\nclass KBIngestionComponent(Component):\n \"\"\"Create or append to Langflow Knowledge from a DataFrame.\"\"\"\n\n # ------ UI metadata ---------------------------------------------------\n display_name = \"Knowledge Ingestion\"\n description = \"Create or update knowledge in Langflow.\"\n icon = \"database\"\n name = \"KBIngestion\"\n\n @dataclass\n class NewKnowledgeBaseInput:\n functionality: str = \"create\"\n fields: dict[str, dict] = field(\n default_factory=lambda: {\n \"data\": {\n \"node\": {\n \"name\": \"create_knowledge_base\",\n \"description\": \"Create new knowledge in Langflow.\",\n \"display_name\": \"Create new knowledge\",\n \"field_order\": [\"01_new_kb_name\", \"02_embedding_model\", \"03_api_key\"],\n \"template\": {\n \"01_new_kb_name\": StrInput(\n name=\"new_kb_name\",\n display_name=\"Knowledge Name\",\n info=\"Name of the new knowledge to create.\",\n required=True,\n ),\n \"02_embedding_model\": DropdownInput(\n name=\"embedding_model\",\n display_name=\"Model Name\",\n info=\"Select the embedding model to use for this knowledge base.\",\n required=True,\n options=OPENAI_EMBEDDING_MODEL_NAMES + HUGGINGFACE_MODEL_NAMES + COHERE_MODEL_NAMES,\n options_metadata=[{\"icon\": \"OpenAI\"} for _ in OPENAI_EMBEDDING_MODEL_NAMES]\n + [{\"icon\": \"HuggingFace\"} for _ in HUGGINGFACE_MODEL_NAMES]\n + [{\"icon\": \"Cohere\"} for _ in COHERE_MODEL_NAMES],\n ),\n \"03_api_key\": SecretStrInput(\n name=\"api_key\",\n display_name=\"API Key\",\n info=\"Provider API key for embedding model\",\n required=True,\n load_from_db=True,\n ),\n },\n },\n }\n }\n )\n\n # ------ Inputs --------------------------------------------------------\n inputs = [\n DropdownInput(\n name=\"knowledge_base\",\n display_name=\"Knowledge\",\n info=\"Select the knowledge to load data from.\",\n required=True,\n options=[\n str(d.name) for d in KNOWLEDGE_BASES_ROOT_PATH.iterdir() if not d.name.startswith(\".\") and d.is_dir()\n ]\n if KNOWLEDGE_BASES_ROOT_PATH.exists()\n else [],\n refresh_button=True,\n dialog_inputs=asdict(NewKnowledgeBaseInput()),\n ),\n DataFrameInput(\n name=\"input_df\",\n display_name=\"Data\",\n info=\"Table with all original 
columns (already chunked / processed).\",\n required=True,\n ),\n TableInput(\n name=\"column_config\",\n display_name=\"Column Configuration\",\n info=\"Configure column behavior for the knowledge base.\",\n required=True,\n table_schema=[\n {\n \"name\": \"column_name\",\n \"display_name\": \"Column Name\",\n \"type\": \"str\",\n \"description\": \"Name of the column in the source DataFrame\",\n \"edit_mode\": EditMode.INLINE,\n },\n {\n \"name\": \"vectorize\",\n \"display_name\": \"Vectorize\",\n \"type\": \"boolean\",\n \"description\": \"Create embeddings for this column\",\n \"default\": False,\n \"edit_mode\": EditMode.INLINE,\n },\n {\n \"name\": \"identifier\",\n \"display_name\": \"Identifier\",\n \"type\": \"boolean\",\n \"description\": \"Use this column as unique identifier\",\n \"default\": False,\n \"edit_mode\": EditMode.INLINE,\n },\n ],\n value=[\n {\n \"column_name\": \"text\",\n \"vectorize\": True,\n \"identifier\": False,\n }\n ],\n ),\n IntInput(\n name=\"chunk_size\",\n display_name=\"Chunk Size\",\n info=\"Batch size for processing embeddings\",\n advanced=True,\n value=1000,\n ),\n SecretStrInput(\n name=\"api_key\",\n display_name=\"Embedding Provider API Key\",\n info=\"API key for the embedding provider to generate embeddings.\",\n advanced=True,\n required=False,\n ),\n BoolInput(\n name=\"allow_duplicates\",\n display_name=\"Allow Duplicates\",\n info=\"Allow duplicate rows in the knowledge base\",\n advanced=True,\n value=False,\n ),\n ]\n\n # ------ Outputs -------------------------------------------------------\n outputs = [Output(display_name=\"DataFrame\", name=\"dataframe\", method=\"build_kb_info\")]\n\n # ------ Internal helpers ---------------------------------------------\n def _get_kb_root(self) -> Path:\n \"\"\"Return the root directory for knowledge bases.\"\"\"\n return KNOWLEDGE_BASES_ROOT_PATH\n\n def _validate_column_config(self, df_source: pd.DataFrame) -> list[dict[str, Any]]:\n \"\"\"Validate column configuration using Structured Output patterns.\"\"\"\n if not self.column_config:\n msg = \"Column configuration cannot be empty\"\n raise ValueError(msg)\n\n # Convert table input to list of dicts (similar to Structured Output)\n config_list = self.column_config if isinstance(self.column_config, list) else []\n\n # Validate column names exist in DataFrame\n df_columns = set(df_source.columns)\n for config in config_list:\n col_name = config.get(\"column_name\")\n if col_name not in df_columns and not self.silent_errors:\n msg = f\"Column '{col_name}' not found in DataFrame. 
Available columns: {sorted(df_columns)}\"\n self.log(f\"Warning: {msg}\")\n raise ValueError(msg)\n\n return config_list\n\n def _get_embedding_provider(self, embedding_model: str) -> str:\n \"\"\"Get embedding provider by matching model name to lists.\"\"\"\n if embedding_model in OPENAI_EMBEDDING_MODEL_NAMES:\n return \"OpenAI\"\n if embedding_model in HUGGINGFACE_MODEL_NAMES:\n return \"HuggingFace\"\n if embedding_model in COHERE_MODEL_NAMES:\n return \"Cohere\"\n return \"Custom\"\n\n def _build_embeddings(self, embedding_model: str, api_key: str):\n \"\"\"Build embedding model using provider patterns.\"\"\"\n # Get provider by matching model name to lists\n provider = self._get_embedding_provider(embedding_model)\n\n # Validate provider and model\n if provider == \"OpenAI\":\n from langchain_openai import OpenAIEmbeddings\n\n if not api_key:\n msg = \"OpenAI API key is required when using OpenAI provider\"\n raise ValueError(msg)\n return OpenAIEmbeddings(\n model=embedding_model,\n api_key=api_key,\n chunk_size=self.chunk_size,\n )\n if provider == \"HuggingFace\":\n from langchain_huggingface import HuggingFaceEmbeddings\n\n return HuggingFaceEmbeddings(\n model=embedding_model,\n )\n if provider == \"Cohere\":\n from langchain_cohere import CohereEmbeddings\n\n if not api_key:\n msg = \"Cohere API key is required when using Cohere provider\"\n raise ValueError(msg)\n return CohereEmbeddings(\n model=embedding_model,\n cohere_api_key=api_key,\n )\n if provider == \"Custom\":\n # For custom embedding models, we would need additional configuration\n msg = \"Custom embedding models not yet supported\"\n raise NotImplementedError(msg)\n msg = f\"Unknown provider: {provider}\"\n raise ValueError(msg)\n\n def _build_embedding_metadata(self, embedding_model, api_key) -> dict[str, Any]:\n \"\"\"Build embedding model metadata.\"\"\"\n # Get provider by matching model name to lists\n embedding_provider = self._get_embedding_provider(embedding_model)\n\n api_key_to_save = None\n if api_key and hasattr(api_key, \"get_secret_value\"):\n api_key_to_save = api_key.get_secret_value()\n elif isinstance(api_key, str):\n api_key_to_save = api_key\n\n encrypted_api_key = None\n if api_key_to_save:\n settings_service = get_settings_service()\n try:\n encrypted_api_key = encrypt_api_key(api_key_to_save, settings_service=settings_service)\n except (TypeError, ValueError) as e:\n self.log(f\"Could not encrypt API key: {e}\")\n logger.error(f\"Could not encrypt API key: {e}\")\n\n return {\n \"embedding_provider\": embedding_provider,\n \"embedding_model\": embedding_model,\n \"api_key\": encrypted_api_key,\n \"api_key_used\": bool(api_key),\n \"chunk_size\": self.chunk_size,\n \"created_at\": datetime.now(timezone.utc).isoformat(),\n }\n\n def _save_embedding_metadata(self, kb_path: Path, embedding_model: str, api_key: str) -> None:\n \"\"\"Save embedding model metadata.\"\"\"\n embedding_metadata = self._build_embedding_metadata(embedding_model, api_key)\n metadata_path = kb_path / \"embedding_metadata.json\"\n metadata_path.write_text(json.dumps(embedding_metadata, indent=2))\n\n def _save_kb_files(\n self,\n kb_path: Path,\n config_list: list[dict[str, Any]],\n ) -> None:\n \"\"\"Save KB files using File Component storage patterns.\"\"\"\n try:\n # Create directory (following File Component patterns)\n kb_path.mkdir(parents=True, exist_ok=True)\n\n # Save column configuration\n # Only do this if the file doesn't exist already\n cfg_path = kb_path / \"schema.json\"\n if not cfg_path.exists():\n 
cfg_path.write_text(json.dumps(config_list, indent=2))\n\n except Exception as e:\n if not self.silent_errors:\n raise\n self.log(f\"Error saving KB files: {e}\")\n\n def _build_column_metadata(self, config_list: list[dict[str, Any]], df_source: pd.DataFrame) -> dict[str, Any]:\n \"\"\"Build detailed column metadata.\"\"\"\n metadata: dict[str, Any] = {\n \"total_columns\": len(df_source.columns),\n \"mapped_columns\": len(config_list),\n \"unmapped_columns\": len(df_source.columns) - len(config_list),\n \"columns\": [],\n \"summary\": {\"vectorized_columns\": [], \"identifier_columns\": []},\n }\n\n for config in config_list:\n col_name = config.get(\"column_name\")\n vectorize = config.get(\"vectorize\") == \"True\" or config.get(\"vectorize\") is True\n identifier = config.get(\"identifier\") == \"True\" or config.get(\"identifier\") is True\n\n # Add to columns list\n metadata[\"columns\"].append(\n {\n \"name\": col_name,\n \"vectorize\": vectorize,\n \"identifier\": identifier,\n }\n )\n\n # Update summary\n if vectorize:\n metadata[\"summary\"][\"vectorized_columns\"].append(col_name)\n if identifier:\n metadata[\"summary\"][\"identifier_columns\"].append(col_name)\n\n return metadata\n\n def _create_vector_store(\n self, df_source: pd.DataFrame, config_list: list[dict[str, Any]], embedding_model: str, api_key: str\n ) -> None:\n \"\"\"Create vector store following Local DB component pattern.\"\"\"\n try:\n # Set up vector store directory\n base_dir = self._get_kb_root()\n\n vector_store_dir = base_dir / self.knowledge_base\n vector_store_dir.mkdir(parents=True, exist_ok=True)\n\n # Create embeddings model\n embedding_function = self._build_embeddings(embedding_model, api_key)\n\n # Convert DataFrame to Data objects (following Local DB pattern)\n data_objects = self._convert_df_to_data_objects(df_source, config_list)\n\n # Create vector store\n chroma = Chroma(\n persist_directory=str(vector_store_dir),\n embedding_function=embedding_function,\n collection_name=self.knowledge_base,\n )\n\n # Convert Data objects to LangChain Documents\n documents = []\n for data_obj in data_objects:\n doc = data_obj.to_lc_document()\n documents.append(doc)\n\n # Add documents to vector store\n if documents:\n chroma.add_documents(documents)\n self.log(f\"Added {len(documents)} documents to vector store '{self.knowledge_base}'\")\n\n except Exception as e:\n if not self.silent_errors:\n raise\n self.log(f\"Error creating vector store: {e}\")\n\n def _convert_df_to_data_objects(self, df_source: pd.DataFrame, config_list: list[dict[str, Any]]) -> list[Data]:\n \"\"\"Convert DataFrame to Data objects for vector store.\"\"\"\n data_objects: list[Data] = []\n\n # Set up vector store directory\n base_dir = self._get_kb_root()\n\n # If we don't allow duplicates, we need to get the existing hashes\n chroma = Chroma(\n persist_directory=str(base_dir / self.knowledge_base),\n collection_name=self.knowledge_base,\n )\n\n # Get all documents and their metadata\n all_docs = chroma.get()\n\n # Extract all _id values from metadata\n id_list = [metadata.get(\"_id\") for metadata in all_docs[\"metadatas\"] if metadata.get(\"_id\")]\n\n # Get column roles\n content_cols = []\n identifier_cols = []\n\n for config in config_list:\n col_name = config.get(\"column_name\")\n vectorize = config.get(\"vectorize\") == \"True\" or config.get(\"vectorize\") is True\n identifier = config.get(\"identifier\") == \"True\" or config.get(\"identifier\") is True\n\n if vectorize:\n content_cols.append(col_name)\n elif identifier:\n 
identifier_cols.append(col_name)\n\n # Convert each row to a Data object\n for _, row in df_source.iterrows():\n # Build content text from vectorized columns using list comprehension\n content_parts = [str(row[col]) for col in content_cols if col in row and pd.notna(row[col])]\n\n page_content = \" \".join(content_parts)\n\n # Build metadata from NON-vectorized columns only (simple key-value pairs)\n data_dict = {\n \"text\": page_content, # Main content for vectorization\n }\n\n # Add metadata columns as simple key-value pairs\n for col in df_source.columns:\n if col not in content_cols and col in row and pd.notna(row[col]):\n # Convert to simple types for Chroma metadata\n value = row[col]\n data_dict[col] = str(value) # Convert complex types to string\n\n # Hash the page_content for unique ID\n page_content_hash = hashlib.sha256(page_content.encode()).hexdigest()\n data_dict[\"_id\"] = page_content_hash\n\n # If duplicates are disallowed, and hash exists, prevent adding this row\n if not self.allow_duplicates and page_content_hash in id_list:\n self.log(f\"Skipping duplicate row with hash {page_content_hash}\")\n continue\n\n # Create Data object - everything except \"text\" becomes metadata\n data_obj = Data(data=data_dict)\n data_objects.append(data_obj)\n\n return data_objects\n\n def is_valid_collection_name(self, name, min_length: int = 3, max_length: int = 63) -> bool:\n \"\"\"Validates collection name against conditions 1-3.\n\n 1. Contains 3-63 characters\n 2. Starts and ends with alphanumeric character\n 3. Contains only alphanumeric characters, underscores, or hyphens.\n\n Args:\n name (str): Collection name to validate\n min_length (int): Minimum length of the name\n max_length (int): Maximum length of the name\n\n Returns:\n bool: True if valid, False otherwise\n \"\"\"\n # Check length (condition 1)\n if not (min_length <= len(name) <= max_length):\n return False\n\n # Check start/end with alphanumeric (condition 2)\n if not (name[0].isalnum() and name[-1].isalnum()):\n return False\n\n # Check allowed characters (condition 3)\n return re.match(r\"^[a-zA-Z0-9_-]+$\", name) is not None\n\n # ---------------------------------------------------------------------\n # OUTPUT METHODS\n # ---------------------------------------------------------------------\n def build_kb_info(self) -> Data:\n \"\"\"Main ingestion routine → returns a dict with KB metadata.\"\"\"\n try:\n # Get source DataFrame\n df_source: pd.DataFrame = self.input_df\n\n # Validate column configuration (using Structured Output patterns)\n config_list = self._validate_column_config(df_source)\n column_metadata = self._build_column_metadata(config_list, df_source)\n\n # Prepare KB folder (using File Component patterns)\n kb_root = self._get_kb_root()\n kb_path = kb_root / self.knowledge_base\n\n # Read the embedding info from the knowledge base folder\n metadata_path = kb_path / \"embedding_metadata.json\"\n\n # If the API key is not provided, try to read it from the metadata file\n if metadata_path.exists():\n settings_service = get_settings_service()\n metadata = json.loads(metadata_path.read_text())\n embedding_model = metadata.get(\"embedding_model\")\n try:\n api_key = decrypt_api_key(metadata[\"api_key\"], settings_service)\n except (InvalidToken, TypeError, ValueError) as e:\n logger.error(f\"Could not decrypt API key. Please provide it manually. 
Error: {e}\")\n\n # Check if a custom API key was provided, update metadata if so\n if self.api_key:\n api_key = self.api_key\n self._save_embedding_metadata(\n kb_path=kb_path,\n embedding_model=embedding_model,\n api_key=api_key,\n )\n\n # Create vector store following Local DB component pattern\n self._create_vector_store(df_source, config_list, embedding_model=embedding_model, api_key=api_key)\n\n # Save KB files (using File Component storage patterns)\n self._save_kb_files(kb_path, config_list)\n\n # Build metadata response\n meta: dict[str, Any] = {\n \"kb_id\": str(uuid.uuid4()),\n \"kb_name\": self.knowledge_base,\n \"rows\": len(df_source),\n \"column_metadata\": column_metadata,\n \"path\": str(kb_path),\n \"config_columns\": len(config_list),\n \"timestamp\": datetime.now(tz=timezone.utc).isoformat(),\n }\n\n # Set status message\n self.status = f\"✅ KB **{self.knowledge_base}** saved · {len(df_source)} chunks.\"\n\n return Data(data=meta)\n\n except Exception as e:\n if not self.silent_errors:\n raise\n self.log(f\"Error in KB ingestion: {e}\")\n self.status = f\"❌ KB ingestion failed: {e}\"\n return Data(data={\"error\": str(e), \"kb_name\": self.knowledge_base})\n\n def _get_knowledge_bases(self) -> list[str]:\n \"\"\"Retrieve a list of available knowledge bases.\n\n Returns:\n A list of knowledge base names.\n \"\"\"\n # Return the list of directories in the knowledge base root path\n kb_root_path = self._get_kb_root()\n\n if not kb_root_path.exists():\n return []\n\n return [str(d.name) for d in kb_root_path.iterdir() if not d.name.startswith(\".\") and d.is_dir()]\n\n def update_build_config(self, build_config: dotdict, field_value: Any, field_name: str | None = None) -> dotdict:\n \"\"\"Update build configuration based on provider selection.\"\"\"\n # Create a new knowledge base\n if field_name == \"knowledge_base\":\n if isinstance(field_value, dict) and \"01_new_kb_name\" in field_value:\n # Validate the knowledge base name - Make sure it follows these rules:\n if not self.is_valid_collection_name(field_value[\"01_new_kb_name\"]):\n msg = f\"Invalid knowledge base name: {field_value['01_new_kb_name']}\"\n raise ValueError(msg)\n\n # We need to test the API Key one time against the embedding model\n embed_model = self._build_embeddings(\n embedding_model=field_value[\"02_embedding_model\"], api_key=field_value[\"03_api_key\"]\n )\n\n # Try to generate a dummy embedding to validate the API key\n embed_model.embed_query(\"test\")\n\n # Create the new knowledge base directory\n kb_path = KNOWLEDGE_BASES_ROOT_PATH / field_value[\"01_new_kb_name\"]\n kb_path.mkdir(parents=True, exist_ok=True)\n\n # Save the embedding metadata\n build_config[\"knowledge_base\"][\"value\"] = field_value[\"01_new_kb_name\"]\n self._save_embedding_metadata(\n kb_path=kb_path,\n embedding_model=field_value[\"02_embedding_model\"],\n api_key=field_value[\"03_api_key\"],\n )\n\n # Update the knowledge base options dynamically\n build_config[\"knowledge_base\"][\"options\"] = self._get_knowledge_bases()\n if build_config[\"knowledge_base\"][\"value\"] not in build_config[\"knowledge_base\"][\"options\"]:\n build_config[\"knowledge_base\"][\"value\"] = None\n\n return build_config\n"
+
"value": "from __future__ import annotations\n\nimport hashlib\nimport json\nimport re\nimport uuid\nfrom dataclasses import asdict, dataclass, field\nfrom datetime import datetime, timezone\nfrom pathlib import Path\nfrom typing import Any\n\nimport pandas as pd\nfrom cryptography.fernet import InvalidToken\nfrom langchain_chroma import Chroma\nfrom loguru import logger\n\nfrom langflow.base.models.openai_constants import OPENAI_EMBEDDING_MODEL_NAMES\nfrom langflow.custom import Component\nfrom langflow.io import BoolInput, DataFrameInput, DropdownInput, IntInput, Output, SecretStrInput, StrInput, TableInput\nfrom langflow.schema.data import Data\nfrom langflow.schema.dotdict import dotdict # noqa: TC001\nfrom langflow.schema.table import EditMode\nfrom langflow.services.auth.utils import decrypt_api_key, encrypt_api_key\nfrom langflow.services.deps import get_settings_service\n\nHUGGINGFACE_MODEL_NAMES = [\"sentence-transformers/all-MiniLM-L6-v2\", \"sentence-transformers/all-mpnet-base-v2\"]\nCOHERE_MODEL_NAMES = [\"embed-english-v3.0\", \"embed-multilingual-v3.0\"]\n\nsettings = get_settings_service().settings\nknowledge_directory = settings.knowledge_bases_dir\nif not knowledge_directory:\n msg = \"Knowledge bases directory is not set in the settings.\"\n raise ValueError(msg)\nKNOWLEDGE_BASES_ROOT_PATH = Path(knowledge_directory).expanduser()\n\n\nclass KBIngestionComponent(Component):\n \"\"\"Create or append to Langflow Knowledge from a DataFrame.\"\"\"\n\n # ------ UI metadata ---------------------------------------------------\n display_name = \"Knowledge Ingestion\"\n description = \"Create or update knowledge in Langflow.\"\n icon = \"database\"\n name = \"KBIngestion\"\n\n @dataclass\n class NewKnowledgeBaseInput:\n functionality: str = \"create\"\n fields: dict[str, dict] = field(\n default_factory=lambda: {\n \"data\": {\n \"node\": {\n \"name\": \"create_knowledge_base\",\n \"description\": \"Create new knowledge in Langflow.\",\n \"display_name\": \"Create new knowledge\",\n \"field_order\": [\"01_new_kb_name\", \"02_embedding_model\", \"03_api_key\"],\n \"template\": {\n \"01_new_kb_name\": StrInput(\n name=\"new_kb_name\",\n display_name=\"Knowledge Name\",\n info=\"Name of the new knowledge to create.\",\n required=True,\n ),\n \"02_embedding_model\": DropdownInput(\n name=\"embedding_model\",\n display_name=\"Model Name\",\n info=\"Select the embedding model to use for this knowledge base.\",\n required=True,\n options=OPENAI_EMBEDDING_MODEL_NAMES + HUGGINGFACE_MODEL_NAMES + COHERE_MODEL_NAMES,\n options_metadata=[{\"icon\": \"OpenAI\"} for _ in OPENAI_EMBEDDING_MODEL_NAMES]\n + [{\"icon\": \"HuggingFace\"} for _ in HUGGINGFACE_MODEL_NAMES]\n + [{\"icon\": \"Cohere\"} for _ in COHERE_MODEL_NAMES],\n ),\n \"03_api_key\": SecretStrInput(\n name=\"api_key\",\n display_name=\"API Key\",\n info=\"Provider API key for embedding model\",\n required=True,\n load_from_db=True,\n ),\n },\n },\n }\n }\n )\n\n # ------ Inputs --------------------------------------------------------\n inputs = [\n DropdownInput(\n name=\"knowledge_base\",\n display_name=\"Knowledge\",\n info=\"Select the knowledge to load data from.\",\n required=True,\n options=[\n str(d.name) for d in KNOWLEDGE_BASES_ROOT_PATH.iterdir() if not d.name.startswith(\".\") and d.is_dir()\n ]\n if KNOWLEDGE_BASES_ROOT_PATH.exists()\n else [],\n refresh_button=True,\n dialog_inputs=asdict(NewKnowledgeBaseInput()),\n ),\n DataFrameInput(\n name=\"input_df\",\n display_name=\"Data\",\n info=\"Table with all original 
columns (already chunked / processed).\",\n required=True,\n ),\n TableInput(\n name=\"column_config\",\n display_name=\"Column Configuration\",\n info=\"Configure column behavior for the knowledge base.\",\n required=True,\n table_schema=[\n {\n \"name\": \"column_name\",\n \"display_name\": \"Column Name\",\n \"type\": \"str\",\n \"description\": \"Name of the column in the source DataFrame\",\n \"edit_mode\": EditMode.INLINE,\n },\n {\n \"name\": \"vectorize\",\n \"display_name\": \"Vectorize\",\n \"type\": \"boolean\",\n \"description\": \"Create embeddings for this column\",\n \"default\": False,\n \"edit_mode\": EditMode.INLINE,\n },\n {\n \"name\": \"identifier\",\n \"display_name\": \"Identifier\",\n \"type\": \"boolean\",\n \"description\": \"Use this column as unique identifier\",\n \"default\": False,\n \"edit_mode\": EditMode.INLINE,\n },\n ],\n value=[\n {\n \"column_name\": \"text\",\n \"vectorize\": True,\n \"identifier\": True,\n },\n ],\n ),\n IntInput(\n name=\"chunk_size\",\n display_name=\"Chunk Size\",\n info=\"Batch size for processing embeddings\",\n advanced=True,\n value=1000,\n ),\n SecretStrInput(\n name=\"api_key\",\n display_name=\"Embedding Provider API Key\",\n info=\"API key for the embedding provider to generate embeddings.\",\n advanced=True,\n required=False,\n ),\n BoolInput(\n name=\"allow_duplicates\",\n display_name=\"Allow Duplicates\",\n info=\"Allow duplicate rows in the knowledge base\",\n advanced=True,\n value=False,\n ),\n ]\n\n # ------ Outputs -------------------------------------------------------\n outputs = [Output(display_name=\"DataFrame\", name=\"dataframe\", method=\"build_kb_info\")]\n\n # ------ Internal helpers ---------------------------------------------\n def _get_kb_root(self) -> Path:\n \"\"\"Return the root directory for knowledge bases.\"\"\"\n return KNOWLEDGE_BASES_ROOT_PATH\n\n def _validate_column_config(self, df_source: pd.DataFrame) -> list[dict[str, Any]]:\n \"\"\"Validate column configuration using Structured Output patterns.\"\"\"\n if not self.column_config:\n msg = \"Column configuration cannot be empty\"\n raise ValueError(msg)\n\n # Convert table input to list of dicts (similar to Structured Output)\n config_list = self.column_config if isinstance(self.column_config, list) else []\n\n # Validate column names exist in DataFrame\n df_columns = set(df_source.columns)\n for config in config_list:\n col_name = config.get(\"column_name\")\n if col_name not in df_columns:\n msg = f\"Column '{col_name}' not found in DataFrame. 
Available columns: {sorted(df_columns)}\"\n raise ValueError(msg)\n\n return config_list\n\n def _get_embedding_provider(self, embedding_model: str) -> str:\n \"\"\"Get embedding provider by matching model name to lists.\"\"\"\n if embedding_model in OPENAI_EMBEDDING_MODEL_NAMES:\n return \"OpenAI\"\n if embedding_model in HUGGINGFACE_MODEL_NAMES:\n return \"HuggingFace\"\n if embedding_model in COHERE_MODEL_NAMES:\n return \"Cohere\"\n return \"Custom\"\n\n def _build_embeddings(self, embedding_model: str, api_key: str):\n \"\"\"Build embedding model using provider patterns.\"\"\"\n # Get provider by matching model name to lists\n provider = self._get_embedding_provider(embedding_model)\n\n # Validate provider and model\n if provider == \"OpenAI\":\n from langchain_openai import OpenAIEmbeddings\n\n if not api_key:\n msg = \"OpenAI API key is required when using OpenAI provider\"\n raise ValueError(msg)\n return OpenAIEmbeddings(\n model=embedding_model,\n api_key=api_key,\n chunk_size=self.chunk_size,\n )\n if provider == \"HuggingFace\":\n from langchain_huggingface import HuggingFaceEmbeddings\n\n return HuggingFaceEmbeddings(\n model=embedding_model,\n )\n if provider == \"Cohere\":\n from langchain_cohere import CohereEmbeddings\n\n if not api_key:\n msg = \"Cohere API key is required when using Cohere provider\"\n raise ValueError(msg)\n return CohereEmbeddings(\n model=embedding_model,\n cohere_api_key=api_key,\n )\n if provider == \"Custom\":\n # For custom embedding models, we would need additional configuration\n msg = \"Custom embedding models not yet supported\"\n raise NotImplementedError(msg)\n msg = f\"Unknown provider: {provider}\"\n raise ValueError(msg)\n\n def _build_embedding_metadata(self, embedding_model, api_key) -> dict[str, Any]:\n \"\"\"Build embedding model metadata.\"\"\"\n # Get provider by matching model name to lists\n embedding_provider = self._get_embedding_provider(embedding_model)\n\n api_key_to_save = None\n if api_key and hasattr(api_key, \"get_secret_value\"):\n api_key_to_save = api_key.get_secret_value()\n elif isinstance(api_key, str):\n api_key_to_save = api_key\n\n encrypted_api_key = None\n if api_key_to_save:\n settings_service = get_settings_service()\n try:\n encrypted_api_key = encrypt_api_key(api_key_to_save, settings_service=settings_service)\n except (TypeError, ValueError) as e:\n self.log(f\"Could not encrypt API key: {e}\")\n logger.error(f\"Could not encrypt API key: {e}\")\n\n return {\n \"embedding_provider\": embedding_provider,\n \"embedding_model\": embedding_model,\n \"api_key\": encrypted_api_key,\n \"api_key_used\": bool(api_key),\n \"chunk_size\": self.chunk_size,\n \"created_at\": datetime.now(timezone.utc).isoformat(),\n }\n\n def _save_embedding_metadata(self, kb_path: Path, embedding_model: str, api_key: str) -> None:\n \"\"\"Save embedding model metadata.\"\"\"\n embedding_metadata = self._build_embedding_metadata(embedding_model, api_key)\n metadata_path = kb_path / \"embedding_metadata.json\"\n metadata_path.write_text(json.dumps(embedding_metadata, indent=2))\n\n def _save_kb_files(\n self,\n kb_path: Path,\n config_list: list[dict[str, Any]],\n ) -> None:\n \"\"\"Save KB files using File Component storage patterns.\"\"\"\n try:\n # Create directory (following File Component patterns)\n kb_path.mkdir(parents=True, exist_ok=True)\n\n # Save column configuration\n # Only do this if the file doesn't exist already\n cfg_path = kb_path / \"schema.json\"\n if not cfg_path.exists():\n 
cfg_path.write_text(json.dumps(config_list, indent=2))\n\n except (OSError, TypeError, ValueError) as e:\n self.log(f\"Error saving KB files: {e}\")\n\n def _build_column_metadata(self, config_list: list[dict[str, Any]], df_source: pd.DataFrame) -> dict[str, Any]:\n \"\"\"Build detailed column metadata.\"\"\"\n metadata: dict[str, Any] = {\n \"total_columns\": len(df_source.columns),\n \"mapped_columns\": len(config_list),\n \"unmapped_columns\": len(df_source.columns) - len(config_list),\n \"columns\": [],\n \"summary\": {\"vectorized_columns\": [], \"identifier_columns\": []},\n }\n\n for config in config_list:\n col_name = config.get(\"column_name\")\n vectorize = config.get(\"vectorize\") == \"True\" or config.get(\"vectorize\") is True\n identifier = config.get(\"identifier\") == \"True\" or config.get(\"identifier\") is True\n\n # Add to columns list\n metadata[\"columns\"].append(\n {\n \"name\": col_name,\n \"vectorize\": vectorize,\n \"identifier\": identifier,\n }\n )\n\n # Update summary\n if vectorize:\n metadata[\"summary\"][\"vectorized_columns\"].append(col_name)\n if identifier:\n metadata[\"summary\"][\"identifier_columns\"].append(col_name)\n\n return metadata\n\n def _create_vector_store(\n self, df_source: pd.DataFrame, config_list: list[dict[str, Any]], embedding_model: str, api_key: str\n ) -> None:\n \"\"\"Create vector store following Local DB component pattern.\"\"\"\n try:\n # Set up vector store directory\n base_dir = self._get_kb_root()\n\n vector_store_dir = base_dir / self.knowledge_base\n vector_store_dir.mkdir(parents=True, exist_ok=True)\n\n # Create embeddings model\n embedding_function = self._build_embeddings(embedding_model, api_key)\n\n # Convert DataFrame to Data objects (following Local DB pattern)\n data_objects = self._convert_df_to_data_objects(df_source, config_list)\n\n # Create vector store\n chroma = Chroma(\n persist_directory=str(vector_store_dir),\n embedding_function=embedding_function,\n collection_name=self.knowledge_base,\n )\n\n # Convert Data objects to LangChain Documents\n documents = []\n for data_obj in data_objects:\n doc = data_obj.to_lc_document()\n documents.append(doc)\n\n # Add documents to vector store\n if documents:\n chroma.add_documents(documents)\n self.log(f\"Added {len(documents)} documents to vector store '{self.knowledge_base}'\")\n\n except (OSError, ValueError, RuntimeError) as e:\n self.log(f\"Error creating vector store: {e}\")\n\n def _convert_df_to_data_objects(self, df_source: pd.DataFrame, config_list: list[dict[str, Any]]) -> list[Data]:\n \"\"\"Convert DataFrame to Data objects for vector store.\"\"\"\n data_objects: list[Data] = []\n\n # Set up vector store directory\n base_dir = self._get_kb_root()\n\n # If we don't allow duplicates, we need to get the existing hashes\n chroma = Chroma(\n persist_directory=str(base_dir / self.knowledge_base),\n collection_name=self.knowledge_base,\n )\n\n # Get all documents and their metadata\n all_docs = chroma.get()\n\n # Extract all _id values from metadata\n id_list = [metadata.get(\"_id\") for metadata in all_docs[\"metadatas\"] if metadata.get(\"_id\")]\n\n # Get column roles\n content_cols = []\n identifier_cols = []\n\n for config in config_list:\n col_name = config.get(\"column_name\")\n vectorize = config.get(\"vectorize\") == \"True\" or config.get(\"vectorize\") is True\n identifier = config.get(\"identifier\") == \"True\" or config.get(\"identifier\") is True\n\n if vectorize:\n content_cols.append(col_name)\n elif identifier:\n 
identifier_cols.append(col_name)\n\n # Convert each row to a Data object\n for _, row in df_source.iterrows():\n # Build content text from identifier columns using list comprehension\n identifier_parts = [str(row[col]) for col in content_cols if col in row and pd.notna(row[col])]\n\n # Join all parts into a single string\n page_content = \" \".join(identifier_parts)\n\n # Build metadata from NON-vectorized columns only (simple key-value pairs)\n data_dict = {\n \"text\": page_content, # Main content for vectorization\n }\n\n # Add identifier columns if they exist\n if identifier_cols:\n identifier_parts = [str(row[col]) for col in identifier_cols if col in row and pd.notna(row[col])]\n page_content = \" \".join(identifier_parts)\n\n # Add metadata columns as simple key-value pairs\n for col in df_source.columns:\n if col not in content_cols and col in row and pd.notna(row[col]):\n # Convert to simple types for Chroma metadata\n value = row[col]\n data_dict[col] = str(value) # Convert complex types to string\n\n # Hash the page_content for unique ID\n page_content_hash = hashlib.sha256(page_content.encode()).hexdigest()\n data_dict[\"_id\"] = page_content_hash\n\n # If duplicates are disallowed, and hash exists, prevent adding this row\n if not self.allow_duplicates and page_content_hash in id_list:\n self.log(f\"Skipping duplicate row with hash {page_content_hash}\")\n continue\n\n # Create Data object - everything except \"text\" becomes metadata\n data_obj = Data(data=data_dict)\n data_objects.append(data_obj)\n\n return data_objects\n\n def is_valid_collection_name(self, name, min_length: int = 3, max_length: int = 63) -> bool:\n \"\"\"Validates collection name against conditions 1-3.\n\n 1. Contains 3-63 characters\n 2. Starts and ends with alphanumeric character\n 3. 
Contains only alphanumeric characters, underscores, or hyphens.\n\n Args:\n name (str): Collection name to validate\n min_length (int): Minimum length of the name\n max_length (int): Maximum length of the name\n\n Returns:\n bool: True if valid, False otherwise\n \"\"\"\n # Check length (condition 1)\n if not (min_length <= len(name) <= max_length):\n return False\n\n # Check start/end with alphanumeric (condition 2)\n if not (name[0].isalnum() and name[-1].isalnum()):\n return False\n\n # Check allowed characters (condition 3)\n return re.match(r\"^[a-zA-Z0-9_-]+$\", name) is not None\n\n # ---------------------------------------------------------------------\n # OUTPUT METHODS\n # ---------------------------------------------------------------------\n def build_kb_info(self) -> Data:\n \"\"\"Main ingestion routine → returns a dict with KB metadata.\"\"\"\n try:\n # Get source DataFrame\n df_source: pd.DataFrame = self.input_df\n\n # Validate column configuration (using Structured Output patterns)\n config_list = self._validate_column_config(df_source)\n column_metadata = self._build_column_metadata(config_list, df_source)\n\n # Prepare KB folder (using File Component patterns)\n kb_root = self._get_kb_root()\n kb_path = kb_root / self.knowledge_base\n\n # Read the embedding info from the knowledge base folder\n metadata_path = kb_path / \"embedding_metadata.json\"\n\n # If the API key is not provided, try to read it from the metadata file\n if metadata_path.exists():\n settings_service = get_settings_service()\n metadata = json.loads(metadata_path.read_text())\n embedding_model = metadata.get(\"embedding_model\")\n try:\n api_key = decrypt_api_key(metadata[\"api_key\"], settings_service)\n except (InvalidToken, TypeError, ValueError) as e:\n logger.error(f\"Could not decrypt API key. Please provide it manually. 
Error: {e}\")\n\n # Check if a custom API key was provided, update metadata if so\n if self.api_key:\n api_key = self.api_key\n self._save_embedding_metadata(\n kb_path=kb_path,\n embedding_model=embedding_model,\n api_key=api_key,\n )\n\n # Create vector store following Local DB component pattern\n self._create_vector_store(df_source, config_list, embedding_model=embedding_model, api_key=api_key)\n\n # Save KB files (using File Component storage patterns)\n self._save_kb_files(kb_path, config_list)\n\n # Build metadata response\n meta: dict[str, Any] = {\n \"kb_id\": str(uuid.uuid4()),\n \"kb_name\": self.knowledge_base,\n \"rows\": len(df_source),\n \"column_metadata\": column_metadata,\n \"path\": str(kb_path),\n \"config_columns\": len(config_list),\n \"timestamp\": datetime.now(tz=timezone.utc).isoformat(),\n }\n\n # Set status message\n self.status = f\"✅ KB **{self.knowledge_base}** saved · {len(df_source)} chunks.\"\n\n return Data(data=meta)\n\n except (OSError, ValueError, RuntimeError, KeyError) as e:\n self.log(f\"Error in KB ingestion: {e}\")\n self.status = f\"❌ KB ingestion failed: {e}\"\n return Data(data={\"error\": str(e), \"kb_name\": self.knowledge_base})\n\n def _get_knowledge_bases(self) -> list[str]:\n \"\"\"Retrieve a list of available knowledge bases.\n\n Returns:\n A list of knowledge base names.\n \"\"\"\n # Return the list of directories in the knowledge base root path\n kb_root_path = self._get_kb_root()\n\n if not kb_root_path.exists():\n return []\n\n return [str(d.name) for d in kb_root_path.iterdir() if not d.name.startswith(\".\") and d.is_dir()]\n\n def update_build_config(self, build_config: dotdict, field_value: Any, field_name: str | None = None) -> dotdict:\n \"\"\"Update build configuration based on provider selection.\"\"\"\n # Create a new knowledge base\n if field_name == \"knowledge_base\":\n if isinstance(field_value, dict) and \"01_new_kb_name\" in field_value:\n # Validate the knowledge base name - Make sure it follows these rules:\n if not self.is_valid_collection_name(field_value[\"01_new_kb_name\"]):\n msg = f\"Invalid knowledge base name: {field_value['01_new_kb_name']}\"\n raise ValueError(msg)\n\n # We need to test the API Key one time against the embedding model\n embed_model = self._build_embeddings(\n embedding_model=field_value[\"02_embedding_model\"], api_key=field_value[\"03_api_key\"]\n )\n\n # Try to generate a dummy embedding to validate the API key\n embed_model.embed_query(\"test\")\n\n # Create the new knowledge base directory\n kb_path = KNOWLEDGE_BASES_ROOT_PATH / field_value[\"01_new_kb_name\"]\n kb_path.mkdir(parents=True, exist_ok=True)\n\n # Save the embedding metadata\n build_config[\"knowledge_base\"][\"value\"] = field_value[\"01_new_kb_name\"]\n self._save_embedding_metadata(\n kb_path=kb_path,\n embedding_model=field_value[\"02_embedding_model\"],\n api_key=field_value[\"03_api_key\"],\n )\n\n # Update the knowledge base options dynamically\n build_config[\"knowledge_base\"][\"options\"] = self._get_knowledge_bases()\n if build_config[\"knowledge_base\"][\"value\"] not in build_config[\"knowledge_base\"][\"options\"]:\n build_config[\"knowledge_base\"][\"value\"] = None\n\n return build_config\n"
  },
  "column_config": {
  "_input_type": "TableInput",
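Note: the Knowledge Ingestion component code above deduplicates rows by hashing the concatenated vectorized columns with SHA-256, storing the digest as the document "_id", and skipping any row whose digest is already present in the Chroma collection when allow_duplicates is off. A minimal stand-alone sketch of that idea follows; the function and sample data are illustrative, not the component's actual API.

import hashlib

def content_hash(parts: list[str]) -> str:
    # Join the vectorized column values and hash them, as the component does.
    return hashlib.sha256(" ".join(parts).encode()).hexdigest()

# Stand-in for the _id values already stored in the collection.
existing_ids = {content_hash(["hello world"])}

for row in (["hello world"], ["a brand new row"]):
    row_id = content_hash(row)
    if row_id in existing_ids:
        print(f"skipping duplicate row with hash {row_id[:12]}...")
        continue
    existing_ids.add(row_id)
    print(f"adding row with hash {row_id[:12]}...")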
@@ -1208,7 +1208,7 @@
  "legacy": false,
  "lf_version": "1.4.3",
  "metadata": {
- "code_hash": "
+ "code_hash": "d9af728ce02a",
  "module": "langflow.components.processing.save_file.SaveToFileComponent"
  },
  "minimized": false,
@@ -1232,6 +1232,23 @@
  "pinned": false,
  "template": {
  "_type": "Component",
+ "api_key": {
+ "_input_type": "SecretStrInput",
+ "advanced": true,
+ "display_name": "Langflow API Key",
+ "dynamic": false,
+ "info": "Langflow API key for authentication when saving the file.",
+ "input_types": [],
+ "load_from_db": true,
+ "name": "api_key",
+ "password": true,
+ "placeholder": "",
+ "required": false,
+ "show": true,
+ "title_case": false,
+ "type": "str",
+ "value": ""
+ },
  "code": {
  "advanced": true,
  "dynamic": true,
@@ -1248,7 +1265,7 @@
  "show": true,
  "title_case": false,
  "type": "code",
-
"value": "import json\nfrom collections.abc import AsyncIterator, Iterator\nfrom pathlib import Path\n\nimport orjson\nimport pandas as pd\nfrom fastapi import UploadFile\nfrom fastapi.encoders import jsonable_encoder\n\nfrom langflow.api.v2.files import upload_user_file\nfrom langflow.custom import Component\nfrom langflow.io import DropdownInput, HandleInput, StrInput\nfrom langflow.schema import Data, DataFrame, Message\nfrom langflow.services.auth.utils import create_user_longterm_token\nfrom langflow.services.database.models.user.crud import get_user_by_id\nfrom langflow.services.deps import get_session, get_settings_service, get_storage_service\nfrom langflow.template.field.base import Output\n\n\nclass SaveToFileComponent(Component):\n display_name = \"Save File\"\n description = \"Save data to a local file in the selected format.\"\n documentation: str = \"https://docs.langflow.org/components-processing#save-file\"\n icon = \"save\"\n name = \"SaveToFile\"\n\n # File format options for different types\n DATA_FORMAT_CHOICES = [\"csv\", \"excel\", \"json\", \"markdown\"]\n MESSAGE_FORMAT_CHOICES = [\"txt\", \"json\", \"markdown\"]\n\n inputs = [\n HandleInput(\n name=\"input\",\n display_name=\"Input\",\n info=\"The input to save.\",\n dynamic=True,\n input_types=[\"Data\", \"DataFrame\", \"Message\"],\n required=True,\n ),\n StrInput(\n name=\"file_name\",\n display_name=\"File Name\",\n info=\"Name file will be saved as (without extension).\",\n required=True,\n ),\n DropdownInput(\n name=\"file_format\",\n display_name=\"File Format\",\n options=list(dict.fromkeys(DATA_FORMAT_CHOICES + MESSAGE_FORMAT_CHOICES)),\n info=\"Select the file format to save the input. If not provided, the default format will be used.\",\n value=\"\",\n advanced=True,\n ),\n ]\n\n outputs = [Output(display_name=\"File Path\", name=\"message\", method=\"save_to_file\")]\n\n async def save_to_file(self) -> Message:\n \"\"\"Save the input to a file and upload it, returning a confirmation message.\"\"\"\n # Validate inputs\n if not self.file_name:\n msg = \"File name must be provided.\"\n raise ValueError(msg)\n if not self._get_input_type():\n msg = \"Input type is not set.\"\n raise ValueError(msg)\n\n # Validate file format based on input type\n file_format = self.file_format or self._get_default_format()\n allowed_formats = (\n self.MESSAGE_FORMAT_CHOICES if self._get_input_type() == \"Message\" else self.DATA_FORMAT_CHOICES\n )\n if file_format not in allowed_formats:\n msg = f\"Invalid file format '{file_format}' for {self._get_input_type()}. 
Allowed: {allowed_formats}\"\n raise ValueError(msg)\n\n # Prepare file path\n file_path = Path(self.file_name).expanduser()\n if not file_path.parent.exists():\n file_path.parent.mkdir(parents=True, exist_ok=True)\n file_path = self._adjust_file_path_with_format(file_path, file_format)\n\n # Save the input to file based on type\n if self._get_input_type() == \"DataFrame\":\n confirmation = self._save_dataframe(self.input, file_path, file_format)\n elif self._get_input_type() == \"Data\":\n confirmation = self._save_data(self.input, file_path, file_format)\n elif self._get_input_type() == \"Message\":\n confirmation = await self._save_message(self.input, file_path, file_format)\n else:\n msg = f\"Unsupported input type: {self._get_input_type()}\"\n raise ValueError(msg)\n\n # Upload the saved file\n await self._upload_file(file_path)\n\n # Return the final file path and confirmation message\n final_path = Path.cwd() / file_path if not file_path.is_absolute() else file_path\n\n return Message(text=f\"{confirmation} at {final_path}\")\n\n def _get_input_type(self) -> str:\n \"\"\"Determine the input type based on the provided input.\"\"\"\n # Use exact type checking (type() is) instead of isinstance() to avoid inheritance issues.\n # Since Message inherits from Data, isinstance(message, Data) would return True for Message objects,\n # causing Message inputs to be incorrectly identified as Data type.\n if type(self.input) is DataFrame:\n return \"DataFrame\"\n if type(self.input) is Message:\n return \"Message\"\n if type(self.input) is Data:\n return \"Data\"\n msg = f\"Unsupported input type: {type(self.input)}\"\n raise ValueError(msg)\n\n def _get_default_format(self) -> str:\n \"\"\"Return the default file format based on input type.\"\"\"\n if self._get_input_type() == \"DataFrame\":\n return \"csv\"\n if self._get_input_type() == \"Data\":\n return \"json\"\n if self._get_input_type() == \"Message\":\n return \"json\"\n return \"json\" # Fallback\n\n def _adjust_file_path_with_format(self, path: Path, fmt: str) -> Path:\n \"\"\"Adjust the file path to include the correct extension.\"\"\"\n file_extension = path.suffix.lower().lstrip(\".\")\n if fmt == \"excel\":\n return Path(f\"{path}.xlsx\").expanduser() if file_extension not in [\"xlsx\", \"xls\"] else path\n return Path(f\"{path}.{fmt}\").expanduser() if file_extension != fmt else path\n\n async def _upload_file(self, file_path: Path) -> None:\n \"\"\"Upload the saved file using the upload_user_file service.\"\"\"\n if not file_path.exists():\n msg = f\"File not found: {file_path}\"\n raise FileNotFoundError(msg)\n\n with file_path.open(\"rb\") as f:\n async for db in get_session():\n user_id, _ = await create_user_longterm_token(db)\n current_user = await get_user_by_id(db, user_id)\n\n await upload_user_file(\n file=UploadFile(filename=file_path.name, file=f, size=file_path.stat().st_size),\n session=db,\n current_user=current_user,\n storage_service=get_storage_service(),\n settings_service=get_settings_service(),\n )\n\n def _save_dataframe(self, dataframe: DataFrame, path: Path, fmt: str) -> str:\n \"\"\"Save a DataFrame to the specified file format.\"\"\"\n if fmt == \"csv\":\n dataframe.to_csv(path, index=False)\n elif fmt == \"excel\":\n dataframe.to_excel(path, index=False, engine=\"openpyxl\")\n elif fmt == \"json\":\n dataframe.to_json(path, orient=\"records\", indent=2)\n elif fmt == \"markdown\":\n path.write_text(dataframe.to_markdown(index=False), encoding=\"utf-8\")\n else:\n msg = f\"Unsupported DataFrame format: 
{fmt}\"\n raise ValueError(msg)\n return f\"DataFrame saved successfully as '{path}'\"\n\n def _save_data(self, data: Data, path: Path, fmt: str) -> str:\n \"\"\"Save a Data object to the specified file format.\"\"\"\n if fmt == \"csv\":\n pd.DataFrame(data.data).to_csv(path, index=False)\n elif fmt == \"excel\":\n pd.DataFrame(data.data).to_excel(path, index=False, engine=\"openpyxl\")\n elif fmt == \"json\":\n path.write_text(\n orjson.dumps(jsonable_encoder(data.data), option=orjson.OPT_INDENT_2).decode(\"utf-8\"), encoding=\"utf-8\"\n )\n elif fmt == \"markdown\":\n path.write_text(pd.DataFrame(data.data).to_markdown(index=False), encoding=\"utf-8\")\n else:\n msg = f\"Unsupported Data format: {fmt}\"\n raise ValueError(msg)\n return f\"Data saved successfully as '{path}'\"\n\n async def _save_message(self, message: Message, path: Path, fmt: str) -> str:\n \"\"\"Save a Message to the specified file format, handling async iterators.\"\"\"\n content = \"\"\n if message.text is None:\n content = \"\"\n elif isinstance(message.text, AsyncIterator):\n async for item in message.text:\n content += str(item) + \" \"\n content = content.strip()\n elif isinstance(message.text, Iterator):\n content = \" \".join(str(item) for item in message.text)\n else:\n content = str(message.text)\n\n if fmt == \"txt\":\n path.write_text(content, encoding=\"utf-8\")\n elif fmt == \"json\":\n path.write_text(json.dumps({\"message\": content}, indent=2), encoding=\"utf-8\")\n elif fmt == \"markdown\":\n path.write_text(f\"**Message:**\\n\\n{content}\", encoding=\"utf-8\")\n else:\n msg = f\"Unsupported Message format: {fmt}\"\n raise ValueError(msg)\n return f\"Message saved successfully as '{path}'\"\n"
+
"value": "import json\nfrom collections.abc import AsyncIterator, Iterator\nfrom pathlib import Path\nfrom typing import TYPE_CHECKING\n\nimport orjson\nimport pandas as pd\nfrom fastapi import UploadFile\nfrom fastapi.encoders import jsonable_encoder\n\nfrom langflow.api.v2.files import upload_user_file\nfrom langflow.custom import Component\nfrom langflow.io import DropdownInput, HandleInput, SecretStrInput, StrInput\nfrom langflow.schema import Data, DataFrame, Message\nfrom langflow.services.auth.utils import create_user_longterm_token, get_current_user\nfrom langflow.services.database.models.user.crud import get_user_by_id\nfrom langflow.services.deps import get_session, get_settings_service, get_storage_service\nfrom langflow.template.field.base import Output\n\nif TYPE_CHECKING:\n from langflow.services.database.models.user.model import User\n\n\nclass SaveToFileComponent(Component):\n display_name = \"Save File\"\n description = \"Save data to a local file in the selected format.\"\n documentation: str = \"https://docs.langflow.org/components-processing#save-file\"\n icon = \"save\"\n name = \"SaveToFile\"\n\n # File format options for different types\n DATA_FORMAT_CHOICES = [\"csv\", \"excel\", \"json\", \"markdown\"]\n MESSAGE_FORMAT_CHOICES = [\"txt\", \"json\", \"markdown\"]\n\n inputs = [\n HandleInput(\n name=\"input\",\n display_name=\"Input\",\n info=\"The input to save.\",\n dynamic=True,\n input_types=[\"Data\", \"DataFrame\", \"Message\"],\n required=True,\n ),\n StrInput(\n name=\"file_name\",\n display_name=\"File Name\",\n info=\"Name file will be saved as (without extension).\",\n required=True,\n ),\n DropdownInput(\n name=\"file_format\",\n display_name=\"File Format\",\n options=list(dict.fromkeys(DATA_FORMAT_CHOICES + MESSAGE_FORMAT_CHOICES)),\n info=\"Select the file format to save the input. If not provided, the default format will be used.\",\n value=\"\",\n advanced=True,\n ),\n SecretStrInput(\n name=\"api_key\",\n display_name=\"Langflow API Key\",\n info=\"Langflow API key for authentication when saving the file.\",\n required=False,\n advanced=True,\n ),\n ]\n\n outputs = [Output(display_name=\"File Path\", name=\"message\", method=\"save_to_file\")]\n\n async def save_to_file(self) -> Message:\n \"\"\"Save the input to a file and upload it, returning a confirmation message.\"\"\"\n # Validate inputs\n if not self.file_name:\n msg = \"File name must be provided.\"\n raise ValueError(msg)\n if not self._get_input_type():\n msg = \"Input type is not set.\"\n raise ValueError(msg)\n\n # Validate file format based on input type\n file_format = self.file_format or self._get_default_format()\n allowed_formats = (\n self.MESSAGE_FORMAT_CHOICES if self._get_input_type() == \"Message\" else self.DATA_FORMAT_CHOICES\n )\n if file_format not in allowed_formats:\n msg = f\"Invalid file format '{file_format}' for {self._get_input_type()}. 
Allowed: {allowed_formats}\"\n raise ValueError(msg)\n\n # Prepare file path\n file_path = Path(self.file_name).expanduser()\n if not file_path.parent.exists():\n file_path.parent.mkdir(parents=True, exist_ok=True)\n file_path = self._adjust_file_path_with_format(file_path, file_format)\n\n # Save the input to file based on type\n if self._get_input_type() == \"DataFrame\":\n confirmation = self._save_dataframe(self.input, file_path, file_format)\n elif self._get_input_type() == \"Data\":\n confirmation = self._save_data(self.input, file_path, file_format)\n elif self._get_input_type() == \"Message\":\n confirmation = await self._save_message(self.input, file_path, file_format)\n else:\n msg = f\"Unsupported input type: {self._get_input_type()}\"\n raise ValueError(msg)\n\n # Upload the saved file\n await self._upload_file(file_path)\n\n # Return the final file path and confirmation message\n final_path = Path.cwd() / file_path if not file_path.is_absolute() else file_path\n\n return Message(text=f\"{confirmation} at {final_path}\")\n\n def _get_input_type(self) -> str:\n \"\"\"Determine the input type based on the provided input.\"\"\"\n # Use exact type checking (type() is) instead of isinstance() to avoid inheritance issues.\n # Since Message inherits from Data, isinstance(message, Data) would return True for Message objects,\n # causing Message inputs to be incorrectly identified as Data type.\n if type(self.input) is DataFrame:\n return \"DataFrame\"\n if type(self.input) is Message:\n return \"Message\"\n if type(self.input) is Data:\n return \"Data\"\n msg = f\"Unsupported input type: {type(self.input)}\"\n raise ValueError(msg)\n\n def _get_default_format(self) -> str:\n \"\"\"Return the default file format based on input type.\"\"\"\n if self._get_input_type() == \"DataFrame\":\n return \"csv\"\n if self._get_input_type() == \"Data\":\n return \"json\"\n if self._get_input_type() == \"Message\":\n return \"json\"\n return \"json\" # Fallback\n\n def _adjust_file_path_with_format(self, path: Path, fmt: str) -> Path:\n \"\"\"Adjust the file path to include the correct extension.\"\"\"\n file_extension = path.suffix.lower().lstrip(\".\")\n if fmt == \"excel\":\n return Path(f\"{path}.xlsx\").expanduser() if file_extension not in [\"xlsx\", \"xls\"] else path\n return Path(f\"{path}.{fmt}\").expanduser() if file_extension != fmt else path\n\n async def _upload_file(self, file_path: Path) -> None:\n \"\"\"Upload the saved file using the upload_user_file service.\"\"\"\n if not file_path.exists():\n msg = f\"File not found: {file_path}\"\n raise FileNotFoundError(msg)\n\n with file_path.open(\"rb\") as f:\n async for db in get_session():\n # TODO: In 1.6, this may need to be removed or adjusted\n # Try to get the super user token, if possible\n current_user: User | None = None\n if self.api_key:\n current_user = await get_current_user(\n token=\"\",\n query_param=self.api_key,\n header_param=\"\",\n db=db,\n )\n else:\n user_id, _ = await create_user_longterm_token(db)\n current_user = await get_user_by_id(db, user_id)\n\n # Fail if the user is not found\n if not current_user:\n msg = \"User not found. 
Please provide a valid API key or ensure the user exists.\"\n raise ValueError(msg)\n\n await upload_user_file(\n file=UploadFile(filename=file_path.name, file=f, size=file_path.stat().st_size),\n session=db,\n current_user=current_user,\n storage_service=get_storage_service(),\n settings_service=get_settings_service(),\n )\n\n def _save_dataframe(self, dataframe: DataFrame, path: Path, fmt: str) -> str:\n \"\"\"Save a DataFrame to the specified file format.\"\"\"\n if fmt == \"csv\":\n dataframe.to_csv(path, index=False)\n elif fmt == \"excel\":\n dataframe.to_excel(path, index=False, engine=\"openpyxl\")\n elif fmt == \"json\":\n dataframe.to_json(path, orient=\"records\", indent=2)\n elif fmt == \"markdown\":\n path.write_text(dataframe.to_markdown(index=False), encoding=\"utf-8\")\n else:\n msg = f\"Unsupported DataFrame format: {fmt}\"\n raise ValueError(msg)\n return f\"DataFrame saved successfully as '{path}'\"\n\n def _save_data(self, data: Data, path: Path, fmt: str) -> str:\n \"\"\"Save a Data object to the specified file format.\"\"\"\n if fmt == \"csv\":\n pd.DataFrame(data.data).to_csv(path, index=False)\n elif fmt == \"excel\":\n pd.DataFrame(data.data).to_excel(path, index=False, engine=\"openpyxl\")\n elif fmt == \"json\":\n path.write_text(\n orjson.dumps(jsonable_encoder(data.data), option=orjson.OPT_INDENT_2).decode(\"utf-8\"), encoding=\"utf-8\"\n )\n elif fmt == \"markdown\":\n path.write_text(pd.DataFrame(data.data).to_markdown(index=False), encoding=\"utf-8\")\n else:\n msg = f\"Unsupported Data format: {fmt}\"\n raise ValueError(msg)\n return f\"Data saved successfully as '{path}'\"\n\n async def _save_message(self, message: Message, path: Path, fmt: str) -> str:\n \"\"\"Save a Message to the specified file format, handling async iterators.\"\"\"\n content = \"\"\n if message.text is None:\n content = \"\"\n elif isinstance(message.text, AsyncIterator):\n async for item in message.text:\n content += str(item) + \" \"\n content = content.strip()\n elif isinstance(message.text, Iterator):\n content = \" \".join(str(item) for item in message.text)\n else:\n content = str(message.text)\n\n if fmt == \"txt\":\n path.write_text(content, encoding=\"utf-8\")\n elif fmt == \"json\":\n path.write_text(json.dumps({\"message\": content}, indent=2), encoding=\"utf-8\")\n elif fmt == \"markdown\":\n path.write_text(f\"**Message:**\\n\\n{content}\", encoding=\"utf-8\")\n else:\n msg = f\"Unsupported Message format: {fmt}\"\n raise ValueError(msg)\n return f\"Message saved successfully as '{path}'\"\n"
  },
  "file_format": {
  "_input_type": "DropdownInput",
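Note: the regenerated SaveToFileComponent code above adds an optional api_key (SecretStrInput) and changes _upload_file to resolve the uploading user from that key via get_current_user, falling back to the long-term-token superuser path when no key is given, and raising if neither yields a user. A rough sketch of that fallback logic, using stand-in resolver functions rather than Langflow's real auth services:

def resolve_user_by_api_key(api_key: str) -> str | None:
    # Stand-in for get_current_user(query_param=api_key, ...).
    return "api-key-user" if api_key == "valid-key" else None

def resolve_default_user() -> str | None:
    # Stand-in for create_user_longterm_token + get_user_by_id.
    return "long-term-token-user"

def resolve_upload_user(api_key: str | None) -> str:
    user = resolve_user_by_api_key(api_key) if api_key else resolve_default_user()
    if not user:
        msg = "User not found. Please provide a valid API key or ensure the user exists."
        raise ValueError(msg)
    return user

print(resolve_upload_user(None))         # long-term-token-user
print(resolve_upload_user("valid-key"))  # api-key-user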
@@ -2518,7 +2518,7 @@
  "legacy": false,
  "lf_version": "1.4.2",
  "metadata": {
- "code_hash": "
+ "code_hash": "6839fa3cae99",
  "module": "langflow.components.agents.mcp_component.MCPToolsComponent"
  },
  "minimized": false,
@@ -2545,6 +2545,23 @@
  "score": 0.003932426697386162,
  "template": {
  "_type": "Component",
+ "api_key": {
+ "_input_type": "SecretStrInput",
+ "advanced": true,
+ "display_name": "Langflow API Key",
+ "dynamic": false,
+ "info": "Langflow API key for authentication when fetching MCP servers and tools.",
+ "input_types": [],
+ "load_from_db": true,
+ "name": "api_key",
+ "password": true,
+ "placeholder": "",
+ "required": false,
+ "show": true,
+ "title_case": false,
+ "type": "str",
+ "value": ""
+ },
  "code": {
  "advanced": true,
  "dynamic": true,
@@ -2561,7 +2578,7 @@
  "show": true,
  "title_case": false,
  "type": "code",
-
"value": "from __future__ import annotations\n\nimport asyncio\nimport uuid\nfrom typing import Any\n\nfrom langchain_core.tools import StructuredTool # noqa: TC002\n\nfrom langflow.api.v2.mcp import get_server\nfrom langflow.base.agents.utils import maybe_unflatten_dict, safe_cache_get, safe_cache_set\nfrom langflow.base.mcp.util import (\n MCPSseClient,\n MCPStdioClient,\n create_input_schema_from_json_schema,\n update_tools,\n)\nfrom langflow.custom.custom_component.component_with_cache import ComponentWithCache\nfrom langflow.inputs.inputs import InputTypes # noqa: TC001\nfrom langflow.io import DropdownInput, McpInput, MessageTextInput, Output\nfrom langflow.io.schema import flatten_schema, schema_to_langflow_inputs\nfrom langflow.logging import logger\nfrom langflow.schema.dataframe import DataFrame\nfrom langflow.schema.message import Message\nfrom langflow.services.auth.utils import create_user_longterm_token\n\n# Import get_server from the backend API\nfrom langflow.services.database.models.user.crud import get_user_by_id\nfrom langflow.services.deps import get_session, get_settings_service, get_storage_service\n\n\nclass MCPToolsComponent(ComponentWithCache):\n schema_inputs: list = []\n tools: list[StructuredTool] = []\n _not_load_actions: bool = False\n _tool_cache: dict = {}\n _last_selected_server: str | None = None # Cache for the last selected server\n\n def __init__(self, **data) -> None:\n super().__init__(**data)\n # Initialize cache keys to avoid CacheMiss when accessing them\n self._ensure_cache_structure()\n\n # Initialize clients with access to the component cache\n self.stdio_client: MCPStdioClient = MCPStdioClient(component_cache=self._shared_component_cache)\n self.sse_client: MCPSseClient = MCPSseClient(component_cache=self._shared_component_cache)\n\n def _ensure_cache_structure(self):\n \"\"\"Ensure the cache has the required structure.\"\"\"\n # Check if servers key exists and is not CacheMiss\n servers_value = safe_cache_get(self._shared_component_cache, \"servers\")\n if servers_value is None:\n safe_cache_set(self._shared_component_cache, \"servers\", {})\n\n # Check if last_selected_server key exists and is not CacheMiss\n last_server_value = safe_cache_get(self._shared_component_cache, \"last_selected_server\")\n if last_server_value is None:\n safe_cache_set(self._shared_component_cache, \"last_selected_server\", \"\")\n\n default_keys: list[str] = [\n \"code\",\n \"_type\",\n \"tool_mode\",\n \"tool_placeholder\",\n \"mcp_server\",\n \"tool\",\n ]\n\n display_name = \"MCP Tools\"\n description = \"Connect to an MCP server to use its tools.\"\n documentation: str = \"https://docs.langflow.org/mcp-client\"\n icon = \"Mcp\"\n name = \"MCPTools\"\n\n inputs = [\n McpInput(\n name=\"mcp_server\",\n display_name=\"MCP Server\",\n info=\"Select the MCP Server that will be used by this component\",\n real_time_refresh=True,\n ),\n DropdownInput(\n name=\"tool\",\n display_name=\"Tool\",\n options=[],\n value=\"\",\n info=\"Select the tool to execute\",\n show=False,\n required=True,\n real_time_refresh=True,\n ),\n MessageTextInput(\n name=\"tool_placeholder\",\n display_name=\"Tool Placeholder\",\n info=\"Placeholder for the tool\",\n value=\"\",\n show=False,\n tool_mode=False,\n ),\n ]\n\n outputs = [\n Output(display_name=\"Response\", name=\"response\", method=\"build_output\"),\n ]\n\n async def _validate_schema_inputs(self, tool_obj) -> list[InputTypes]:\n \"\"\"Validate and process schema inputs for a tool.\"\"\"\n try:\n if not tool_obj or not 
hasattr(tool_obj, \"args_schema\"):\n msg = \"Invalid tool object or missing input schema\"\n raise ValueError(msg)\n\n flat_schema = flatten_schema(tool_obj.args_schema.schema())\n input_schema = create_input_schema_from_json_schema(flat_schema)\n if not input_schema:\n msg = f\"Empty input schema for tool '{tool_obj.name}'\"\n raise ValueError(msg)\n\n schema_inputs = schema_to_langflow_inputs(input_schema)\n if not schema_inputs:\n msg = f\"No input parameters defined for tool '{tool_obj.name}'\"\n logger.warning(msg)\n return []\n\n except Exception as e:\n msg = f\"Error validating schema inputs: {e!s}\"\n logger.exception(msg)\n raise ValueError(msg) from e\n else:\n return schema_inputs\n\n async def update_tool_list(self, mcp_server_value=None):\n # Accepts mcp_server_value as dict {name, config} or uses self.mcp_server\n mcp_server = mcp_server_value if mcp_server_value is not None else getattr(self, \"mcp_server\", None)\n server_name = None\n server_config_from_value = None\n if isinstance(mcp_server, dict):\n server_name = mcp_server.get(\"name\")\n server_config_from_value = mcp_server.get(\"config\")\n else:\n server_name = mcp_server\n if not server_name:\n self.tools = []\n return [], {\"name\": server_name, \"config\": server_config_from_value}\n\n # Use shared cache if available\n servers_cache = safe_cache_get(self._shared_component_cache, \"servers\", {})\n cached = servers_cache.get(server_name) if isinstance(servers_cache, dict) else None\n\n if cached is not None:\n self.tools = cached[\"tools\"]\n self.tool_names = cached[\"tool_names\"]\n self._tool_cache = cached[\"tool_cache\"]\n server_config_from_value = cached[\"config\"]\n return self.tools, {\"name\": server_name, \"config\": server_config_from_value}\n\n try:\n async for db in get_session():\n user_id, _ = await create_user_longterm_token(db)\n current_user = await get_user_by_id(db, user_id)\n\n # Try to get server config from DB/API\n server_config = await get_server(\n server_name,\n current_user,\n db,\n storage_service=get_storage_service(),\n settings_service=get_settings_service(),\n )\n\n # If get_server returns empty but we have a config, use it\n if not server_config and server_config_from_value:\n server_config = server_config_from_value\n\n if not server_config:\n self.tools = []\n return [], {\"name\": server_name, \"config\": server_config}\n\n _, tool_list, tool_cache = await update_tools(\n server_name=server_name,\n server_config=server_config,\n mcp_stdio_client=self.stdio_client,\n mcp_sse_client=self.sse_client,\n )\n\n self.tool_names = [tool.name for tool in tool_list if hasattr(tool, \"name\")]\n self._tool_cache = tool_cache\n self.tools = tool_list\n # Cache the result using shared cache\n cache_data = {\n \"tools\": tool_list,\n \"tool_names\": self.tool_names,\n \"tool_cache\": tool_cache,\n \"config\": server_config,\n }\n\n # Safely update the servers cache\n current_servers_cache = safe_cache_get(self._shared_component_cache, \"servers\", {})\n if isinstance(current_servers_cache, dict):\n current_servers_cache[server_name] = cache_data\n safe_cache_set(self._shared_component_cache, \"servers\", current_servers_cache)\n\n return tool_list, {\"name\": server_name, \"config\": server_config}\n except (TimeoutError, asyncio.TimeoutError) as e:\n msg = f\"Timeout updating tool list: {e!s}\"\n logger.exception(msg)\n raise TimeoutError(msg) from e\n except Exception as e:\n msg = f\"Error updating tool list: {e!s}\"\n logger.exception(msg)\n raise ValueError(msg) from e\n\n async 
def update_build_config(self, build_config: dict, field_value: str, field_name: str | None = None) -> dict:\n \"\"\"Toggle the visibility of connection-specific fields based on the selected mode.\"\"\"\n try:\n if field_name == \"tool\":\n try:\n if len(self.tools) == 0:\n try:\n self.tools, build_config[\"mcp_server\"][\"value\"] = await self.update_tool_list()\n build_config[\"tool\"][\"options\"] = [tool.name for tool in self.tools]\n build_config[\"tool\"][\"placeholder\"] = \"Select a tool\"\n except (TimeoutError, asyncio.TimeoutError) as e:\n msg = f\"Timeout updating tool list: {e!s}\"\n logger.exception(msg)\n if not build_config[\"tools_metadata\"][\"show\"]:\n build_config[\"tool\"][\"show\"] = True\n build_config[\"tool\"][\"options\"] = []\n build_config[\"tool\"][\"value\"] = \"\"\n build_config[\"tool\"][\"placeholder\"] = \"Timeout on MCP server\"\n else:\n build_config[\"tool\"][\"show\"] = False\n except ValueError:\n if not build_config[\"tools_metadata\"][\"show\"]:\n build_config[\"tool\"][\"show\"] = True\n build_config[\"tool\"][\"options\"] = []\n build_config[\"tool\"][\"value\"] = \"\"\n build_config[\"tool\"][\"placeholder\"] = \"Error on MCP Server\"\n else:\n build_config[\"tool\"][\"show\"] = False\n\n if field_value == \"\":\n return build_config\n tool_obj = None\n for tool in self.tools:\n if tool.name == field_value:\n tool_obj = tool\n break\n if tool_obj is None:\n msg = f\"Tool {field_value} not found in available tools: {self.tools}\"\n logger.warning(msg)\n return build_config\n await self._update_tool_config(build_config, field_value)\n except Exception as e:\n build_config[\"tool\"][\"options\"] = []\n msg = f\"Failed to update tools: {e!s}\"\n raise ValueError(msg) from e\n else:\n return build_config\n elif field_name == \"mcp_server\":\n if not field_value:\n build_config[\"tool\"][\"show\"] = False\n build_config[\"tool\"][\"options\"] = []\n build_config[\"tool\"][\"value\"] = \"\"\n build_config[\"tool\"][\"placeholder\"] = \"\"\n build_config[\"tool_placeholder\"][\"tool_mode\"] = False\n self.remove_non_default_keys(build_config)\n return build_config\n\n build_config[\"tool_placeholder\"][\"tool_mode\"] = True\n\n current_server_name = field_value.get(\"name\") if isinstance(field_value, dict) else field_value\n _last_selected_server = safe_cache_get(self._shared_component_cache, \"last_selected_server\", \"\")\n\n # To avoid unnecessary updates, only proceed if the server has actually changed\n if (_last_selected_server in (current_server_name, \"\")) and build_config[\"tool\"][\"show\"]:\n return build_config\n\n # Determine if \"Tool Mode\" is active by checking if the tool dropdown is hidden.\n is_in_tool_mode = build_config[\"tools_metadata\"][\"show\"]\n safe_cache_set(self._shared_component_cache, \"last_selected_server\", current_server_name)\n\n # Check if tools are already cached for this server before clearing\n cached_tools = None\n if current_server_name:\n servers_cache = safe_cache_get(self._shared_component_cache, \"servers\", {})\n if isinstance(servers_cache, dict):\n cached = servers_cache.get(current_server_name)\n if cached is not None:\n cached_tools = cached[\"tools\"]\n self.tools = cached_tools\n self.tool_names = cached[\"tool_names\"]\n self._tool_cache = cached[\"tool_cache\"]\n\n # Only clear tools if we don't have cached tools for the current server\n if not cached_tools:\n self.tools = [] # Clear previous tools only if no cache\n\n self.remove_non_default_keys(build_config) # Clear previous tool inputs\n\n # 
Only show the tool dropdown if not in tool_mode\n if not is_in_tool_mode:\n build_config[\"tool\"][\"show\"] = True\n if cached_tools:\n # Use cached tools to populate options immediately\n build_config[\"tool\"][\"options\"] = [tool.name for tool in cached_tools]\n build_config[\"tool\"][\"placeholder\"] = \"Select a tool\"\n else:\n # Show loading state only when we need to fetch tools\n build_config[\"tool\"][\"placeholder\"] = \"Loading tools...\"\n build_config[\"tool\"][\"options\"] = []\n build_config[\"tool\"][\"value\"] = uuid.uuid4()\n else:\n # Keep the tool dropdown hidden if in tool_mode\n self._not_load_actions = True\n build_config[\"tool\"][\"show\"] = False\n\n elif field_name == \"tool_mode\":\n build_config[\"tool\"][\"placeholder\"] = \"\"\n build_config[\"tool\"][\"show\"] = not bool(field_value) and bool(build_config[\"mcp_server\"])\n self.remove_non_default_keys(build_config)\n self.tool = build_config[\"tool\"][\"value\"]\n if field_value:\n self._not_load_actions = True\n else:\n build_config[\"tool\"][\"value\"] = uuid.uuid4()\n build_config[\"tool\"][\"options\"] = []\n build_config[\"tool\"][\"show\"] = True\n build_config[\"tool\"][\"placeholder\"] = \"Loading tools...\"\n elif field_name == \"tools_metadata\":\n self._not_load_actions = False\n\n except Exception as e:\n msg = f\"Error in update_build_config: {e!s}\"\n logger.exception(msg)\n raise ValueError(msg) from e\n else:\n return build_config\n\n def get_inputs_for_all_tools(self, tools: list) -> dict:\n \"\"\"Get input schemas for all tools.\"\"\"\n inputs = {}\n for tool in tools:\n if not tool or not hasattr(tool, \"name\"):\n continue\n try:\n flat_schema = flatten_schema(tool.args_schema.schema())\n input_schema = create_input_schema_from_json_schema(flat_schema)\n langflow_inputs = schema_to_langflow_inputs(input_schema)\n inputs[tool.name] = langflow_inputs\n except (AttributeError, ValueError, TypeError, KeyError) as e:\n msg = f\"Error getting inputs for tool {getattr(tool, 'name', 'unknown')}: {e!s}\"\n logger.exception(msg)\n continue\n return inputs\n\n def remove_input_schema_from_build_config(\n self, build_config: dict, tool_name: str, input_schema: dict[list[InputTypes], Any]\n ):\n \"\"\"Remove the input schema for the tool from the build config.\"\"\"\n # Keep only schemas that don't belong to the current tool\n input_schema = {k: v for k, v in input_schema.items() if k != tool_name}\n # Remove all inputs from other tools\n for value in input_schema.values():\n for _input in value:\n if _input.name in build_config:\n build_config.pop(_input.name)\n\n def remove_non_default_keys(self, build_config: dict) -> None:\n \"\"\"Remove non-default keys from the build config.\"\"\"\n for key in list(build_config.keys()):\n if key not in self.default_keys:\n build_config.pop(key)\n\n async def _update_tool_config(self, build_config: dict, tool_name: str) -> None:\n \"\"\"Update tool configuration with proper error handling.\"\"\"\n if not self.tools:\n self.tools, build_config[\"mcp_server\"][\"value\"] = await self.update_tool_list()\n\n if not tool_name:\n return\n\n tool_obj = next((tool for tool in self.tools if tool.name == tool_name), None)\n if not tool_obj:\n msg = f\"Tool {tool_name} not found in available tools: {self.tools}\"\n self.remove_non_default_keys(build_config)\n build_config[\"tool\"][\"value\"] = \"\"\n logger.warning(msg)\n return\n\n try:\n # Store current values before removing inputs\n current_values = {}\n for key, value in build_config.items():\n if key not in 
self.default_keys and isinstance(value, dict) and \"value\" in value:\n current_values[key] = value[\"value\"]\n\n # Get all tool inputs and remove old ones\n input_schema_for_all_tools = self.get_inputs_for_all_tools(self.tools)\n self.remove_input_schema_from_build_config(build_config, tool_name, input_schema_for_all_tools)\n\n # Get and validate new inputs\n self.schema_inputs = await self._validate_schema_inputs(tool_obj)\n if not self.schema_inputs:\n msg = f\"No input parameters to configure for tool '{tool_name}'\"\n logger.info(msg)\n return\n\n # Add new inputs to build config\n for schema_input in self.schema_inputs:\n if not schema_input or not hasattr(schema_input, \"name\"):\n msg = \"Invalid schema input detected, skipping\"\n logger.warning(msg)\n continue\n\n try:\n name = schema_input.name\n input_dict = schema_input.to_dict()\n input_dict.setdefault(\"value\", None)\n input_dict.setdefault(\"required\", True)\n\n build_config[name] = input_dict\n\n # Preserve existing value if the parameter name exists in current_values\n if name in current_values:\n build_config[name][\"value\"] = current_values[name]\n\n except (AttributeError, KeyError, TypeError) as e:\n msg = f\"Error processing schema input {schema_input}: {e!s}\"\n logger.exception(msg)\n continue\n except ValueError as e:\n msg = f\"Schema validation error for tool {tool_name}: {e!s}\"\n logger.exception(msg)\n self.schema_inputs = []\n return\n except (AttributeError, KeyError, TypeError) as e:\n msg = f\"Error updating tool config: {e!s}\"\n logger.exception(msg)\n raise ValueError(msg) from e\n\n async def build_output(self) -> DataFrame:\n \"\"\"Build output with improved error handling and validation.\"\"\"\n try:\n self.tools, _ = await self.update_tool_list()\n if self.tool != \"\":\n # Set session context for persistent MCP sessions using Langflow session ID\n session_context = self._get_session_context()\n if session_context:\n self.stdio_client.set_session_context(session_context)\n self.sse_client.set_session_context(session_context)\n\n exec_tool = self._tool_cache[self.tool]\n tool_args = self.get_inputs_for_all_tools(self.tools)[self.tool]\n kwargs = {}\n for arg in tool_args:\n value = getattr(self, arg.name, None)\n if value is not None:\n if isinstance(value, Message):\n kwargs[arg.name] = value.text\n else:\n kwargs[arg.name] = value\n\n unflattened_kwargs = maybe_unflatten_dict(kwargs)\n\n output = await exec_tool.coroutine(**unflattened_kwargs)\n\n tool_content = []\n for item in output.content:\n item_dict = item.model_dump()\n tool_content.append(item_dict)\n return DataFrame(data=tool_content)\n return DataFrame(data=[{\"error\": \"You must select a tool\"}])\n except Exception as e:\n msg = f\"Error in build_output: {e!s}\"\n logger.exception(msg)\n raise ValueError(msg) from e\n\n def _get_session_context(self) -> str | None:\n \"\"\"Get the Langflow session ID for MCP session caching.\"\"\"\n # Try to get session ID from the component's execution context\n if hasattr(self, \"graph\") and hasattr(self.graph, \"session_id\"):\n session_id = self.graph.session_id\n # Include server name to ensure different servers get different sessions\n server_name = \"\"\n mcp_server = getattr(self, \"mcp_server\", None)\n if isinstance(mcp_server, dict):\n server_name = mcp_server.get(\"name\", \"\")\n elif mcp_server:\n server_name = str(mcp_server)\n return f\"{session_id}_{server_name}\" if session_id else None\n return None\n\n async def _get_tools(self):\n \"\"\"Get cached tools or update if 
necessary.\"\"\"\n mcp_server = getattr(self, \"mcp_server\", None)\n if not self._not_load_actions:\n tools, _ = await self.update_tool_list(mcp_server)\n return tools\n return []\n"
+
"value": "from __future__ import annotations\n\nimport asyncio\nimport uuid\nfrom typing import Any\n\nfrom langchain_core.tools import StructuredTool # noqa: TC002\n\nfrom langflow.api.v2.mcp import get_server\nfrom langflow.base.agents.utils import maybe_unflatten_dict, safe_cache_get, safe_cache_set\nfrom langflow.base.mcp.util import (\n MCPSseClient,\n MCPStdioClient,\n create_input_schema_from_json_schema,\n update_tools,\n)\nfrom langflow.custom.custom_component.component_with_cache import ComponentWithCache\nfrom langflow.inputs.inputs import InputTypes # noqa: TC001\nfrom langflow.io import DropdownInput, McpInput, MessageTextInput, Output, SecretStrInput\nfrom langflow.io.schema import flatten_schema, schema_to_langflow_inputs\nfrom langflow.logging import logger\nfrom langflow.schema.dataframe import DataFrame\nfrom langflow.schema.message import Message\n\n# Import get_server from the backend API\nfrom langflow.services.auth.utils import create_user_longterm_token, get_current_user\nfrom langflow.services.database.models.user.crud import get_user_by_id\nfrom langflow.services.deps import get_session, get_settings_service, get_storage_service\n\n\nclass MCPToolsComponent(ComponentWithCache):\n schema_inputs: list = []\n tools: list[StructuredTool] = []\n _not_load_actions: bool = False\n _tool_cache: dict = {}\n _last_selected_server: str | None = None # Cache for the last selected server\n\n def __init__(self, **data) -> None:\n super().__init__(**data)\n # Initialize cache keys to avoid CacheMiss when accessing them\n self._ensure_cache_structure()\n\n # Initialize clients with access to the component cache\n self.stdio_client: MCPStdioClient = MCPStdioClient(component_cache=self._shared_component_cache)\n self.sse_client: MCPSseClient = MCPSseClient(component_cache=self._shared_component_cache)\n\n def _ensure_cache_structure(self):\n \"\"\"Ensure the cache has the required structure.\"\"\"\n # Check if servers key exists and is not CacheMiss\n servers_value = safe_cache_get(self._shared_component_cache, \"servers\")\n if servers_value is None:\n safe_cache_set(self._shared_component_cache, \"servers\", {})\n\n # Check if last_selected_server key exists and is not CacheMiss\n last_server_value = safe_cache_get(self._shared_component_cache, \"last_selected_server\")\n if last_server_value is None:\n safe_cache_set(self._shared_component_cache, \"last_selected_server\", \"\")\n\n default_keys: list[str] = [\n \"code\",\n \"_type\",\n \"tool_mode\",\n \"tool_placeholder\",\n \"mcp_server\",\n \"tool\",\n ]\n\n display_name = \"MCP Tools\"\n description = \"Connect to an MCP server to use its tools.\"\n documentation: str = \"https://docs.langflow.org/mcp-client\"\n icon = \"Mcp\"\n name = \"MCPTools\"\n\n inputs = [\n McpInput(\n name=\"mcp_server\",\n display_name=\"MCP Server\",\n info=\"Select the MCP Server that will be used by this component\",\n real_time_refresh=True,\n ),\n DropdownInput(\n name=\"tool\",\n display_name=\"Tool\",\n options=[],\n value=\"\",\n info=\"Select the tool to execute\",\n show=False,\n required=True,\n real_time_refresh=True,\n ),\n MessageTextInput(\n name=\"tool_placeholder\",\n display_name=\"Tool Placeholder\",\n info=\"Placeholder for the tool\",\n value=\"\",\n show=False,\n tool_mode=False,\n ),\n SecretStrInput(\n name=\"api_key\",\n display_name=\"Langflow API Key\",\n info=\"Langflow API key for authentication when fetching MCP servers and tools.\",\n required=False,\n advanced=True,\n ),\n ]\n\n outputs = [\n 
Output(display_name=\"Response\", name=\"response\", method=\"build_output\"),\n ]\n\n async def _validate_schema_inputs(self, tool_obj) -> list[InputTypes]:\n \"\"\"Validate and process schema inputs for a tool.\"\"\"\n try:\n if not tool_obj or not hasattr(tool_obj, \"args_schema\"):\n msg = \"Invalid tool object or missing input schema\"\n raise ValueError(msg)\n\n flat_schema = flatten_schema(tool_obj.args_schema.schema())\n input_schema = create_input_schema_from_json_schema(flat_schema)\n if not input_schema:\n msg = f\"Empty input schema for tool '{tool_obj.name}'\"\n raise ValueError(msg)\n\n schema_inputs = schema_to_langflow_inputs(input_schema)\n if not schema_inputs:\n msg = f\"No input parameters defined for tool '{tool_obj.name}'\"\n logger.warning(msg)\n return []\n\n except Exception as e:\n msg = f\"Error validating schema inputs: {e!s}\"\n logger.exception(msg)\n raise ValueError(msg) from e\n else:\n return schema_inputs\n\n async def update_tool_list(self, mcp_server_value=None):\n # Accepts mcp_server_value as dict {name, config} or uses self.mcp_server\n mcp_server = mcp_server_value if mcp_server_value is not None else getattr(self, \"mcp_server\", None)\n server_name = None\n server_config_from_value = None\n if isinstance(mcp_server, dict):\n server_name = mcp_server.get(\"name\")\n server_config_from_value = mcp_server.get(\"config\")\n else:\n server_name = mcp_server\n if not server_name:\n self.tools = []\n return [], {\"name\": server_name, \"config\": server_config_from_value}\n\n # Use shared cache if available\n servers_cache = safe_cache_get(self._shared_component_cache, \"servers\", {})\n cached = servers_cache.get(server_name) if isinstance(servers_cache, dict) else None\n\n if cached is not None:\n self.tools = cached[\"tools\"]\n self.tool_names = cached[\"tool_names\"]\n self._tool_cache = cached[\"tool_cache\"]\n server_config_from_value = cached[\"config\"]\n return self.tools, {\"name\": server_name, \"config\": server_config_from_value}\n\n try:\n async for db in get_session():\n # TODO: In 1.6, this may need to be removed or adjusted\n # Try to get the super user token, if possible\n if self.api_key:\n current_user = await get_current_user(\n token=None,\n query_param=self.api_key,\n header_param=None,\n db=db,\n )\n else:\n user_id, _ = await create_user_longterm_token(db)\n current_user = await get_user_by_id(db, user_id)\n\n # Try to get server config from DB/API\n server_config = await get_server(\n server_name,\n current_user,\n db,\n storage_service=get_storage_service(),\n settings_service=get_settings_service(),\n )\n\n # If get_server returns empty but we have a config, use it\n if not server_config and server_config_from_value:\n server_config = server_config_from_value\n\n if not server_config:\n self.tools = []\n return [], {\"name\": server_name, \"config\": server_config}\n\n _, tool_list, tool_cache = await update_tools(\n server_name=server_name,\n server_config=server_config,\n mcp_stdio_client=self.stdio_client,\n mcp_sse_client=self.sse_client,\n )\n\n self.tool_names = [tool.name for tool in tool_list if hasattr(tool, \"name\")]\n self._tool_cache = tool_cache\n self.tools = tool_list\n # Cache the result using shared cache\n cache_data = {\n \"tools\": tool_list,\n \"tool_names\": self.tool_names,\n \"tool_cache\": tool_cache,\n \"config\": server_config,\n }\n\n # Safely update the servers cache\n current_servers_cache = safe_cache_get(self._shared_component_cache, \"servers\", {})\n if isinstance(current_servers_cache, 
dict):\n current_servers_cache[server_name] = cache_data\n safe_cache_set(self._shared_component_cache, \"servers\", current_servers_cache)\n\n return tool_list, {\"name\": server_name, \"config\": server_config}\n except (TimeoutError, asyncio.TimeoutError) as e:\n msg = f\"Timeout updating tool list: {e!s}\"\n logger.exception(msg)\n raise TimeoutError(msg) from e\n except Exception as e:\n msg = f\"Error updating tool list: {e!s}\"\n logger.exception(msg)\n raise ValueError(msg) from e\n\n async def update_build_config(self, build_config: dict, field_value: str, field_name: str | None = None) -> dict:\n \"\"\"Toggle the visibility of connection-specific fields based on the selected mode.\"\"\"\n try:\n if field_name == \"tool\":\n try:\n if len(self.tools) == 0:\n try:\n self.tools, build_config[\"mcp_server\"][\"value\"] = await self.update_tool_list()\n build_config[\"tool\"][\"options\"] = [tool.name for tool in self.tools]\n build_config[\"tool\"][\"placeholder\"] = \"Select a tool\"\n except (TimeoutError, asyncio.TimeoutError) as e:\n msg = f\"Timeout updating tool list: {e!s}\"\n logger.exception(msg)\n if not build_config[\"tools_metadata\"][\"show\"]:\n build_config[\"tool\"][\"show\"] = True\n build_config[\"tool\"][\"options\"] = []\n build_config[\"tool\"][\"value\"] = \"\"\n build_config[\"tool\"][\"placeholder\"] = \"Timeout on MCP server\"\n else:\n build_config[\"tool\"][\"show\"] = False\n except ValueError:\n if not build_config[\"tools_metadata\"][\"show\"]:\n build_config[\"tool\"][\"show\"] = True\n build_config[\"tool\"][\"options\"] = []\n build_config[\"tool\"][\"value\"] = \"\"\n build_config[\"tool\"][\"placeholder\"] = \"Error on MCP Server\"\n else:\n build_config[\"tool\"][\"show\"] = False\n\n if field_value == \"\":\n return build_config\n tool_obj = None\n for tool in self.tools:\n if tool.name == field_value:\n tool_obj = tool\n break\n if tool_obj is None:\n msg = f\"Tool {field_value} not found in available tools: {self.tools}\"\n logger.warning(msg)\n return build_config\n await self._update_tool_config(build_config, field_value)\n except Exception as e:\n build_config[\"tool\"][\"options\"] = []\n msg = f\"Failed to update tools: {e!s}\"\n raise ValueError(msg) from e\n else:\n return build_config\n elif field_name == \"mcp_server\":\n if not field_value:\n build_config[\"tool\"][\"show\"] = False\n build_config[\"tool\"][\"options\"] = []\n build_config[\"tool\"][\"value\"] = \"\"\n build_config[\"tool\"][\"placeholder\"] = \"\"\n build_config[\"tool_placeholder\"][\"tool_mode\"] = False\n self.remove_non_default_keys(build_config)\n return build_config\n\n build_config[\"tool_placeholder\"][\"tool_mode\"] = True\n\n current_server_name = field_value.get(\"name\") if isinstance(field_value, dict) else field_value\n _last_selected_server = safe_cache_get(self._shared_component_cache, \"last_selected_server\", \"\")\n\n # To avoid unnecessary updates, only proceed if the server has actually changed\n if (_last_selected_server in (current_server_name, \"\")) and build_config[\"tool\"][\"show\"]:\n return build_config\n\n # Determine if \"Tool Mode\" is active by checking if the tool dropdown is hidden.\n is_in_tool_mode = build_config[\"tools_metadata\"][\"show\"]\n safe_cache_set(self._shared_component_cache, \"last_selected_server\", current_server_name)\n\n # Check if tools are already cached for this server before clearing\n cached_tools = None\n if current_server_name:\n servers_cache = safe_cache_get(self._shared_component_cache, \"servers\", 
{})\n if isinstance(servers_cache, dict):\n cached = servers_cache.get(current_server_name)\n if cached is not None:\n cached_tools = cached[\"tools\"]\n self.tools = cached_tools\n self.tool_names = cached[\"tool_names\"]\n self._tool_cache = cached[\"tool_cache\"]\n\n # Only clear tools if we don't have cached tools for the current server\n if not cached_tools:\n self.tools = [] # Clear previous tools only if no cache\n\n self.remove_non_default_keys(build_config) # Clear previous tool inputs\n\n # Only show the tool dropdown if not in tool_mode\n if not is_in_tool_mode:\n build_config[\"tool\"][\"show\"] = True\n if cached_tools:\n # Use cached tools to populate options immediately\n build_config[\"tool\"][\"options\"] = [tool.name for tool in cached_tools]\n build_config[\"tool\"][\"placeholder\"] = \"Select a tool\"\n else:\n # Show loading state only when we need to fetch tools\n build_config[\"tool\"][\"placeholder\"] = \"Loading tools...\"\n build_config[\"tool\"][\"options\"] = []\n build_config[\"tool\"][\"value\"] = uuid.uuid4()\n else:\n # Keep the tool dropdown hidden if in tool_mode\n self._not_load_actions = True\n build_config[\"tool\"][\"show\"] = False\n\n elif field_name == \"tool_mode\":\n build_config[\"tool\"][\"placeholder\"] = \"\"\n build_config[\"tool\"][\"show\"] = not bool(field_value) and bool(build_config[\"mcp_server\"])\n self.remove_non_default_keys(build_config)\n self.tool = build_config[\"tool\"][\"value\"]\n if field_value:\n self._not_load_actions = True\n else:\n build_config[\"tool\"][\"value\"] = uuid.uuid4()\n build_config[\"tool\"][\"options\"] = []\n build_config[\"tool\"][\"show\"] = True\n build_config[\"tool\"][\"placeholder\"] = \"Loading tools...\"\n elif field_name == \"tools_metadata\":\n self._not_load_actions = False\n\n except Exception as e:\n msg = f\"Error in update_build_config: {e!s}\"\n logger.exception(msg)\n raise ValueError(msg) from e\n else:\n return build_config\n\n def get_inputs_for_all_tools(self, tools: list) -> dict:\n \"\"\"Get input schemas for all tools.\"\"\"\n inputs = {}\n for tool in tools:\n if not tool or not hasattr(tool, \"name\"):\n continue\n try:\n flat_schema = flatten_schema(tool.args_schema.schema())\n input_schema = create_input_schema_from_json_schema(flat_schema)\n langflow_inputs = schema_to_langflow_inputs(input_schema)\n inputs[tool.name] = langflow_inputs\n except (AttributeError, ValueError, TypeError, KeyError) as e:\n msg = f\"Error getting inputs for tool {getattr(tool, 'name', 'unknown')}: {e!s}\"\n logger.exception(msg)\n continue\n return inputs\n\n def remove_input_schema_from_build_config(\n self, build_config: dict, tool_name: str, input_schema: dict[list[InputTypes], Any]\n ):\n \"\"\"Remove the input schema for the tool from the build config.\"\"\"\n # Keep only schemas that don't belong to the current tool\n input_schema = {k: v for k, v in input_schema.items() if k != tool_name}\n # Remove all inputs from other tools\n for value in input_schema.values():\n for _input in value:\n if _input.name in build_config:\n build_config.pop(_input.name)\n\n def remove_non_default_keys(self, build_config: dict) -> None:\n \"\"\"Remove non-default keys from the build config.\"\"\"\n for key in list(build_config.keys()):\n if key not in self.default_keys:\n build_config.pop(key)\n\n async def _update_tool_config(self, build_config: dict, tool_name: str) -> None:\n \"\"\"Update tool configuration with proper error handling.\"\"\"\n if not self.tools:\n self.tools, 
build_config[\"mcp_server\"][\"value\"] = await self.update_tool_list()\n\n if not tool_name:\n return\n\n tool_obj = next((tool for tool in self.tools if tool.name == tool_name), None)\n if not tool_obj:\n msg = f\"Tool {tool_name} not found in available tools: {self.tools}\"\n self.remove_non_default_keys(build_config)\n build_config[\"tool\"][\"value\"] = \"\"\n logger.warning(msg)\n return\n\n try:\n # Store current values before removing inputs\n current_values = {}\n for key, value in build_config.items():\n if key not in self.default_keys and isinstance(value, dict) and \"value\" in value:\n current_values[key] = value[\"value\"]\n\n # Get all tool inputs and remove old ones\n input_schema_for_all_tools = self.get_inputs_for_all_tools(self.tools)\n self.remove_input_schema_from_build_config(build_config, tool_name, input_schema_for_all_tools)\n\n # Get and validate new inputs\n self.schema_inputs = await self._validate_schema_inputs(tool_obj)\n if not self.schema_inputs:\n msg = f\"No input parameters to configure for tool '{tool_name}'\"\n logger.info(msg)\n return\n\n # Add new inputs to build config\n for schema_input in self.schema_inputs:\n if not schema_input or not hasattr(schema_input, \"name\"):\n msg = \"Invalid schema input detected, skipping\"\n logger.warning(msg)\n continue\n\n try:\n name = schema_input.name\n input_dict = schema_input.to_dict()\n input_dict.setdefault(\"value\", None)\n input_dict.setdefault(\"required\", True)\n\n build_config[name] = input_dict\n\n # Preserve existing value if the parameter name exists in current_values\n if name in current_values:\n build_config[name][\"value\"] = current_values[name]\n\n except (AttributeError, KeyError, TypeError) as e:\n msg = f\"Error processing schema input {schema_input}: {e!s}\"\n logger.exception(msg)\n continue\n except ValueError as e:\n msg = f\"Schema validation error for tool {tool_name}: {e!s}\"\n logger.exception(msg)\n self.schema_inputs = []\n return\n except (AttributeError, KeyError, TypeError) as e:\n msg = f\"Error updating tool config: {e!s}\"\n logger.exception(msg)\n raise ValueError(msg) from e\n\n async def build_output(self) -> DataFrame:\n \"\"\"Build output with improved error handling and validation.\"\"\"\n try:\n self.tools, _ = await self.update_tool_list()\n if self.tool != \"\":\n # Set session context for persistent MCP sessions using Langflow session ID\n session_context = self._get_session_context()\n if session_context:\n self.stdio_client.set_session_context(session_context)\n self.sse_client.set_session_context(session_context)\n\n exec_tool = self._tool_cache[self.tool]\n tool_args = self.get_inputs_for_all_tools(self.tools)[self.tool]\n kwargs = {}\n for arg in tool_args:\n value = getattr(self, arg.name, None)\n if value is not None:\n if isinstance(value, Message):\n kwargs[arg.name] = value.text\n else:\n kwargs[arg.name] = value\n\n unflattened_kwargs = maybe_unflatten_dict(kwargs)\n\n output = await exec_tool.coroutine(**unflattened_kwargs)\n\n tool_content = []\n for item in output.content:\n item_dict = item.model_dump()\n tool_content.append(item_dict)\n return DataFrame(data=tool_content)\n return DataFrame(data=[{\"error\": \"You must select a tool\"}])\n except Exception as e:\n msg = f\"Error in build_output: {e!s}\"\n logger.exception(msg)\n raise ValueError(msg) from e\n\n def _get_session_context(self) -> str | None:\n \"\"\"Get the Langflow session ID for MCP session caching.\"\"\"\n # Try to get session ID from the component's execution context\n if 
hasattr(self, \"graph\") and hasattr(self.graph, \"session_id\"):\n session_id = self.graph.session_id\n # Include server name to ensure different servers get different sessions\n server_name = \"\"\n mcp_server = getattr(self, \"mcp_server\", None)\n if isinstance(mcp_server, dict):\n server_name = mcp_server.get(\"name\", \"\")\n elif mcp_server:\n server_name = str(mcp_server)\n return f\"{session_id}_{server_name}\" if session_id else None\n return None\n\n async def _get_tools(self):\n \"\"\"Get cached tools or update if necessary.\"\"\"\n mcp_server = getattr(self, \"mcp_server\", None)\n if not self._not_load_actions:\n tools, _ = await self.update_tool_list(mcp_server)\n return tools\n return []\n"
 },
 "mcp_server": {
 "_input_type": "McpInput",
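The hunk above (apparently from one of the starter-project JSON files, which embed the MCP component source as a string) centers on two ideas: a per-server cache of discovered tools so the tool dropdown can be populated without a "Loading tools..." round trip, and a session-context key built from the Langflow session ID plus the server name so different servers get different MCP sessions. The sketch below illustrates only that pattern; every name in it is a hypothetical stand-in, not the Langflow API.

```python
# Hypothetical sketch of the per-server tool cache and session-context key used
# by the MCP component above. None of these names are the Langflow API; they
# only illustrate the pattern (cache hit -> reuse tools, miss -> fetch and store).
import asyncio
from dataclasses import dataclass, field


@dataclass
class ToolCacheDemo:
    session_id: str
    servers_cache: dict[str, list[str]] = field(default_factory=dict)

    async def fetch_tools(self, server_name: str) -> list[str]:
        # Stand-in for the real MCP discovery call.
        await asyncio.sleep(0)
        return [f"{server_name}.search", f"{server_name}.fetch"]

    async def get_tools(self, server_name: str) -> list[str]:
        cached = self.servers_cache.get(server_name)
        if cached is not None:
            return cached  # populate the dropdown immediately, no loading state
        tools = await self.fetch_tools(server_name)
        self.servers_cache[server_name] = tools
        return tools

    def session_context(self, server_name: str) -> str | None:
        # Mirrors f"{session_id}_{server_name}" so each server gets its own MCP session.
        return f"{self.session_id}_{server_name}" if self.session_id else None


async def main() -> None:
    demo = ToolCacheDemo(session_id="abc123")
    print(await demo.get_tools("docs"))  # fetched
    print(await demo.get_tools("docs"))  # served from cache
    print(demo.session_context("docs"))  # "abc123_docs"


asyncio.run(main())
```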
langflow/services/database/models/file/model.py
CHANGED
@@ -1,7 +1,7 @@
 from datetime import datetime, timezone
 from uuid import UUID, uuid4
 
-from sqlmodel import Field, SQLModel
+from sqlmodel import Field, SQLModel, UniqueConstraint
 
 from langflow.schema.serialize import UUIDstr
 
@@ -9,9 +9,11 @@ from langflow.schema.serialize import UUIDstr
 class File(SQLModel, table=True):  # type: ignore[call-arg]
     id: UUIDstr = Field(default_factory=uuid4, primary_key=True)
     user_id: UUID = Field(foreign_key="user.id")
-    name: str = Field(
+    name: str = Field(nullable=False)
     path: str = Field(nullable=False)
     size: int = Field(nullable=False)
     provider: str | None = Field(default=None)
     created_at: datetime = Field(default_factory=lambda: datetime.now(timezone.utc))
     updated_at: datetime = Field(default_factory=lambda: datetime.now(timezone.utc))
+
+    __table_args__ = (UniqueConstraint("name", "user_id"),)
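The model.py hunk above adds a composite UniqueConstraint("name", "user_id"), so a file name only has to be unique per user rather than globally unique. Below is a minimal, self-contained sketch of that behavior; DemoFile is a hypothetical stand-in for the real File model (which uses UUIDstr and a foreign key to user.id), and the in-memory SQLite engine is only for illustration.

```python
# Minimal sketch of the behavior added by __table_args__ = (UniqueConstraint("name", "user_id"),).
# DemoFile is a hypothetical stand-in for langflow's File model, trimmed to stay runnable here.
from uuid import UUID, uuid4

from sqlalchemy.exc import IntegrityError
from sqlmodel import Field, Session, SQLModel, UniqueConstraint, create_engine


class DemoFile(SQLModel, table=True):
    __table_args__ = (UniqueConstraint("name", "user_id"),)

    id: UUID = Field(default_factory=uuid4, primary_key=True)
    user_id: UUID = Field(index=True)  # the real model uses foreign_key="user.id"
    name: str = Field(nullable=False)
    path: str = Field(nullable=False)
    size: int = Field(nullable=False)


engine = create_engine("sqlite://")
SQLModel.metadata.create_all(engine)

user_a, user_b = uuid4(), uuid4()
with Session(engine) as session:
    # The same file name under two different users is allowed.
    session.add(DemoFile(user_id=user_a, name="report.pdf", path="/a/report.pdf", size=10))
    session.add(DemoFile(user_id=user_b, name="report.pdf", path="/b/report.pdf", size=10))
    session.commit()

    # A second "report.pdf" for the same user violates the (name, user_id) constraint.
    session.add(DemoFile(user_id=user_a, name="report.pdf", path="/a/copy.pdf", size=10))
    try:
        session.commit()
    except IntegrityError:
        session.rollback()
        print("duplicate name for the same user rejected")
```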
{langflow_base_nightly-0.5.0.dev34.dist-info → langflow_base_nightly-0.5.0.dev35.dist-info}/METADATA
RENAMED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: langflow-base-nightly
-Version: 0.5.0.
+Version: 0.5.0.dev35
 Summary: A Python package with a built-in web application
 Project-URL: Repository, https://github.com/langflow-ai/langflow
 Project-URL: Documentation, https://docs.langflow.org
{langflow_base_nightly-0.5.0.dev34.dist-info → langflow_base_nightly-0.5.0.dev35.dist-info}/RECORD
RENAMED
@@ -20,6 +20,7 @@ langflow/alembic/versions/0d60fcbd4e8e_create_vertex_builds_table.py,sha256=BYNy
 langflow/alembic/versions/1a110b568907_replace_credential_table_with_variable.py,sha256=Xh1DEgm3GaDG5NApaUVUJjbEKYDBMZAM_mINO_72Wf0,2620
 langflow/alembic/versions/1b8b740a6fa3_remove_fk_constraint_in_message_.py,sha256=mrpCamCU7Q-AY7q_P7sdq_HlgqffIO2P_sk0GU4Qrc0,15034
 langflow/alembic/versions/1c79524817ed_add_unique_constraints_per_user_in_.py,sha256=9GoBzcOjomBz19f3fFya8tLDgNk-PoF4RdalvQANRWY,1531
+langflow/alembic/versions/1cb603706752_modify_uniqueness_constraint_on_file_.py,sha256=qo8JoNA_LgnRaEqS1JwNnqPEjnZlJ9VR-q4cs_OAoRw,10618
 langflow/alembic/versions/1d90f8a0efe1_update_description_columns_type.py,sha256=iqeMojwUhUMUCDFpRdZWRPhsV4EiQ20lYKOMJN6rXfo,3275
 langflow/alembic/versions/1eab2c3eb45e_event_error.py,sha256=RC3vb57r2FI3C6AQBmSEHvE0Q51i2kMkpPw1kTWV19A,1977
 langflow/alembic/versions/1ef9c4f3765d_.py,sha256=Ly3M6wbbKxzdwha1Mr3fVruruNg4I0fAWvFamy9gtBw,2044
@@ -194,7 +195,7 @@ langflow/components/agentql/__init__.py,sha256=Erl669Dzsk-SegsDPWTtkKbprMXVuv8UT
 langflow/components/agentql/agentql_api.py,sha256=zoRcxHro69_l3-VUYOXkfb3J78VNu0kJzCDbIQJbv8s,5606
 langflow/components/agents/__init__.py,sha256=IXzXcobwGvV0MxtCrZfia5DlTnNL1OyOAADquoJn8Hc,130
 langflow/components/agents/agent.py,sha256=UzqsX2GFFIt0JcgaHzxQlod2v32NKsbI5E3tWS-coks,15552
-langflow/components/agents/mcp_component.py,sha256=
+langflow/components/agents/mcp_component.py,sha256=aDn6PK6ZNAMLghWWew23Y1diHrv1JRf1P6Znr_e19yg,23313
 langflow/components/aiml/__init__.py,sha256=uHyRjn6RHih__xQl9imVc2T3EVDJXh_YfHjA-OKSOC8,1102
 langflow/components/aiml/aiml.py,sha256=P0-1dKjiQq-vima7QGwnAYMi8XKcrf9vXfEfkg7kZ8s,3860
 langflow/components/aiml/aiml_embeddings.py,sha256=B-Nqe0wCwv9eNU9auyj62LQ1J0hmPfmxjZ5uxwSwPMo,1120
@@ -265,7 +266,7 @@ langflow/components/data/csv_to_data.py,sha256=FL99gVyquYmdrD6Z1S0X_l3DDkVDRfdCw
 langflow/components/data/directory.py,sha256=MqSUyq5cL6Xy2CqBREc0hJlcoega3r82ti29oNmGlog,3966
 langflow/components/data/file.py,sha256=07zPsp7_qUyBpziW7UfQLHoWY70Ps6hRPyKyX3aLLzw,5861
 langflow/components/data/json_to_data.py,sha256=uN3yyVHo-DOvv0ZwYQx99V-rWddh3A6iDBKW7ga1J4c,3554
-langflow/components/data/kb_ingest.py,sha256=
+langflow/components/data/kb_ingest.py,sha256=4evNZuy8-M93S8I-mmjxiRjWit66cFIUwCqrBv7InFU,24478
 langflow/components/data/kb_retrieval.py,sha256=7itmlY8JTS91P_oju0-sKDV2vZyWUuiQVRhg88I_3s8,9947
 langflow/components/data/news_search.py,sha256=PpuhSTH_gk1iWjX4X3N0PxIPAAdcrnH0GAtu_d5LSgA,6196
 langflow/components/data/rss.py,sha256=B_DZvPdnJhnh7qkzPHcp-ERsfqcft6kTNl58G94zJzg,2504
@@ -485,7 +486,7 @@ langflow/components/processing/parser.py,sha256=VWIJUgZQRN-eW8zgEUOECfpmy0nmfRI7
 langflow/components/processing/prompt.py,sha256=xHilcszTEdewqBufJusnkXWTrRqC8MX9fEEz1n-vgK0,2791
 langflow/components/processing/python_repl_core.py,sha256=FaNGm6f2ngniE2lueYaoxSn-hZ-yKePdV60y-jc9nfs,3477
 langflow/components/processing/regex.py,sha256=MQVd8nUwe3engl_JiI-wEn1BvXVm1e0vQOn99gdiOrw,2660
-langflow/components/processing/save_file.py,sha256=
+langflow/components/processing/save_file.py,sha256=2a9yjOAqRf6x22157DJFLnSTJOMyiwFqaUDIttByDiM,9949
 langflow/components/processing/select_data.py,sha256=t1InSoxLIageodImGpkNG1tWAirHKuFrU9QhNdom8PA,1765
 langflow/components/processing/split_text.py,sha256=2_Lp0jGdEAnuup4ucL-ZexpcarCL78wJAwQLCL2cleE,5323
 langflow/components/processing/structured_output.py,sha256=rSpvRVLAlxcnEWFJVvExZC-NTmIb6rUieBzYnSMV9po,7991
@@ -883,13 +884,13 @@ langflow/initial_setup/starter_projects/Hybrid Search RAG.json,sha256=YalCD1u0CQ
 langflow/initial_setup/starter_projects/Image Sentiment Analysis.json,sha256=p3yUZeuCPiAlvHBjArJtNiAUdXV4viKYeRagI3fI534,113531
 langflow/initial_setup/starter_projects/Instagram Copywriter.json,sha256=iBqKLxara5PJsbztdq9k-M6q0mkd_OExISb71t9Ft6o,169884
 langflow/initial_setup/starter_projects/Invoice Summarizer.json,sha256=IdWYegxw5qTplYBdBt3Vl_b61bNgeTzPEtX6DVuimSM,95726
-langflow/initial_setup/starter_projects/Knowledge Ingestion.json,sha256=
+langflow/initial_setup/starter_projects/Knowledge Ingestion.json,sha256=Ynqwki9sC6j0ioMiqyWryeR8jyBcF-a7uQyfj7ivxxA,81467
 langflow/initial_setup/starter_projects/Knowledge Retrieval.json,sha256=abZ7akGNWy_ywoFWTcq0xyT--iRbEBnXh8Xx0Q1BizY,43494
 langflow/initial_setup/starter_projects/Market Research.json,sha256=i3IZbaXaXwNL_l222sikK4kCbtVjm_JU8xHrs-KTFI0,151362
 langflow/initial_setup/starter_projects/Meeting Summary.json,sha256=rm58p7Dkxb4vBzyin-Aa1i6XdMT0Au5D5_QuEuuxNDM,195851
 langflow/initial_setup/starter_projects/Memory Chatbot.json,sha256=d4imk-w2M69O8iCJT-Xbf9dleEf8uaLAsKzqLkMMZWw,85446
-langflow/initial_setup/starter_projects/News Aggregator.json,sha256=
-langflow/initial_setup/starter_projects/Nvidia Remix.json,sha256=
+langflow/initial_setup/starter_projects/News Aggregator.json,sha256=ut1RyPTD1sIdM97wuAw_i_jpPwK6HhoWX5hdK1e8XXo,115136
+langflow/initial_setup/starter_projects/Nvidia Remix.json,sha256=Nh1K7ifymwVgiaf7GBG17zDNRgf8khu4zSv3gcp69e0,316474
 langflow/initial_setup/starter_projects/Pokédex Agent.json,sha256=xBs9Ih8IRFDTAP64ra2DhO52iQHui7xj-2JMq6YL3kY,111969
 langflow/initial_setup/starter_projects/Portfolio Website Code Generator.json,sha256=GU8ESXR_Lf6_NdGihyuPyk4JUcO0KhzmL7dQQGr9XAo,123578
 langflow/initial_setup/starter_projects/Price Deal Finder.json,sha256=b9DZDCmCCGmIym6szI7NSMA6bG1Kwa_UpvO6PJBQ9sk,122658
@@ -999,7 +1000,7 @@ langflow/services/database/models/api_key/crud.py,sha256=Boy5k8QktC8FaXSou2ub1WT
 langflow/services/database/models/api_key/model.py,sha256=DkfuvuyqxIA4V2Tps_Cl3elzsgRNmhNt3M0JvEgr_Lc,1991
 langflow/services/database/models/file/__init__.py,sha256=1VvR0bFrVeDf-lVwXWa2YT0nbOnvkdxCbcMcSAONlPs,51
 langflow/services/database/models/file/crud.py,sha256=ASclS7DS_y-fi7TdZi4dDWN61UjEro1bVL_g283oT1M,401
-langflow/services/database/models/file/model.py,sha256=
+langflow/services/database/models/file/model.py,sha256=k4WoyKaEDVQ6295QBxjtZ_BhgEaPnqp__IKCHptcPl4,740
 langflow/services/database/models/flow/__init__.py,sha256=QJ8bBCOVfpsOITXiN92d7yBdRdS9apqetshv-jjDVlk,118
 langflow/services/database/models/flow/model.py,sha256=ORA2qt1uNVuPclRUobFcs_VC17qA_a03iIdE3eu9kBo,11182
 langflow/services/database/models/flow/schema.py,sha256=IeisZjKaOUp4EerBr5oAchOFkYrvjG1JDateLpa94Pg,95
@@ -1128,7 +1129,7 @@ langflow/utils/util_strings.py,sha256=Blz5lwvE7lml7nKCG9vVJ6me5VNmVtYzFXDVPHPK7v
 langflow/utils/validate.py,sha256=8RnY61LZFCBU1HIlPDCMI3vsXOmK_IFAYBGZIfZJcsU,16362
 langflow/utils/version.py,sha256=OjSj0smls9XnPd4-LpTH9AWyUO_NAn5mncqKkkXl_fw,2840
 langflow/utils/voice_utils.py,sha256=pzU6uuseI2_5mi-yXzFIjMavVRFyuVrpLmR6LqbF7mE,3346
-langflow_base_nightly-0.5.0.
-langflow_base_nightly-0.5.0.
-langflow_base_nightly-0.5.0.
-langflow_base_nightly-0.5.0.
+langflow_base_nightly-0.5.0.dev35.dist-info/METADATA,sha256=eOTE0uH1t1a7TDnxA9KuOxZO5bIO9dptiAqdIG2dDNo,4212
+langflow_base_nightly-0.5.0.dev35.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+langflow_base_nightly-0.5.0.dev35.dist-info/entry_points.txt,sha256=JvuLdXSrkeDmDdpb8M-VvFIzb84n4HmqUcIP10_EIF8,57
+langflow_base_nightly-0.5.0.dev35.dist-info/RECORD,,
{langflow_base_nightly-0.5.0.dev34.dist-info → langflow_base_nightly-0.5.0.dev35.dist-info}/WHEEL
RENAMED
File without changes