kailash 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (69)
  1. kailash/__init__.py +31 -0
  2. kailash/__main__.py +11 -0
  3. kailash/cli/__init__.py +5 -0
  4. kailash/cli/commands.py +563 -0
  5. kailash/manifest.py +778 -0
  6. kailash/nodes/__init__.py +23 -0
  7. kailash/nodes/ai/__init__.py +26 -0
  8. kailash/nodes/ai/agents.py +417 -0
  9. kailash/nodes/ai/models.py +488 -0
  10. kailash/nodes/api/__init__.py +52 -0
  11. kailash/nodes/api/auth.py +567 -0
  12. kailash/nodes/api/graphql.py +480 -0
  13. kailash/nodes/api/http.py +598 -0
  14. kailash/nodes/api/rate_limiting.py +572 -0
  15. kailash/nodes/api/rest.py +665 -0
  16. kailash/nodes/base.py +1032 -0
  17. kailash/nodes/base_async.py +128 -0
  18. kailash/nodes/code/__init__.py +32 -0
  19. kailash/nodes/code/python.py +1021 -0
  20. kailash/nodes/data/__init__.py +125 -0
  21. kailash/nodes/data/readers.py +496 -0
  22. kailash/nodes/data/sharepoint_graph.py +623 -0
  23. kailash/nodes/data/sql.py +380 -0
  24. kailash/nodes/data/streaming.py +1168 -0
  25. kailash/nodes/data/vector_db.py +964 -0
  26. kailash/nodes/data/writers.py +529 -0
  27. kailash/nodes/logic/__init__.py +6 -0
  28. kailash/nodes/logic/async_operations.py +702 -0
  29. kailash/nodes/logic/operations.py +551 -0
  30. kailash/nodes/transform/__init__.py +5 -0
  31. kailash/nodes/transform/processors.py +379 -0
  32. kailash/runtime/__init__.py +6 -0
  33. kailash/runtime/async_local.py +356 -0
  34. kailash/runtime/docker.py +697 -0
  35. kailash/runtime/local.py +434 -0
  36. kailash/runtime/parallel.py +557 -0
  37. kailash/runtime/runner.py +110 -0
  38. kailash/runtime/testing.py +347 -0
  39. kailash/sdk_exceptions.py +307 -0
  40. kailash/tracking/__init__.py +7 -0
  41. kailash/tracking/manager.py +885 -0
  42. kailash/tracking/metrics_collector.py +342 -0
  43. kailash/tracking/models.py +535 -0
  44. kailash/tracking/storage/__init__.py +0 -0
  45. kailash/tracking/storage/base.py +113 -0
  46. kailash/tracking/storage/database.py +619 -0
  47. kailash/tracking/storage/filesystem.py +543 -0
  48. kailash/utils/__init__.py +0 -0
  49. kailash/utils/export.py +924 -0
  50. kailash/utils/templates.py +680 -0
  51. kailash/visualization/__init__.py +62 -0
  52. kailash/visualization/api.py +732 -0
  53. kailash/visualization/dashboard.py +951 -0
  54. kailash/visualization/performance.py +808 -0
  55. kailash/visualization/reports.py +1471 -0
  56. kailash/workflow/__init__.py +15 -0
  57. kailash/workflow/builder.py +245 -0
  58. kailash/workflow/graph.py +827 -0
  59. kailash/workflow/mermaid_visualizer.py +628 -0
  60. kailash/workflow/mock_registry.py +63 -0
  61. kailash/workflow/runner.py +302 -0
  62. kailash/workflow/state.py +238 -0
  63. kailash/workflow/visualization.py +588 -0
  64. kailash-0.1.0.dist-info/METADATA +710 -0
  65. kailash-0.1.0.dist-info/RECORD +69 -0
  66. kailash-0.1.0.dist-info/WHEEL +5 -0
  67. kailash-0.1.0.dist-info/entry_points.txt +2 -0
  68. kailash-0.1.0.dist-info/licenses/LICENSE +21 -0
  69. kailash-0.1.0.dist-info/top_level.txt +1 -0
kailash/nodes/data/__init__.py
@@ -0,0 +1,125 @@
+ """Data processing nodes for the Kailash SDK.
+
+ This package provides comprehensive data input/output nodes that serve as the
+ primary interface between the Kailash workflow system and external data sources.
+ These nodes form the foundation of most workflows by enabling data ingestion,
+ persistence, and real-time processing.
+
+ Module Organization:
+ - readers.py: Data source nodes for reading files
+ - writers.py: Data sink nodes for writing files
+ - sql.py: SQL database interaction nodes
+ - vector_db.py: Vector database and embedding nodes
+ - streaming.py: Real-time streaming data nodes
+
+ Design Philosophy:
+ 1. Consistent interfaces across data sources
+ 2. Type-safe parameter validation
+ 3. Memory-efficient processing
+ 4. Comprehensive error handling
+ 5. Format-specific optimizations
+ 6. Real-time and batch processing support
+
+ Node Categories:
+ - Readers: Bring external data into workflows
+ - Writers: Persist processed data to files
+ - SQL: Interact with relational databases
+ - Vector DB: Handle embeddings and similarity search
+ - Streaming: Process real-time data streams
+
+ Usage Patterns:
+ 1. ETL pipelines: Read → Transform → Write
+ 2. Data processing: Read → Analyze → Export
+ 3. RAG pipelines: Text → Embed → Store → Search
+ 4. Real-time analytics: Stream → Process → Aggregate
+ 5. Database operations: Query → Transform → Insert
+
+ Integration Points:
+ - Upstream: File systems, APIs, databases, streams
+ - Downstream: Transform nodes, AI models, analytics
+ - Parallel: Other data nodes in workflow
+
+ Advanced Features:
+ - Connection pooling for databases
+ - Batch processing for efficiency
+ - Real-time streaming support
+ - Vector similarity search
+ - Event-driven architectures
+
+ Error Handling:
+ All nodes provide detailed error messages for:
+ - Connection failures
+ - Authentication errors
+ - Format/schema issues
+ - Rate limiting
+ - Resource constraints
+
+ Example Workflows:
+     # Traditional ETL
+     workflow = Workflow()
+     workflow.add_node('read', CSVReader(file_path='input.csv'))
+     workflow.add_node('transform', DataTransform())
+     workflow.add_node('write', JSONWriter(file_path='output.json'))
+     workflow.connect('read', 'transform')
+     workflow.connect('transform', 'write')
+
+     # RAG Pipeline
+     workflow = Workflow()
+     workflow.add_node('split', TextSplitterNode())
+     workflow.add_node('embed', EmbeddingNode())
+     workflow.add_node('store', VectorDatabaseNode())
+     workflow.connect('split', 'embed')
+     workflow.connect('embed', 'store')
+
+     # Real-time Processing
+     workflow = Workflow()
+     workflow.add_node('consume', KafkaConsumerNode())
+     workflow.add_node('process', StreamProcessor())
+     workflow.add_node('publish', StreamPublisherNode())
+     workflow.connect('consume', 'process')
+     workflow.connect('process', 'publish')
+ """
+
+ from kailash.nodes.data.readers import CSVReader, JSONReader, TextReader
+ from kailash.nodes.data.sharepoint_graph import (
+     SharePointGraphReader,
+     SharePointGraphWriter,
+ )
+ from kailash.nodes.data.sql import SQLDatabaseNode, SQLQueryBuilderNode
+ from kailash.nodes.data.streaming import (
+     EventStreamNode,
+     KafkaConsumerNode,
+     StreamPublisherNode,
+     WebSocketNode,
+ )
+ from kailash.nodes.data.vector_db import (
+     EmbeddingNode,
+     TextSplitterNode,
+     VectorDatabaseNode,
+ )
+ from kailash.nodes.data.writers import CSVWriter, JSONWriter, TextWriter
+
+ __all__ = [
+     # Readers
+     "CSVReader",
+     "JSONReader",
+     "TextReader",
+     "SharePointGraphReader",
+     # Writers
+     "CSVWriter",
+     "JSONWriter",
+     "TextWriter",
+     "SharePointGraphWriter",
+     # SQL
+     "SQLDatabaseNode",
+     "SQLQueryBuilderNode",
+     # Vector DB
+     "EmbeddingNode",
+     "VectorDatabaseNode",
+     "TextSplitterNode",
+     # Streaming
+     "KafkaConsumerNode",
+     "StreamPublisherNode",
+     "WebSocketNode",
+     "EventStreamNode",
+ ]
kailash/nodes/data/readers.py
@@ -0,0 +1,496 @@
+ """Data reader nodes for the Kailash SDK.
+
+ This module provides node implementations for reading data from various file formats.
+ These nodes serve as data sources in workflows, bringing external data into the
+ Kailash processing pipeline.
+
+ Design Philosophy:
+ 1. Unified interface for different file formats
+ 2. Consistent output format (always returns {"data": ...})
+ 3. Robust error handling for file operations
+ 4. Memory-efficient processing where possible
+ 5. Type-safe parameter validation
+
+ Node Categories:
+ - CSVReader: Tabular data from CSV files
+ - JSONReader: Structured data from JSON files
+ - TextReader: Raw text from any text file
+
+ Upstream Components:
+ - FileSystem: Provides files to read
+ - Workflow: Creates and configures reader nodes
+ - User Input: Specifies file paths and options
+
+ Downstream Consumers:
+ - Transform nodes: Process the loaded data
+ - Writer nodes: Export data to different formats
+ - Logic nodes: Make decisions based on data
+ - AI nodes: Use data for model input
+ """
+
+ import csv
+ import json
+ from typing import Any, Dict
+
+ from kailash.nodes.base import Node, NodeParameter, register_node
+
+
+ @register_node()
+ class CSVReader(Node):
+     """Reads data from a CSV file.
+
+     This node provides robust CSV file reading capabilities with support for
+     various delimiters, header detection, and encoding options. It's designed
+     to handle common CSV formats and edge cases.
+
+     Design Features:
+     1. Automatic header detection
+     2. Configurable delimiters
+     3. Memory-efficient line-by-line reading
+     4. Consistent dictionary output format
+     5. Unicode support through encoding parameter
+
+     Data Flow:
+     - Input: File path and configuration parameters
+     - Processing: Reads CSV line by line, converting to dictionaries
+     - Output: List of dictionaries (with headers) or list of lists
+
+     Common Usage Patterns:
+     1. Reading data exports from databases
+     2. Processing spreadsheet data
+     3. Loading configuration from CSV
+     4. Ingesting sensor data logs
+
+     Upstream Sources:
+     - File system paths from user input
+     - Output paths from previous nodes
+     - Configuration management systems
+
+     Downstream Consumers:
+     - DataTransformer: Processes tabular data
+     - Aggregator: Summarizes data
+     - CSVWriter: Reformats and saves
+     - Visualizer: Creates charts from data
+
+     Error Handling:
+     - FileNotFoundError: Invalid file path
+     - PermissionError: Insufficient read permissions
+     - UnicodeDecodeError: Encoding mismatch
+     - csv.Error: Malformed CSV data
+
+     Example:
+         # Read customer data with headers
+         reader = CSVReader(
+             file_path='customers.csv',
+             headers=True,
+             delimiter=','
+         )
+         result = reader.execute()
+         # result['data'] = [
+         #     {'id': '1', 'name': 'John', 'age': '30'},
+         #     {'id': '2', 'name': 'Jane', 'age': '25'}
+         # ]
+     """
+
+     def get_parameters(self) -> Dict[str, NodeParameter]:
+         """Define input parameters for CSV reading.
+
+         This method specifies the configuration options for reading CSV files,
+         providing flexibility while maintaining sensible defaults.
+
+         Parameter Design:
+         1. file_path: Required for locating the data source
+         2. headers: Optional with smart default (True)
+         3. delimiter: Optional with standard default (',')
+         4. index_column: Optional column to use as dictionary key
+
+         The parameters are designed to handle common CSV variants while
+         keeping the interface simple for typical use cases.
+
+         Returns:
+             Dictionary of parameter definitions used by:
+             - Input validation during execution
+             - UI generation for configuration
+             - Workflow validation for connections
+             - Documentation and help systems
+         """
+         return {
+             "file_path": NodeParameter(
+                 name="file_path",
+                 type=str,
+                 required=True,
+                 description="Path to the CSV file to read",
+             ),
+             "headers": NodeParameter(
+                 name="headers",
+                 type=bool,
+                 required=False,
+                 default=True,
+                 description="Whether the CSV has headers",
+             ),
+             "delimiter": NodeParameter(
+                 name="delimiter",
+                 type=str,
+                 required=False,
+                 default=",",
+                 description="CSV delimiter character",
+             ),
+             "index_column": NodeParameter(
+                 name="index_column",
+                 type=str,
+                 required=False,
+                 description="Column to use as index for creating a dictionary",
+             ),
+         }
+
+     def run(self, **kwargs) -> Dict[str, Any]:
+         """Execute CSV reading operation.
+
+         This method performs the actual file reading, handling both headerless
+         and header-based CSV formats. It uses Python's csv module for robust
+         parsing of various CSV dialects.
+
+         Processing Steps:
+         1. Opens file with UTF-8 encoding (standard)
+         2. Creates csv.reader with specified delimiter
+         3. Processes headers if present
+         4. Converts rows to appropriate format
+         5. Returns standardized output
+
+         Memory Considerations:
+         - Loads entire file into memory
+         - Suitable for files up to ~100MB
+         - For larger files, consider streaming approach
+
+         Output Format:
+         - With headers: List of dictionaries
+         - Without headers: List of lists
+         - With index_column: Also returns dictionary indexed by the column
+         - Always wrapped in {"data": ...} for consistency
+
+         Args:
+             **kwargs: Validated parameters including:
+                 - file_path: Path to CSV file
+                 - headers: Whether to treat first row as headers
+                 - delimiter: Character separating values
+                 - index_column: Column to use as key for indexed dictionary
+
+         Returns:
+             Dictionary with:
+             - 'data' key containing list of dicts or lists
+             - 'data_indexed' key (if index_column provided) containing dict
+
+         Raises:
+             FileNotFoundError: If file doesn't exist
+             PermissionError: If file can't be read
+             UnicodeDecodeError: If encoding is wrong
+             KeyError: If index_column doesn't exist in headers
+
+         Downstream usage:
+         - Transform nodes expect consistent data structure
+         - Writers can directly output the data
+         - Analyzers can process row-by-row
+         - data_indexed is useful for lookups and joins
+         """
+         file_path = kwargs["file_path"]
+         headers = kwargs.get("headers", True)
+         delimiter = kwargs.get("delimiter", ",")
+         index_column = kwargs.get("index_column")
+
+         data = []
+         data_indexed = {}
+
+         with open(file_path, "r", encoding="utf-8") as f:
+             reader = csv.reader(f, delimiter=delimiter)
+
+             if headers:
+                 header_row = next(reader)
+
+                 # Verify index_column exists if specified
+                 if index_column and index_column not in header_row:
+                     raise KeyError(
+                         f"Index column '{index_column}' not found in headers: {header_row}"
+                     )
+
+                 index_pos = header_row.index(index_column) if index_column else None
+
+                 for row in reader:
+                     row_dict = dict(zip(header_row, row))
+                     data.append(row_dict)
+
+                     # If index column specified, add to indexed dictionary
+                     if index_column and index_pos < len(row):
+                         key = row[index_pos]
+                         data_indexed[key] = row_dict
+             else:
+                 for row in reader:
+                     data.append(row)
+
+         result = {"data": data}
+         if index_column:
+             result["data_indexed"] = data_indexed
+
+         return result
+
+
+ @register_node()
+ class JSONReader(Node):
+     """Reads data from a JSON file.
+
+     This node handles JSON file reading with support for complex nested
+     structures, arrays, and objects. It preserves the original JSON
+     structure while ensuring compatibility with downstream nodes.
+
+     Design Features:
+     1. Preserves JSON structure integrity
+     2. Handles nested objects and arrays
+     3. Unicode-safe reading
+     4. Automatic type preservation
+     5. Memory-efficient for reasonable file sizes
+
+     Data Flow:
+     - Input: JSON file path
+     - Processing: Parse JSON maintaining structure
+     - Output: Python objects matching JSON structure
+
+     Common Usage Patterns:
+     1. Loading configuration files
+     2. Reading API response caches
+     3. Processing structured data exports
+     4. Loading machine learning datasets
+
+     Upstream Sources:
+     - API response saves
+     - Configuration management
+     - Data export systems
+     - Previous JSONWriter outputs
+
+     Downstream Consumers:
+     - Transform nodes: Process structured data
+     - Logic nodes: Navigate JSON structure
+     - JSONWriter: Re-export with modifications
+     - AI nodes: Use as structured input
+
+     Error Handling:
+     - FileNotFoundError: Missing file
+     - json.JSONDecodeError: Invalid JSON syntax
+     - PermissionError: Access denied
+     - MemoryError: File too large
+
+     Example:
+         # Read API response data
+         reader = JSONReader(file_path='api_response.json')
+         result = reader.execute()
+         # result['data'] = {
+         #     'status': 'success',
+         #     'items': [{'id': 1, 'name': 'Item1'}],
+         #     'metadata': {'version': '1.0'}
+         # }
+     """
+
+     def get_parameters(self) -> Dict[str, NodeParameter]:
+         """Define input parameters for JSON reading.
+
+         Simple parameter definition reflecting JSON's self-describing nature.
+         Unlike CSV, JSON files don't require format configuration.
+
+         Design Choice:
+         - Single required parameter for simplicity
+         - No encoding parameter (UTF-8 standard for JSON)
+         - No structure hints needed (self-describing format)
+
+         Returns:
+             Dictionary with single file_path parameter
+         """
+         return {
+             "file_path": NodeParameter(
+                 name="file_path",
+                 type=str,
+                 required=True,
+                 description="Path to the JSON file to read",
+             )
+         }
+
+     def run(self, **kwargs) -> Dict[str, Any]:
+         """Execute JSON reading operation.
+
+         Reads and parses JSON file, preserving the original structure
+         and types. The json.load() function handles the parsing and
+         type conversion automatically.
+
+         Processing Steps:
+         1. Opens file with UTF-8 encoding
+         2. Parses JSON to Python objects
+         3. Preserves structure (objects→dicts, arrays→lists)
+         4. Returns wrapped in standard format
+
+         Type Mappings:
+         - JSON objects → Python dicts
+         - JSON arrays → Python lists
+         - JSON strings → Python strings
+         - JSON numbers → Python int/float
+         - JSON booleans → Python bool
+         - JSON null → Python None
+
+         Args:
+             **kwargs: Validated parameters including:
+                 - file_path: Path to JSON file
+
+         Returns:
+             Dictionary with 'data' key containing the parsed JSON
+
+         Raises:
+             FileNotFoundError: If file doesn't exist
+             json.JSONDecodeError: If JSON is malformed
+             PermissionError: If file can't be read
+
+         Downstream usage:
+         - Structure can be directly navigated
+         - Compatible with JSONWriter for round-trip
+         - Transform nodes can process nested data
+         """
+         file_path = kwargs["file_path"]
+
+         with open(file_path, "r", encoding="utf-8") as f:
+             data = json.load(f)
+
+         return {"data": data}
+
+
+ @register_node()
+ class TextReader(Node):
+     """Reads text from a file.
+
+     This node provides simple text file reading with encoding support.
+     It's designed for processing plain text files, logs, documents,
+     and any text-based format not handled by specialized readers.
+
+     Design Features:
+     1. Flexible encoding support
+     2. Reads entire file as single string
+     3. Preserves line endings and whitespace
+     4. Handles various text encodings
+     5. Simple, predictable output format
+
+     Data Flow:
+     - Input: File path and encoding
+     - Processing: Read entire file as text
+     - Output: Single text string
+
+     Common Usage Patterns:
+     1. Reading log files
+     2. Processing documentation
+     3. Loading templates
+     4. Reading configuration files
+     5. Processing natural language data
+
+     Upstream Sources:
+     - Log file generators
+     - Document management systems
+     - Template repositories
+     - Previous TextWriter outputs
+
+     Downstream Consumers:
+     - NLP processors: Analyze text content
+     - Pattern matchers: Search for patterns
+     - TextWriter: Save processed text
+     - AI models: Process natural language
+
+     Error Handling:
+     - FileNotFoundError: Missing file
+     - PermissionError: Access denied
+     - UnicodeDecodeError: Wrong encoding
+     - MemoryError: File too large
+
+     Example:
+         # Read a log file
+         reader = TextReader(
+             file_path='application.log',
+             encoding='utf-8'
+         )
+         result = reader.execute()
+         # result['text'] = "2024-01-01 INFO: Application started\\n..."
+     """
+
+     def get_parameters(self) -> Dict[str, NodeParameter]:
+         """Define input parameters for text reading.
+
+         Provides essential parameters for text file reading with
+         encoding flexibility to handle international text.
+
+         Parameter Design:
+         1. file_path: Required for file location
+         2. encoding: Optional with UTF-8 default
+
+         The encoding parameter is crucial for:
+         - International text support
+         - Legacy system compatibility
+         - Log file processing
+         - Cross-platform text handling
+
+         Returns:
+             Dictionary of parameter definitions
+         """
+         return {
+             "file_path": NodeParameter(
+                 name="file_path",
+                 type=str,
+                 required=True,
+                 description="Path to the text file to read",
+             ),
+             "encoding": NodeParameter(
+                 name="encoding",
+                 type=str,
+                 required=False,
+                 default="utf-8",
+                 description="File encoding",
+             ),
+         }
+
+     def run(self, **kwargs) -> Dict[str, Any]:
+         """Execute text reading operation.
+
+         Reads entire text file into memory as a single string,
+         preserving all formatting, line endings, and whitespace.
+
+         Processing Steps:
+         1. Opens file with specified encoding
+         2. Reads entire content as string
+         3. Preserves original formatting
+         4. Returns in standard format
+
+         Memory Considerations:
+         - Loads entire file into memory
+         - Suitable for files up to ~10MB
+         - Large files may need streaming approach
+
+         Output Note:
+         - Returns {"text": ...} not {"data": ...}
+         - Different from CSV/JSON readers for clarity
+         - Text is unprocessed, raw content
+
+         Args:
+             **kwargs: Validated parameters including:
+                 - file_path: Path to text file
+                 - encoding: Character encoding
+
+         Returns:
+             Dictionary with 'text' key containing file content
+
+         Raises:
+             FileNotFoundError: If file doesn't exist
+             UnicodeDecodeError: If encoding is incorrect
+             PermissionError: If file can't be read
+
+         Downstream usage:
+         - NLP nodes can tokenize/analyze
+         - Pattern nodes can search content
+         - Writers can save processed text
+         """
+         file_path = kwargs["file_path"]
+         encoding = kwargs.get("encoding", "utf-8")
+
+         with open(file_path, "r", encoding=encoding) as f:
+             text = f.read()
+
+         return {"text": text}