kailash 0.2.2__py3-none-any.whl → 0.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- kailash/__init__.py +1 -1
- kailash/nodes/api/__init__.py +5 -0
- kailash/nodes/api/monitoring.py +463 -0
- kailash/nodes/api/security.py +822 -0
- kailash/nodes/base.py +3 -3
- kailash/nodes/data/__init__.py +6 -0
- kailash/nodes/data/event_generation.py +297 -0
- kailash/nodes/data/file_discovery.py +601 -0
- kailash/nodes/transform/processors.py +1 -1
- kailash/runtime/async_local.py +1 -1
- kailash/runtime/docker.py +4 -4
- kailash/runtime/local.py +39 -15
- kailash/runtime/parallel.py +2 -2
- kailash/runtime/parallel_cyclic.py +2 -2
- kailash/runtime/testing.py +2 -2
- kailash/utils/templates.py +6 -6
- kailash/visualization/performance.py +16 -3
- kailash/visualization/reports.py +5 -1
- kailash/workflow/cycle_analyzer.py +8 -1
- kailash/workflow/cyclic_runner.py +1 -1
- kailash/workflow/graph.py +18 -6
- kailash/workflow/visualization.py +10 -2
- kailash-0.3.0.dist-info/METADATA +428 -0
- {kailash-0.2.2.dist-info → kailash-0.3.0.dist-info}/RECORD +28 -24
- kailash-0.2.2.dist-info/METADATA +0 -121
- {kailash-0.2.2.dist-info → kailash-0.3.0.dist-info}/WHEEL +0 -0
- {kailash-0.2.2.dist-info → kailash-0.3.0.dist-info}/entry_points.txt +0 -0
- {kailash-0.2.2.dist-info → kailash-0.3.0.dist-info}/licenses/LICENSE +0 -0
- {kailash-0.2.2.dist-info → kailash-0.3.0.dist-info}/top_level.txt +0 -0
kailash/nodes/base.py
CHANGED
@@ -407,9 +407,9 @@ class Node(ABC):
         for param_name, param_def in params.items():
             if param_name not in self.config:
                 if param_def.required and param_def.default is None:
-
-
-
+                    # During node construction, we may not have all parameters yet
+                    # Skip validation for required parameters - they will be validated at execution time
+                    continue
                 elif param_def.default is not None:
                     self.config[param_name] = param_def.default

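The base.py change relaxes construction-time validation: a missing required parameter is now skipped during __init__ and checked when the node actually runs. Below is a minimal sketch of the resulting behavior, assuming kailash 0.3.0 is installed and that construction previously failed for a missing required parameter (the removed lines are not shown in this diff); constructor usage follows the EventGeneratorNode docstring further down.

from kailash.nodes.data import EventGeneratorNode

# With the relaxed check, constructing a node without its required
# "event_types" parameter no longer fails at construction time.
partial = EventGeneratorNode()

# The required parameter is still enforced at execution time, so a fully
# configured node is used for the actual run.
generator = EventGeneratorNode(
    event_types=["OrderCreated", "OrderShipped"],
    event_count=3,
)
result = generator.execute()
assert result["event_count"] == 3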
kailash/nodes/data/__init__.py
CHANGED
@@ -81,6 +81,8 @@ Example Workflows:
 """
 
 from kailash.nodes.data.directory import DirectoryReaderNode
+from kailash.nodes.data.event_generation import EventGeneratorNode
+from kailash.nodes.data.file_discovery import FileDiscoveryNode
 from kailash.nodes.data.readers import CSVReaderNode, JSONReaderNode, TextReaderNode
 from kailash.nodes.data.retrieval import RelevanceScorerNode
 from kailash.nodes.data.sharepoint_graph import (
@@ -105,6 +107,10 @@ from kailash.nodes.data.writers import CSVWriterNode, JSONWriterNode, TextWriter
 __all__ = [
     # Directory
     "DirectoryReaderNode",
+    # Event Generation
+    "EventGeneratorNode",
+    # File Discovery
+    "FileDiscoveryNode",
     # Readers
     "CSVReaderNode",
     "JSONReaderNode",
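The __init__.py change adds the two new nodes to the package's public API. A quick sketch of the new imports, assuming kailash 0.3.0 is installed:

from kailash.nodes.data import (
    DirectoryReaderNode,  # existing export, unchanged
    EventGeneratorNode,   # new in 0.3.0
    FileDiscoveryNode,    # new in 0.3.0
)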
kailash/nodes/data/event_generation.py
ADDED
@@ -0,0 +1,297 @@
+"""Event generation nodes for event-driven architectures."""
+
+import random
+import uuid
+from datetime import datetime, timezone
+from typing import Any, Dict
+
+from kailash.nodes.base import Node, NodeParameter, register_node
+
+
+@register_node()
+class EventGeneratorNode(Node):
+    """
+    Generates events for event sourcing and event-driven architecture patterns.
+
+    This node creates realistic event streams for testing, development, and
+    demonstration of event-driven systems. It supports various event types
+    and can generate events with proper sequencing, timestamps, and metadata.
+
+    Design Philosophy:
+        Event sourcing requires consistent, well-structured events with proper
+        metadata. This node eliminates the need for DataTransformer with embedded
+        Python code by providing a dedicated, configurable event generation
+        capability.
+
+    Upstream Dependencies:
+        - Optional configuration nodes
+        - Timer/scheduler nodes for periodic generation
+        - Template nodes for event schemas
+
+    Downstream Consumers:
+        - Event processing nodes
+        - Stream aggregation nodes
+        - Event store writers
+        - Message queue publishers
+        - Analytics and monitoring nodes
+
+    Configuration:
+        - Event types and schemas
+        - Generation patterns (burst, continuous, scheduled)
+        - Data ranges and distributions
+        - Metadata templates
+
+    Implementation Details:
+        - Generates proper event IDs and timestamps
+        - Maintains event ordering and sequencing
+        - Supports custom event schemas
+        - Realistic data generation with configurable patterns
+        - Proper metadata structure
+
+    Error Handling:
+        - Validates event schemas
+        - Handles invalid configurations gracefully
+        - Ensures timestamp consistency
+        - Validates required fields
+
+    Side Effects:
+        - No external side effects
+        - Deterministic with seed parameter
+        - Generates new events on each execution
+
+    Examples:
+        >>> # Generate order events
+        >>> generator = EventGeneratorNode(
+        ...     event_types=['OrderCreated', 'PaymentProcessed', 'OrderShipped'],
+        ...     event_count=10,
+        ...     aggregate_prefix='ORDER-2024'
+        ... )
+        >>> result = generator.execute()
+        >>> assert len(result['events']) == 10
+        >>> assert result['events'][0]['event_type'] in ['OrderCreated', 'PaymentProcessed', 'OrderShipped']
+        >>>
+        >>> # Generate user events with custom data
+        >>> generator = EventGeneratorNode(
+        ...     event_types=['UserRegistered', 'UserLoggedIn'],
+        ...     event_count=5,
+        ...     custom_data_templates={
+        ...         'UserRegistered': {'username': 'user_{id}', 'email': '{username}@example.com'},
+        ...         'UserLoggedIn': {'ip_address': '192.168.1.{random_ip}', 'device': 'Chrome/Windows'}
+        ...     }
+        ... )
+        >>> result = generator.execute()
+        >>> assert 'events' in result
+        >>> assert result['metadata']['total_events'] == 5
+    """
+
+    def get_parameters(self) -> Dict[str, NodeParameter]:
+        return {
+            "event_types": NodeParameter(
+                name="event_types",
+                type=list,
+                required=True,
+                description="List of event types to generate",
+            ),
+            "event_count": NodeParameter(
+                name="event_count",
+                type=int,
+                required=False,
+                default=10,
+                description="Number of events to generate",
+            ),
+            "aggregate_prefix": NodeParameter(
+                name="aggregate_prefix",
+                type=str,
+                required=False,
+                default="AGG",
+                description="Prefix for aggregate IDs",
+            ),
+            "custom_data_templates": NodeParameter(
+                name="custom_data_templates",
+                type=dict,
+                required=False,
+                default={},
+                description="Custom data templates for each event type",
+            ),
+            "source_service": NodeParameter(
+                name="source_service",
+                type=str,
+                required=False,
+                default="event-generator",
+                description="Source service name for metadata",
+            ),
+            "time_range_hours": NodeParameter(
+                name="time_range_hours",
+                type=int,
+                required=False,
+                default=24,
+                description="Time range in hours for event timestamps",
+            ),
+            "seed": NodeParameter(
+                name="seed",
+                type=int,
+                required=False,
+                description="Random seed for reproducible generation",
+            ),
+        }
+
+    def run(self, **kwargs) -> Dict[str, Any]:
+        event_types = kwargs["event_types"]
+        event_count = kwargs.get("event_count", 10)
+        aggregate_prefix = kwargs.get("aggregate_prefix", "AGG")
+        custom_data_templates = kwargs.get("custom_data_templates", {})
+        source_service = kwargs.get("source_service", "event-generator")
+        time_range_hours = kwargs.get("time_range_hours", 24)
+        seed = kwargs.get("seed")
+
+        if seed is not None:
+            random.seed(seed)
+
+        # Generate events
+        events = []
+        now = datetime.now(timezone.utc)
+
+        # Create a set of aggregate IDs for realistic event grouping
+        num_aggregates = max(1, event_count // 3)  # Roughly 3 events per aggregate
+        aggregate_ids = [
+            f"{aggregate_prefix}-{i:04d}" for i in range(1, num_aggregates + 1)
+        ]
+
+        for i in range(event_count):
+            # Select event type and aggregate
+            event_type = random.choice(event_types)
+            aggregate_id = random.choice(aggregate_ids)
+
+            # Generate timestamp within range
+            hours_offset = random.uniform(-time_range_hours, 0)
+            event_timestamp = now.timestamp() + hours_offset * 3600
+            event_time = datetime.fromtimestamp(event_timestamp, tz=timezone.utc)
+
+            # Generate event data
+            event_data = self._generate_event_data(
+                event_type, aggregate_id, custom_data_templates.get(event_type, {})
+            )
+
+            # Create event
+            event = {
+                "event_id": f"evt-{uuid.uuid4().hex[:8]}",
+                "event_type": event_type,
+                "aggregate_id": aggregate_id,
+                "timestamp": event_time.isoformat() + "Z",
+                "data": event_data,
+                "metadata": {
+                    "source": source_service,
+                    "version": 1,
+                    "correlation_id": f"corr-{uuid.uuid4().hex[:8]}",
+                    "generated": True,
+                },
+            }
+            events.append(event)
+
+        # Sort events by timestamp for realistic ordering
+        events.sort(key=lambda x: x["timestamp"])
+
+        # Generate metadata
+        metadata = {
+            "total_events": len(events),
+            "event_types": list(set(e["event_type"] for e in events)),
+            "aggregate_count": len(set(e["aggregate_id"] for e in events)),
+            "time_range": {
+                "start": events[0]["timestamp"] if events else None,
+                "end": events[-1]["timestamp"] if events else None,
+            },
+            "generated_at": now.isoformat() + "Z",
+            "source": source_service,
+        }
+
+        return {
+            "events": events,
+            "metadata": metadata,
+            "event_count": len(events),
+            "event_types": metadata["event_types"],
+            "aggregate_count": metadata["aggregate_count"],
+        }
+
+    def _generate_event_data(
+        self, event_type: str, aggregate_id: str, template: Dict[str, Any]
+    ) -> Dict[str, Any]:
+        """Generate event-specific data based on type and template."""
+
+        # Default data generators by event type
+        default_generators = {
+            "OrderCreated": lambda: {
+                "customer_id": f"CUST-{random.randint(100, 999)}",
+                "total_amount": round(random.uniform(10.0, 1000.0), 2),
+                "item_count": random.randint(1, 5),
+                "status": "pending",
+                "payment_method": random.choice(
+                    ["credit_card", "debit_card", "paypal"]
+                ),
+            },
+            "PaymentProcessed": lambda: {
+                "payment_id": f"PAY-{random.randint(10000, 99999)}",
+                "amount": round(random.uniform(10.0, 1000.0), 2),
+                "method": random.choice(["credit_card", "debit_card", "paypal"]),
+                "status": random.choice(["success", "failed", "pending"]),
+                "transaction_id": f"txn-{uuid.uuid4().hex[:12]}",
+            },
+            "OrderShipped": lambda: {
+                "tracking_number": f"TRACK-{random.randint(100000, 999999)}",
+                "carrier": random.choice(["UPS", "FedEx", "DHL", "USPS"]),
+                "status": "shipped",
+                "estimated_delivery": datetime.now(timezone.utc)
+                .replace(day=datetime.now().day + random.randint(1, 7))
+                .isoformat()
+                + "Z",
+            },
+            "UserRegistered": lambda: {
+                "username": f"user_{random.randint(1000, 9999)}",
+                "email": f"user_{random.randint(1000, 9999)}@example.com",
+                "plan": random.choice(["free", "premium", "enterprise"]),
+                "registration_source": random.choice(["web", "mobile", "api"]),
+            },
+            "UserLoggedIn": lambda: {
+                "ip_address": f"192.168.1.{random.randint(1, 254)}",
+                "device": random.choice(
+                    [
+                        "Chrome/Windows",
+                        "Safari/macOS",
+                        "Firefox/Linux",
+                        "Mobile/iOS",
+                        "Mobile/Android",
+                    ]
+                ),
+                "session_id": f"sess-{uuid.uuid4().hex[:16]}",
+            },
+            "SubscriptionCreated": lambda: {
+                "plan": random.choice(["basic", "premium", "enterprise"]),
+                "price": random.choice([9.99, 29.99, 99.99, 199.99]),
+                "billing_cycle": random.choice(["monthly", "yearly"]),
+                "trial_days": random.choice([0, 7, 14, 30]),
+            },
+        }
+
+        # Use template if provided, otherwise use default generator
+        if template:
+            data = {}
+            for key, value_template in template.items():
+                if isinstance(value_template, str):
+                    # Simple string templating
+                    data[key] = value_template.format(
+                        id=random.randint(1, 999),
+                        random_ip=random.randint(1, 254),
+                        username=f"user_{random.randint(1000, 9999)}",
+                        aggregate_id=aggregate_id,
+                    )
+                else:
+                    data[key] = value_template
+            return data
+        elif event_type in default_generators:
+            return default_generators[event_type]()
+        else:
+            # Generic event data
+            return {
+                "event_data": f"Generated data for {event_type}",
+                "aggregate_id": aggregate_id,
+                "timestamp": datetime.now(timezone.utc).isoformat() + "Z",
+            }