kailash 0.2.2__py3-none-any.whl → 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
kailash/nodes/base.py CHANGED
@@ -407,9 +407,9 @@ class Node(ABC):
407
407
  for param_name, param_def in params.items():
408
408
  if param_name not in self.config:
409
409
  if param_def.required and param_def.default is None:
410
- raise NodeConfigurationError(
411
- f"Required parameter '{param_name}' not provided in configuration"
412
- )
410
+ # During node construction, we may not have all parameters yet
411
+ # Skip validation for required parameters - they will be validated at execution time
412
+ continue
413
413
  elif param_def.default is not None:
414
414
  self.config[param_name] = param_def.default
415
415
 
@@ -81,6 +81,8 @@ Example Workflows:
81
81
  """
82
82
 
83
83
  from kailash.nodes.data.directory import DirectoryReaderNode
84
+ from kailash.nodes.data.event_generation import EventGeneratorNode
85
+ from kailash.nodes.data.file_discovery import FileDiscoveryNode
84
86
  from kailash.nodes.data.readers import CSVReaderNode, JSONReaderNode, TextReaderNode
85
87
  from kailash.nodes.data.retrieval import RelevanceScorerNode
86
88
  from kailash.nodes.data.sharepoint_graph import (
@@ -105,6 +107,10 @@ from kailash.nodes.data.writers import CSVWriterNode, JSONWriterNode, TextWriter
105
107
  __all__ = [
106
108
  # Directory
107
109
  "DirectoryReaderNode",
110
+ # Event Generation
111
+ "EventGeneratorNode",
112
+ # File Discovery
113
+ "FileDiscoveryNode",
108
114
  # Readers
109
115
  "CSVReaderNode",
110
116
  "JSONReaderNode",
@@ -0,0 +1,297 @@
1
+ """Event generation nodes for event-driven architectures."""
2
+
3
+ import random
4
+ import uuid
5
+ from datetime import datetime, timezone
6
+ from typing import Any, Dict
7
+
8
+ from kailash.nodes.base import Node, NodeParameter, register_node
9
+
10
+
11
@register_node()
class EventGeneratorNode(Node):
    """
    Generates synthetic events for event sourcing and event-driven patterns.

    This node creates event streams for testing, development, and
    demonstration of event-driven systems. Each event carries an ID, a type,
    an aggregate ID, a UTC timestamp, a data payload and a metadata block.

    Design Philosophy:
        Event sourcing requires consistent, well-structured events with proper
        metadata. This node eliminates the need for DataTransformer with
        embedded Python code by providing a dedicated, configurable event
        generation capability.

    Upstream Dependencies:
        - Optional configuration nodes
        - Timer/scheduler nodes for periodic generation
        - Template nodes for event schemas

    Downstream Consumers:
        - Event processing nodes
        - Stream aggregation nodes
        - Event store writers
        - Message queue publishers
        - Analytics and monitoring nodes

    Configuration:
        - ``event_types``: event type names to draw from (required)
        - ``event_count``: number of events to generate (default 10)
        - ``aggregate_prefix``: prefix of generated aggregate IDs
        - ``custom_data_templates``: per-event-type payload templates
        - ``source_service``: value stored in each event's metadata
        - ``time_range_hours``: events are timestamped within this many hours
          before the moment of execution
        - ``seed``: seed for reproducible generation

    Implementation Details:
        - Roughly three events are generated per aggregate ID
        - Events are returned in chronological order
        - Timestamps are ISO 8601 UTC strings with microseconds and a single
          ``Z`` suffix
        - Built-in payload generators exist for OrderCreated,
          PaymentProcessed, OrderShipped, UserRegistered, UserLoggedIn and
          SubscriptionCreated; other types get a generic payload unless a
          template is supplied

    Error Handling:
        - An empty ``event_types`` raises ``ValueError``
        - A template referencing an unknown placeholder raises ``KeyError``
          (from ``str.format``)

    Side Effects:
        - Uses a private random number generator, so the process-wide
          ``random`` state is not touched
        - With ``seed`` set, event types, aggregates, IDs and payload values
          are reproducible; timestamps are anchored to the current time and
          therefore differ between runs

    Examples:
        >>> # Generate order events
        >>> generator = EventGeneratorNode(
        ...     event_types=['OrderCreated', 'PaymentProcessed', 'OrderShipped'],
        ...     event_count=10,
        ...     aggregate_prefix='ORDER-2024'
        ... )
        >>> result = generator.execute()
        >>> assert len(result['events']) == 10
        >>> assert result['events'][0]['event_type'] in ['OrderCreated', 'PaymentProcessed', 'OrderShipped']
        >>>
        >>> # Generate user events with custom data
        >>> generator = EventGeneratorNode(
        ...     event_types=['UserRegistered', 'UserLoggedIn'],
        ...     event_count=5,
        ...     custom_data_templates={
        ...         'UserRegistered': {'username': 'user_{id}', 'email': '{username}@example.com'},
        ...         'UserLoggedIn': {'ip_address': '192.168.1.{random_ip}', 'device': 'Chrome/Windows'}
        ...     }
        ... )
        >>> result = generator.execute()
        >>> assert 'events' in result
        >>> assert result['metadata']['total_events'] == 5
    """

    def get_parameters(self) -> Dict[str, NodeParameter]:
        """Describe the configuration parameters accepted by this node.

        Returns:
            Mapping of parameter name to its ``NodeParameter`` definition.
            Only ``event_types`` is required; ``seed`` has no default.
        """
        return {
            "event_types": NodeParameter(
                name="event_types",
                type=list,
                required=True,
                description="List of event types to generate",
            ),
            "event_count": NodeParameter(
                name="event_count",
                type=int,
                required=False,
                default=10,
                description="Number of events to generate",
            ),
            "aggregate_prefix": NodeParameter(
                name="aggregate_prefix",
                type=str,
                required=False,
                default="AGG",
                description="Prefix for aggregate IDs",
            ),
            "custom_data_templates": NodeParameter(
                name="custom_data_templates",
                type=dict,
                required=False,
                default={},
                description="Custom data templates for each event type",
            ),
            "source_service": NodeParameter(
                name="source_service",
                type=str,
                required=False,
                default="event-generator",
                description="Source service name for metadata",
            ),
            "time_range_hours": NodeParameter(
                name="time_range_hours",
                type=int,
                required=False,
                default=24,
                description="Time range in hours for event timestamps",
            ),
            "seed": NodeParameter(
                name="seed",
                type=int,
                required=False,
                description="Random seed for reproducible generation",
            ),
        }

    @staticmethod
    def _utc_iso(moment: datetime) -> str:
        """Format an aware datetime as ISO 8601 UTC with a single ``Z`` suffix.

        The offset is dropped before formatting so the result never contains
        both ``+00:00`` and ``Z``. Microseconds are always emitted so every
        timestamp has the same width.
        """
        utc_naive = moment.astimezone(timezone.utc).replace(tzinfo=None)
        return utc_naive.isoformat(timespec="microseconds") + "Z"

    @staticmethod
    def _random_hex(rng: Any, length: int) -> str:
        """Return ``length`` hex characters of a version-4 UUID drawn from ``rng``.

        Drawing the UUID bits from the node's own generator (instead of
        ``uuid.uuid4()``) keeps identifiers reproducible when a seed is set.
        """
        return uuid.UUID(int=rng.getrandbits(128), version=4).hex[:length]

    def run(self, **kwargs) -> Dict[str, Any]:
        """Generate the configured number of events.

        Args:
            **kwargs: Node parameters as described by ``get_parameters``.

        Returns:
            Dict with keys ``events`` (chronologically ordered list of event
            dicts), ``metadata`` (summary of the batch), ``event_count``,
            ``event_types`` and ``aggregate_count``.

        Raises:
            KeyError: If ``event_types`` is not supplied.
            ValueError: If ``event_types`` is empty.
        """
        event_types = kwargs["event_types"]
        event_count = kwargs.get("event_count", 10)
        aggregate_prefix = kwargs.get("aggregate_prefix", "AGG")
        # Tolerate an explicit None as well as a missing key.
        custom_data_templates = kwargs.get("custom_data_templates") or {}
        source_service = kwargs.get("source_service", "event-generator")
        time_range_hours = kwargs.get("time_range_hours", 24)
        seed = kwargs.get("seed")

        if not event_types:
            raise ValueError("event_types must contain at least one event type")

        # A private generator avoids reseeding the process-wide random state;
        # Random(None) seeds itself from OS entropy.
        rng = random.Random(seed)

        now = datetime.now(timezone.utc)
        now_ts = now.timestamp()

        # Create a set of aggregate IDs for realistic event grouping
        num_aggregates = max(1, event_count // 3)  # Roughly 3 events per aggregate
        aggregate_ids = [
            f"{aggregate_prefix}-{i:04d}" for i in range(1, num_aggregates + 1)
        ]

        # Keep the numeric timestamp next to each event so ordering does not
        # depend on comparing formatted strings.
        stamped_events = []
        for _ in range(event_count):
            # Select event type and aggregate
            event_type = rng.choice(event_types)
            aggregate_id = rng.choice(aggregate_ids)

            # Generate timestamp within range (up to time_range_hours in the past)
            hours_offset = rng.uniform(-time_range_hours, 0)
            event_timestamp = now_ts + hours_offset * 3600
            event_time = datetime.fromtimestamp(event_timestamp, tz=timezone.utc)

            # Generate event data
            event_data = self._generate_event_data(
                event_type,
                aggregate_id,
                custom_data_templates.get(event_type, {}),
                rng=rng,
            )

            # Create event
            event = {
                "event_id": f"evt-{self._random_hex(rng, 8)}",
                "event_type": event_type,
                "aggregate_id": aggregate_id,
                "timestamp": self._utc_iso(event_time),
                "data": event_data,
                "metadata": {
                    "source": source_service,
                    "version": 1,
                    "correlation_id": f"corr-{self._random_hex(rng, 8)}",
                    "generated": True,
                },
            }
            stamped_events.append((event_timestamp, event))

        # Sort events by timestamp for realistic ordering
        stamped_events.sort(key=lambda pair: pair[0])
        events = [event for _, event in stamped_events]

        # Generate metadata; sorted() keeps the type list stable across runs
        metadata = {
            "total_events": len(events),
            "event_types": sorted({e["event_type"] for e in events}),
            "aggregate_count": len({e["aggregate_id"] for e in events}),
            "time_range": {
                "start": events[0]["timestamp"] if events else None,
                "end": events[-1]["timestamp"] if events else None,
            },
            "generated_at": self._utc_iso(now),
            "source": source_service,
        }

        return {
            "events": events,
            "metadata": metadata,
            "event_count": len(events),
            "event_types": metadata["event_types"],
            "aggregate_count": metadata["aggregate_count"],
        }

    def _generate_event_data(
        self,
        event_type: str,
        aggregate_id: str,
        template: Dict[str, Any],
        rng: Any = None,
    ) -> Dict[str, Any]:
        """Generate event-specific data based on type and template.

        Args:
            event_type: Name of the event type being generated.
            aggregate_id: Aggregate the event belongs to.
            template: Optional payload template. String values are formatted
                with the placeholders ``{id}``, ``{random_ip}``,
                ``{username}`` and ``{aggregate_id}``; other values are
                copied unchanged. An empty template selects the built-in
                generator for ``event_type`` or the generic payload.
            rng: ``random.Random`` instance to draw from. A fresh, unseeded
                generator is used when omitted.

        Returns:
            The payload dict for the event.

        Raises:
            KeyError: If a template string references an unknown placeholder.
        """
        # Function-scope import: the module header does not import timedelta.
        from datetime import timedelta

        if rng is None:
            rng = random.Random()

        # Use template if provided, otherwise use default generator
        if template:
            # Placeholder values are drawn once per event so the same
            # placeholder resolves identically in every field.
            placeholders = {
                "id": rng.randint(1, 999),
                "random_ip": rng.randint(1, 254),
                "username": f"user_{rng.randint(1000, 9999)}",
                "aggregate_id": aggregate_id,
            }
            return {
                key: (
                    value_template.format(**placeholders)
                    if isinstance(value_template, str)
                    else value_template
                )
                for key, value_template in template.items()
            }

        # Default data generators by event type; only the selected one runs.
        default_generators = {
            "OrderCreated": lambda: {
                "customer_id": f"CUST-{rng.randint(100, 999)}",
                "total_amount": round(rng.uniform(10.0, 1000.0), 2),
                "item_count": rng.randint(1, 5),
                "status": "pending",
                "payment_method": rng.choice(
                    ["credit_card", "debit_card", "paypal"]
                ),
            },
            "PaymentProcessed": lambda: {
                "payment_id": f"PAY-{rng.randint(10000, 99999)}",
                "amount": round(rng.uniform(10.0, 1000.0), 2),
                "method": rng.choice(["credit_card", "debit_card", "paypal"]),
                "status": rng.choice(["success", "failed", "pending"]),
                "transaction_id": f"txn-{self._random_hex(rng, 12)}",
            },
            "OrderShipped": lambda: {
                "tracking_number": f"TRACK-{rng.randint(100000, 999999)}",
                "carrier": rng.choice(["UPS", "FedEx", "DHL", "USPS"]),
                "status": "shipped",
                # timedelta arithmetic rolls over month/year boundaries,
                # unlike replace(day=...), which raises past month end.
                "estimated_delivery": self._utc_iso(
                    datetime.now(timezone.utc)
                    + timedelta(days=rng.randint(1, 7))
                ),
            },
            "UserRegistered": lambda: {
                "username": f"user_{rng.randint(1000, 9999)}",
                "email": f"user_{rng.randint(1000, 9999)}@example.com",
                "plan": rng.choice(["free", "premium", "enterprise"]),
                "registration_source": rng.choice(["web", "mobile", "api"]),
            },
            "UserLoggedIn": lambda: {
                "ip_address": f"192.168.1.{rng.randint(1, 254)}",
                "device": rng.choice(
                    [
                        "Chrome/Windows",
                        "Safari/macOS",
                        "Firefox/Linux",
                        "Mobile/iOS",
                        "Mobile/Android",
                    ]
                ),
                "session_id": f"sess-{self._random_hex(rng, 16)}",
            },
            "SubscriptionCreated": lambda: {
                "plan": rng.choice(["basic", "premium", "enterprise"]),
                "price": rng.choice([9.99, 29.99, 99.99, 199.99]),
                "billing_cycle": rng.choice(["monthly", "yearly"]),
                "trial_days": rng.choice([0, 7, 14, 30]),
            },
        }

        generator = default_generators.get(event_type)
        if generator is not None:
            return generator()

        # Generic event data
        return {
            "event_data": f"Generated data for {event_type}",
            "aggregate_id": aggregate_id,
            "timestamp": self._utc_iso(datetime.now(timezone.utc)),
        }