flow_nodes 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,603 @@
+ # frozen_string_literal: true
+
+ require_relative "../lib/flow_nodes"
+ require "json"
+ require "securerandom"
+
+ # Example: Multi-LLM Content Processing Pipeline
+ # This demonstrates a sophisticated content processing workflow with:
+ # - Document ingestion and preprocessing
+ # - Multi-LLM analysis (summarization, sentiment, classification)
+ # - Content transformation and formatting
+ # - Batch processing for multiple documents
+
+ module LLMContentProcessor
+   # Mock LLM services with different capabilities
+   class LLMService
+     def self.summarize(text, max_length: 200)
+       # Simulate OpenAI/Claude summarization
+       sentences = text.split(/[.!?]/).reject(&:empty?)
+       key_sentences = sentences.first(3).join(". ") + "."
+
+       if key_sentences.length > max_length
+         key_sentences = key_sentences[0..max_length - 4] + "..."
+       end
+
+       {
+         summary: key_sentences,
+         original_length: text.length,
+         compressed_ratio: (key_sentences.length.to_f / text.length * 100).round(2),
+         key_points: extract_key_points(text)
+       }
+     end
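+
+     # In a real pipeline this method would call an actual LLM API instead of
+     # the heuristic above: build a "summarize in under N characters" prompt,
+     # send it through whatever client gem you use, and keep the same return
+     # shape. The heuristic keeps this example dependency-free and deterministic.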
+
+     def self.analyze_sentiment(text)
+       # Simulate sentiment analysis
+       positive_words = %w[good great excellent amazing wonderful fantastic success]
+       negative_words = %w[bad terrible awful horrible disappointing failed problem]
+
+       words = text.downcase.split(/\W+/)
+       positive_count = words.count { |w| positive_words.include?(w) }
+       negative_count = words.count { |w| negative_words.include?(w) }
+
+       if positive_count > negative_count
+         sentiment = "positive"
+         confidence = [(positive_count.to_f / words.length * 100), 95].min
+       elsif negative_count > positive_count
+         sentiment = "negative"
+         confidence = [(negative_count.to_f / words.length * 100), 95].min
+       else
+         sentiment = "neutral"
+         confidence = 60
+       end
+
+       {
+         sentiment: sentiment,
+         confidence: confidence.round(2),
+         positive_indicators: positive_count,
+         negative_indicators: negative_count,
+         emotional_tone: determine_emotional_tone(sentiment, confidence)
+       }
+     end
+
+     def self.classify_content(text)
+       # Simulate content classification
+       if text.include?("technical") || text.include?("code") || text.include?("API")
+         category = "technical"
+       elsif text.include?("business") || text.include?("revenue") || text.include?("strategy")
+         category = "business"
+       elsif text.include?("news") || text.include?("announcement") || text.include?("update")
+         category = "news"
+       else
+         category = "general"
+       end
+
+       {
+         category: category,
+         confidence: 85.0,
+         tags: extract_tags(text),
+         complexity: determine_complexity(text)
+       }
+     end
+
+     def self.transform_content(text, target_format:, target_audience: "general")
+       # Simulate content transformation
+       case target_format
+       when "executive_summary"
+         {
+           format: "executive_summary",
+           content: "Executive Summary: #{text.split('.').first}. Key implications and recommendations follow.",
+           target_audience: target_audience,
+           word_count: 150
+         }
+       when "social_media"
+         {
+           format: "social_media",
+           content: "šŸš€ #{text.split('.').first}! #innovation #update",
+           target_audience: target_audience,
+           word_count: 25
+         }
+       when "technical_doc"
+         {
+           format: "technical_doc",
+           content: "## Technical Overview\n\n#{text}\n\n### Implementation Details\n\n[Technical details would follow...]",
+           target_audience: target_audience,
+           word_count: text.split.length + 50
+         }
+       else
+         {
+           format: "standard",
+           content: text,
+           target_audience: target_audience,
+           word_count: text.split.length
+         }
+       end
+     end
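+
+     # Illustrative call (values follow from the case branch above):
+     #   LLMService.transform_content("Launch went well.", target_format: "social_media")
+     #   #=> { format: "social_media", content: "šŸš€ Launch went well! #innovation #update", ... }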
+
+     # NOTE: a bare `private` does not apply to methods defined with
+     # `def self.`, so the helpers below are hidden with
+     # `private_class_method` instead.
+
+     def self.extract_key_points(text)
+       sentences = text.split(/[.!?]/).reject(&:empty?)
+       sentences.first(3).map.with_index { |s, i| "#{i + 1}. #{s.strip}" }
+     end
+
+     def self.determine_emotional_tone(sentiment, confidence)
+       case sentiment
+       when "positive"
+         confidence > 80 ? "enthusiastic" : "optimistic"
+       when "negative"
+         confidence > 80 ? "critical" : "concerned"
+       else
+         "balanced"
+       end
+     end
+
+     def self.extract_tags(text)
+       # Simple tag extraction
+       words = text.downcase.split(/\W+/).reject { |w| w.length < 4 }
+       words.uniq.first(5)
+     end
+
+     def self.determine_complexity(text)
+       sentences = text.split(/[.!?]/).reject(&:empty?)
+       avg_sentence_length = sentences.map(&:length).sum / sentences.length.to_f
+
+       if avg_sentence_length > 100
+         "high"
+       elsif avg_sentence_length > 50
+         "medium"
+       else
+         "low"
+       end
+     end
+
+     private_class_method :extract_key_points, :determine_emotional_tone,
+                          :extract_tags, :determine_complexity
+   end
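+
+   # All four services return plain Hashes, so the nodes below can merge
+   # their results straight into the document Hashes with Hash#merge.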
+
+   class DocumentIngestionNode < FlowNodes::Node
+     def prep(state)
+       puts "šŸ“„ [#{Time.now.strftime('%H:%M:%S')}] Starting document ingestion..."
+       state[:ingestion_start] = Time.now
+       state[:documents_processed] = 0
+       nil
+     end
+
+     def exec(params)
+       puts "šŸ“Š [#{Time.now.strftime('%H:%M:%S')}] Processing document source: #{params[:source]}..."
+
+       # Simulate document ingestion
+       sleep(0.1)
+
+       # Mock document content
+       documents = [
+         {
+           id: "doc_1",
+           title: "Quarterly Business Review",
+           content: "Our business has shown excellent growth this quarter. Revenue increased by 25% compared to last quarter. The technical team delivered amazing new features that customers love. Success metrics indicate positive user engagement.",
+           source: params[:source],
+           metadata: {
+             author: "business_team",
+             created_at: "2024-01-15T10:00:00Z",
+             word_count: 35
+           }
+         },
+         {
+           id: "doc_2",
+           title: "Technical API Documentation Update",
+           content: "The new API endpoints have been implemented with improved performance. Technical documentation has been updated to reflect the latest changes. Code examples and integration guides are now available.",
+           source: params[:source],
+           metadata: {
+             author: "engineering_team",
+             created_at: "2024-01-15T14:30:00Z",
+             word_count: 28
+           }
+         }
+       ]
+
+       puts "āœ… [#{Time.now.strftime('%H:%M:%S')}] Ingested #{documents.length} documents"
+
+       # Return documents for processing
+       { documents: documents }
+     end
+
+     def post(state, params, result)
+       duration = Time.now - state[:ingestion_start]
+       doc_count = result[:documents]&.length || 0
+       state[:documents_processed] = doc_count
+       puts "šŸ“ˆ [#{Time.now.strftime('%H:%M:%S')}] Ingested #{doc_count} documents in #{duration.round(3)}s"
+
+       # Return symbol for routing
+       :documents_ingested
+     end
+   end
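+
+   # As used throughout this example, each node follows the same lifecycle:
+   # prep(state) initializes shared state, exec receives the node's input and
+   # returns a result, and post(state, params, result) returns the symbol
+   # that routes the flow to the next node.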
+
+   class LLMAnalysisNode < FlowNodes::AsyncBatchNode
+     def initialize(analysis_type: "summarize")
+       super(max_retries: 2, wait: 0.5)
+       @analysis_type = analysis_type
+     end
+
+     def prep_async(state)
+       puts "šŸ¤– [#{Time.now.strftime('%H:%M:%S')}] Starting LLM analysis: #{@analysis_type}..."
+       state[:analysis_start] = Time.now
+       state[:llm_calls] = 0
+
+       # Extract documents from the params
+       @params[:documents] || []
+     end
+
+     def exec_async(document)
+       puts "🧠 [#{Time.now.strftime('%H:%M:%S')}] Analyzing document: #{document[:id]} (#{@analysis_type})"
+
+       # Simulate LLM processing time
+       sleep(0.1)
+
+       # Call the appropriate LLM service
+       analysis_result = case @analysis_type
+                         when "summarize"
+                           LLMService.summarize(document[:content])
+                         when "sentiment"
+                           LLMService.analyze_sentiment(document[:content])
+                         when "classify"
+                           LLMService.classify_content(document[:content])
+                         else
+                           { error: "Unknown analysis type: #{@analysis_type}" }
+                         end
+
+       # Merge analysis with document
+       document.merge({
+         analysis: analysis_result,
+         analysis_type: @analysis_type,
+         analyzed_at: Time.now
+       })
+     end
+
+     def post_async(state, params, results)
+       duration = Time.now - state[:analysis_start]
+       successful_analyses = results.count { |r| !r.dig(:analysis, :error) }
+       state[:llm_calls] = results.length
+
+       puts "šŸ“ˆ [#{Time.now.strftime('%H:%M:%S')}] LLM analysis completed: #{successful_analyses}/#{results.length} successful"
+       puts "ā±ļø Analysis duration: #{duration.round(3)}s"
+
+       state[:analysis_duration] = duration
+       state[:successful_analyses] = successful_analyses
+
+       # Return symbol for routing
+       :analysis_completed
+     end
+   end
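+
+   # The max_retries/wait arguments passed to super in these batch nodes
+   # configure retry behavior for failing work items (retry count and the
+   # pause between attempts), per the keyword names used above.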
+
+   class ContentTransformationNode < FlowNodes::BatchNode
+     def initialize(target_format: "executive_summary", target_audience: "general")
+       super(max_retries: 2, wait: 0.5)
+       @target_format = target_format
+       @target_audience = target_audience
+     end
+
+     def prep(state)
+       puts "šŸŽØ [#{Time.now.strftime('%H:%M:%S')}] Starting content transformation to #{@target_format}..."
+       state[:transformation_start] = Time.now
+
+       # Get analyzed documents
+       @params[:documents] || []
+     end
+
+     def exec(document)
+       puts "šŸ“ [#{Time.now.strftime('%H:%M:%S')}] Transforming document: #{document[:id]}"
+
+       # Use LLM to transform content
+       transformation_result = LLMService.transform_content(
+         document[:content],
+         target_format: @target_format,
+         target_audience: @target_audience
+       )
+
+       document.merge({
+         transformation: transformation_result,
+         transformed_at: Time.now
+       })
+     end
+
+     def post(state, params, results)
+       duration = Time.now - state[:transformation_start]
+       puts "šŸ“ˆ [#{Time.now.strftime('%H:%M:%S')}] Content transformation completed for #{results.length} documents"
+       puts "ā±ļø Transformation duration: #{duration.round(3)}s"
+
+       state[:transformation_duration] = duration
+
+       # Return symbol for routing
+       :transformation_completed
+     end
+   end
+
+   class MultiLLMProcessingNode < FlowNodes::AsyncParallelBatchNode
+     def initialize
+       super(max_retries: 3, wait: 1)
+     end
+
+     def prep_async(state)
+       puts "šŸš€ [#{Time.now.strftime('%H:%M:%S')}] Starting parallel multi-LLM processing..."
+       state[:multi_llm_start] = Time.now
+
+       # Get documents for parallel processing
+       @params[:documents] || []
+     end
+
+     def exec_async(document)
+       puts "⚔ [#{Time.now.strftime('%H:%M:%S')}] Processing document #{document[:id]} on thread #{Thread.current.object_id}"
+
+       # Simulate LLM call latency
+       sleep(0.2)
+
+       # Run all three LLM analyses for this document; the parallelism comes
+       # from documents being processed concurrently across threads
+       summary = LLMService.summarize(document[:content])
+       sentiment = LLMService.analyze_sentiment(document[:content])
+       classification = LLMService.classify_content(document[:content])
+
+       document.merge({
+         multi_analysis: {
+           summary: summary,
+           sentiment: sentiment,
+           classification: classification,
+           thread_id: Thread.current.object_id
+         },
+         multi_analyzed_at: Time.now
+       })
+     end
+
+     def post_async(state, params, results)
+       duration = Time.now - state[:multi_llm_start]
+       thread_ids = results.map { |r| r.dig(:multi_analysis, :thread_id) }.uniq
+
+       puts "šŸ“ˆ [#{Time.now.strftime('%H:%M:%S')}] Multi-LLM processing completed!"
+       puts "⚔ Used #{thread_ids.length} parallel threads"
+       puts "ā±ļø Processing duration: #{duration.round(3)}s"
+
+       state[:multi_llm_duration] = duration
+       state[:threads_used] = thread_ids.length
+
+       # Return symbol for routing
+       :multi_analysis_completed
+     end
+   end
+
+   class ResultsAggregationNode < FlowNodes::Node
+     def prep(state)
+       puts "šŸ“Š [#{Time.now.strftime('%H:%M:%S')}] Aggregating results..."
+       state[:aggregation_start] = Time.now
+       nil
+     end
+
+     def exec(processed_data)
+       puts "šŸ“ˆ [#{Time.now.strftime('%H:%M:%S')}] Generating comprehensive report..."
+
+       documents = processed_data[:documents] || []
+
+       # Aggregate insights
+       aggregated_insights = {
+         total_documents: documents.length,
+         processing_summary: {
+           documents_processed: documents.length,
+           successful_analyses: documents.count { |d| d[:analysis] },
+           transformations: documents.count { |d| d[:transformation] },
+           multi_analyses: documents.count { |d| d[:multi_analysis] }
+         },
+         content_insights: generate_content_insights(documents),
+         performance_metrics: calculate_performance_metrics(documents)
+       }
+
+       processed_data.merge({
+         aggregated_insights: aggregated_insights,
+         aggregated_at: Time.now
+       })
+     end
+
+     def post(state, params, result)
+       duration = Time.now - state[:aggregation_start]
+       puts "šŸ“ˆ [#{Time.now.strftime('%H:%M:%S')}] Results aggregation completed in #{duration.round(3)}s"
+       state[:aggregation_duration] = duration
+
+       # Return symbol for routing
+       :aggregation_completed
+     end
+
+     private
+
+     def generate_content_insights(documents)
+       sentiments = documents.map { |d| d.dig(:multi_analysis, :sentiment, :sentiment) }.compact
+       categories = documents.map { |d| d.dig(:multi_analysis, :classification, :category) }.compact
+
+       {
+         sentiment_distribution: sentiments.group_by(&:itself).transform_values(&:count),
+         category_distribution: categories.group_by(&:itself).transform_values(&:count),
+         avg_compression_ratio: documents.map { |d| d.dig(:multi_analysis, :summary, :compressed_ratio) }.compact.sum / documents.length.to_f
+       }
+     end
+
+     def calculate_performance_metrics(documents)
+       {
+         total_processing_time: documents.sum { |d| 0.3 }, # Simulated
+         avg_processing_time_per_doc: 0.3,
+         llm_calls_made: documents.length * 3, # Summary + sentiment + classification
+         success_rate: (documents.count { |d| d[:multi_analysis] }.to_f / documents.length * 100).round(2)
+       }
+     end
+   end
+
+   class OutputFormattingNode < FlowNodes::Node
+     def initialize(output_format: "comprehensive_report")
+       super()
+       @output_format = output_format
+     end
+
+     def prep(state)
+       puts "šŸ“ [#{Time.now.strftime('%H:%M:%S')}] Formatting final output as #{@output_format}..."
+       state[:formatting_start] = Time.now
+       nil
+     end
+
+     def exec(aggregated_data)
+       puts "šŸŽØ [#{Time.now.strftime('%H:%M:%S')}] Generating final report..."
+
+       formatted_output = case @output_format
+                          when "comprehensive_report"
+                            generate_comprehensive_report(aggregated_data)
+                          when "executive_summary"
+                            generate_executive_summary(aggregated_data)
+                          when "json"
+                            JSON.pretty_generate(aggregated_data)
+                          else
+                            generate_simple_report(aggregated_data)
+                          end
+
+       aggregated_data.merge({
+         formatted_output: formatted_output,
+         output_format: @output_format,
+         formatted_at: Time.now
+       })
+     end
+
+     def post(state, params, result)
+       duration = Time.now - state[:formatting_start]
+       total_duration = Time.now - state[:ingestion_start]
+
+       puts "šŸ“ˆ [#{Time.now.strftime('%H:%M:%S')}] Output formatting completed in #{duration.round(3)}s"
+       puts "šŸŽÆ [#{Time.now.strftime('%H:%M:%S')}] Total pipeline duration: #{total_duration.round(3)}s"
+
+       # Display pipeline statistics
+       puts "\nšŸ“Š PIPELINE PERFORMANCE METRICS:"
+       puts " - Document Ingestion: #{state[:documents_processed]} docs"
+       puts " - LLM Analysis: #{state[:successful_analyses]} successful"
+       puts " - Content Transformations: #{state[:transformation_duration]&.round(3)}s"
+       puts " - Multi-LLM Processing: #{state[:multi_llm_duration]&.round(3)}s"
+       puts " - Results Aggregation: #{state[:aggregation_duration]&.round(3)}s"
+       puts " - Output Formatting: #{duration.round(3)}s"
+       puts " - Total Pipeline Time: #{total_duration.round(3)}s"
+
+       state[:formatting_duration] = duration
+
+       # Return symbol for routing
+       :output_ready
+     end
+
+     private
+
+     # Heredocs are used below; the original Python-style """ strings only
+     # parse in Ruby by accidental adjacent-literal concatenation.
+     def generate_comprehensive_report(data)
+       insights = data[:aggregated_insights]
+
+       <<~REPORT
+         šŸ“Š COMPREHENSIVE CONTENT ANALYSIS REPORT
+         ========================================
+
+         Processing Summary:
+         - Total Documents: #{insights[:total_documents]}
+         - Successful Analyses: #{insights[:processing_summary][:successful_analyses]}
+         - Transformations: #{insights[:processing_summary][:transformations]}
+         - Multi-Analyses: #{insights[:processing_summary][:multi_analyses]}
+
+         Content Insights:
+         - Sentiment Distribution: #{insights[:content_insights][:sentiment_distribution]}
+         - Category Distribution: #{insights[:content_insights][:category_distribution]}
+         - Average Compression Ratio: #{insights[:content_insights][:avg_compression_ratio].round(2)}%
+
+         Performance Metrics:
+         - Total Processing Time: #{insights[:performance_metrics][:total_processing_time]}s
+         - Average Time per Document: #{insights[:performance_metrics][:avg_processing_time_per_doc]}s
+         - LLM Calls Made: #{insights[:performance_metrics][:llm_calls_made]}
+         - Success Rate: #{insights[:performance_metrics][:success_rate]}%
+
+         Generated at: #{Time.now}
+       REPORT
+     end
+
+     def generate_executive_summary(data)
+       insights = data[:aggregated_insights]
+
+       <<~SUMMARY
+         šŸ“‹ EXECUTIVE SUMMARY
+         ===================
+
+         Processed #{insights[:total_documents]} documents with #{insights[:performance_metrics][:success_rate]}% success rate.
+
+         Key Findings:
+         - Sentiment: #{insights[:content_insights][:sentiment_distribution]}
+         - Categories: #{insights[:content_insights][:category_distribution]}
+         - Processing Efficiency: #{insights[:performance_metrics][:avg_processing_time_per_doc]}s per document
+
+         Recommendations: Continue monitoring content quality and processing performance.
+       SUMMARY
+     end
+
+     def generate_simple_report(data)
+       "Content processing completed for #{data[:aggregated_insights][:total_documents]} documents."
+     end
+   end
+
+   class FinalDeliveryNode < FlowNodes::Node
+     def prep(state)
+       puts "šŸ“¤ [#{Time.now.strftime('%H:%M:%S')}] Preparing final delivery..."
+       state[:delivery_start] = Time.now
+       nil
+     end
+
+     def exec(final_data)
+       puts "šŸš€ [#{Time.now.strftime('%H:%M:%S')}] Delivering final results..."
+
+       # Display the final formatted output
+       puts "\n" + "=" * 80
+       puts "šŸ“‹ FINAL CONTENT PROCESSING RESULTS"
+       puts "=" * 80
+       puts final_data[:formatted_output]
+       puts "=" * 80
+
+       puts "\nāœ… Content processing pipeline completed successfully!"
+
+       nil # End of flow
+     end
+
+     def post(state, params, result)
+       duration = Time.now - state[:delivery_start]
+       puts "šŸ“ˆ [#{Time.now.strftime('%H:%M:%S')}] Final delivery completed in #{duration.round(3)}s"
+     end
+   end
+ end
+
+ # Demo script showing the LLM content processing workflow
+ if $PROGRAM_NAME == __FILE__
+   puts "šŸ¤– LLM CONTENT PROCESSING PIPELINE"
+   puts "=" * 50
+
+   # Create pipeline state
+   state = {
+     pipeline_id: SecureRandom.hex(4),
+     user_id: "content_team",
+     pipeline_start: Time.now
+   }
+
+   # Create nodes
+   ingestion = LLMContentProcessor::DocumentIngestionNode.new
+   llm_analysis = LLMContentProcessor::LLMAnalysisNode.new(analysis_type: "summarize")
+   transformation = LLMContentProcessor::ContentTransformationNode.new(
+     target_format: "executive_summary",
+     target_audience: "executives"
+   )
+   multi_llm = LLMContentProcessor::MultiLLMProcessingNode.new
+   aggregation = LLMContentProcessor::ResultsAggregationNode.new
+   formatting = LLMContentProcessor::OutputFormattingNode.new(output_format: "comprehensive_report")
+   delivery = LLMContentProcessor::FinalDeliveryNode.new
+
+   # Connect the pipeline with symbol-based routing: each node's post method
+   # returns a symbol, and `node - :symbol >> next_node` wires that symbol
+   # to the next node in the flow
+   ingestion - :documents_ingested >> llm_analysis
+   llm_analysis - :analysis_completed >> transformation
+   transformation - :transformation_completed >> multi_llm
+   multi_llm - :multi_analysis_completed >> aggregation
+   aggregation - :aggregation_completed >> formatting
+   formatting - :output_ready >> delivery
+
+   # Create and run the flow
+   flow = FlowNodes::Flow.new(start: ingestion)
+   flow.set_params({ source: "document_management_system" })
+   flow.run(state)
+
+   puts "\nšŸŽÆ LLM Content Processing Pipeline Completed!"
+   puts "Pipeline ID: #{state[:pipeline_id]}"
+   puts "Total Runtime: #{(Time.now - state[:pipeline_start]).round(3)}s"
+ end
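+
+ # To run the demo from a checkout of the gem (the relative require at the
+ # top assumes this file sits one directory below lib/, e.g. in examples/):
+ #   ruby examples/<this_file>.rb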