fractor 0.1.4 → 0.1.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop-https---raw-githubusercontent-com-riboseinc-oss-guides-main-ci-rubocop-yml +552 -0
- data/.rubocop.yml +14 -8
- data/.rubocop_todo.yml +284 -43
- data/README.adoc +111 -950
- data/docs/.lycheeignore +16 -0
- data/docs/Gemfile +24 -0
- data/docs/README.md +157 -0
- data/docs/_config.yml +151 -0
- data/docs/_features/error-handling.adoc +1192 -0
- data/docs/_features/index.adoc +80 -0
- data/docs/_features/monitoring.adoc +589 -0
- data/docs/_features/signal-handling.adoc +202 -0
- data/docs/_features/workflows.adoc +1235 -0
- data/docs/_guides/continuous-mode.adoc +736 -0
- data/docs/_guides/cookbook.adoc +1133 -0
- data/docs/_guides/index.adoc +55 -0
- data/docs/_guides/pipeline-mode.adoc +730 -0
- data/docs/_guides/troubleshooting.adoc +358 -0
- data/docs/_pages/architecture.adoc +1390 -0
- data/docs/_pages/core-concepts.adoc +1392 -0
- data/docs/_pages/design-principles.adoc +862 -0
- data/docs/_pages/getting-started.adoc +290 -0
- data/docs/_pages/installation.adoc +143 -0
- data/docs/_reference/api.adoc +1080 -0
- data/docs/_reference/error-reporting.adoc +670 -0
- data/docs/_reference/examples.adoc +181 -0
- data/docs/_reference/index.adoc +96 -0
- data/docs/_reference/troubleshooting.adoc +862 -0
- data/docs/_tutorials/complex-workflows.adoc +1022 -0
- data/docs/_tutorials/data-processing-pipeline.adoc +740 -0
- data/docs/_tutorials/first-application.adoc +384 -0
- data/docs/_tutorials/index.adoc +48 -0
- data/docs/_tutorials/long-running-services.adoc +931 -0
- data/docs/assets/images/favicon-16.png +0 -0
- data/docs/assets/images/favicon-32.png +0 -0
- data/docs/assets/images/favicon-48.png +0 -0
- data/docs/assets/images/favicon.ico +0 -0
- data/docs/assets/images/favicon.png +0 -0
- data/docs/assets/images/favicon.svg +45 -0
- data/docs/assets/images/fractor-icon.svg +49 -0
- data/docs/assets/images/fractor-logo.svg +61 -0
- data/docs/index.adoc +131 -0
- data/docs/lychee.toml +39 -0
- data/examples/api_aggregator/README.adoc +627 -0
- data/examples/api_aggregator/api_aggregator.rb +376 -0
- data/examples/auto_detection/README.adoc +407 -29
- data/examples/auto_detection/auto_detection.rb +9 -9
- data/examples/continuous_chat_common/message_protocol.rb +53 -0
- data/examples/continuous_chat_fractor/README.adoc +217 -0
- data/examples/continuous_chat_fractor/chat_client.rb +303 -0
- data/examples/continuous_chat_fractor/chat_common.rb +83 -0
- data/examples/continuous_chat_fractor/chat_server.rb +167 -0
- data/examples/continuous_chat_fractor/simulate.rb +345 -0
- data/examples/continuous_chat_server/README.adoc +135 -0
- data/examples/continuous_chat_server/chat_client.rb +303 -0
- data/examples/continuous_chat_server/chat_server.rb +359 -0
- data/examples/continuous_chat_server/simulate.rb +343 -0
- data/examples/error_reporting.rb +207 -0
- data/examples/file_processor/README.adoc +170 -0
- data/examples/file_processor/file_processor.rb +615 -0
- data/examples/file_processor/sample_files/invalid.csv +1 -0
- data/examples/file_processor/sample_files/orders.xml +24 -0
- data/examples/file_processor/sample_files/products.json +23 -0
- data/examples/file_processor/sample_files/users.csv +6 -0
- data/examples/hierarchical_hasher/README.adoc +629 -41
- data/examples/hierarchical_hasher/hierarchical_hasher.rb +12 -8
- data/examples/image_processor/README.adoc +610 -0
- data/examples/image_processor/image_processor.rb +349 -0
- data/examples/image_processor/processed_images/sample_10_processed.jpg.json +12 -0
- data/examples/image_processor/processed_images/sample_1_processed.jpg.json +12 -0
- data/examples/image_processor/processed_images/sample_2_processed.jpg.json +12 -0
- data/examples/image_processor/processed_images/sample_3_processed.jpg.json +12 -0
- data/examples/image_processor/processed_images/sample_4_processed.jpg.json +12 -0
- data/examples/image_processor/processed_images/sample_5_processed.jpg.json +12 -0
- data/examples/image_processor/processed_images/sample_6_processed.jpg.json +12 -0
- data/examples/image_processor/processed_images/sample_7_processed.jpg.json +12 -0
- data/examples/image_processor/processed_images/sample_8_processed.jpg.json +12 -0
- data/examples/image_processor/processed_images/sample_9_processed.jpg.json +12 -0
- data/examples/image_processor/test_images/sample_1.png +1 -0
- data/examples/image_processor/test_images/sample_10.png +1 -0
- data/examples/image_processor/test_images/sample_2.png +1 -0
- data/examples/image_processor/test_images/sample_3.png +1 -0
- data/examples/image_processor/test_images/sample_4.png +1 -0
- data/examples/image_processor/test_images/sample_5.png +1 -0
- data/examples/image_processor/test_images/sample_6.png +1 -0
- data/examples/image_processor/test_images/sample_7.png +1 -0
- data/examples/image_processor/test_images/sample_8.png +1 -0
- data/examples/image_processor/test_images/sample_9.png +1 -0
- data/examples/log_analyzer/README.adoc +662 -0
- data/examples/log_analyzer/log_analyzer.rb +579 -0
- data/examples/log_analyzer/sample_logs/apache.log +20 -0
- data/examples/log_analyzer/sample_logs/json.log +15 -0
- data/examples/log_analyzer/sample_logs/nginx.log +15 -0
- data/examples/log_analyzer/sample_logs/rails.log +29 -0
- data/examples/multi_work_type/README.adoc +576 -26
- data/examples/multi_work_type/multi_work_type.rb +30 -29
- data/examples/performance_monitoring.rb +120 -0
- data/examples/pipeline_processing/README.adoc +740 -26
- data/examples/pipeline_processing/pipeline_processing.rb +16 -16
- data/examples/priority_work_example.rb +155 -0
- data/examples/producer_subscriber/README.adoc +889 -46
- data/examples/producer_subscriber/producer_subscriber.rb +20 -16
- data/examples/scatter_gather/README.adoc +829 -27
- data/examples/scatter_gather/scatter_gather.rb +29 -28
- data/examples/simple/README.adoc +347 -0
- data/examples/simple/sample.rb +5 -5
- data/examples/specialized_workers/README.adoc +622 -26
- data/examples/specialized_workers/specialized_workers.rb +88 -45
- data/examples/stream_processor/README.adoc +206 -0
- data/examples/stream_processor/stream_processor.rb +284 -0
- data/examples/web_scraper/README.adoc +625 -0
- data/examples/web_scraper/web_scraper.rb +285 -0
- data/examples/workflow/README.adoc +406 -0
- data/examples/workflow/circuit_breaker/README.adoc +360 -0
- data/examples/workflow/circuit_breaker/circuit_breaker_workflow.rb +225 -0
- data/examples/workflow/conditional/README.adoc +483 -0
- data/examples/workflow/conditional/conditional_workflow.rb +215 -0
- data/examples/workflow/dead_letter_queue/README.adoc +374 -0
- data/examples/workflow/dead_letter_queue/dead_letter_queue_workflow.rb +217 -0
- data/examples/workflow/fan_out/README.adoc +381 -0
- data/examples/workflow/fan_out/fan_out_workflow.rb +202 -0
- data/examples/workflow/retry/README.adoc +248 -0
- data/examples/workflow/retry/retry_workflow.rb +195 -0
- data/examples/workflow/simple_linear/README.adoc +267 -0
- data/examples/workflow/simple_linear/simple_linear_workflow.rb +175 -0
- data/examples/workflow/simplified/README.adoc +329 -0
- data/examples/workflow/simplified/simplified_workflow.rb +222 -0
- data/exe/fractor +10 -0
- data/lib/fractor/cli.rb +288 -0
- data/lib/fractor/configuration.rb +307 -0
- data/lib/fractor/continuous_server.rb +183 -0
- data/lib/fractor/error_formatter.rb +72 -0
- data/lib/fractor/error_report_generator.rb +152 -0
- data/lib/fractor/error_reporter.rb +244 -0
- data/lib/fractor/error_statistics.rb +147 -0
- data/lib/fractor/execution_tracer.rb +162 -0
- data/lib/fractor/logger.rb +230 -0
- data/lib/fractor/main_loop_handler.rb +406 -0
- data/lib/fractor/main_loop_handler3.rb +135 -0
- data/lib/fractor/main_loop_handler4.rb +299 -0
- data/lib/fractor/performance_metrics_collector.rb +181 -0
- data/lib/fractor/performance_monitor.rb +215 -0
- data/lib/fractor/performance_report_generator.rb +202 -0
- data/lib/fractor/priority_work.rb +93 -0
- data/lib/fractor/priority_work_queue.rb +189 -0
- data/lib/fractor/result_aggregator.rb +33 -1
- data/lib/fractor/shutdown_handler.rb +168 -0
- data/lib/fractor/signal_handler.rb +80 -0
- data/lib/fractor/supervisor.rb +430 -144
- data/lib/fractor/supervisor_logger.rb +88 -0
- data/lib/fractor/version.rb +1 -1
- data/lib/fractor/work.rb +12 -0
- data/lib/fractor/work_distribution_manager.rb +151 -0
- data/lib/fractor/work_queue.rb +88 -0
- data/lib/fractor/work_result.rb +181 -9
- data/lib/fractor/worker.rb +75 -1
- data/lib/fractor/workflow/builder.rb +210 -0
- data/lib/fractor/workflow/chain_builder.rb +169 -0
- data/lib/fractor/workflow/circuit_breaker.rb +183 -0
- data/lib/fractor/workflow/circuit_breaker_orchestrator.rb +208 -0
- data/lib/fractor/workflow/circuit_breaker_registry.rb +112 -0
- data/lib/fractor/workflow/dead_letter_queue.rb +334 -0
- data/lib/fractor/workflow/execution_hooks.rb +39 -0
- data/lib/fractor/workflow/execution_strategy.rb +225 -0
- data/lib/fractor/workflow/execution_trace.rb +134 -0
- data/lib/fractor/workflow/helpers.rb +191 -0
- data/lib/fractor/workflow/job.rb +290 -0
- data/lib/fractor/workflow/job_dependency_validator.rb +120 -0
- data/lib/fractor/workflow/logger.rb +110 -0
- data/lib/fractor/workflow/pre_execution_context.rb +193 -0
- data/lib/fractor/workflow/retry_config.rb +156 -0
- data/lib/fractor/workflow/retry_orchestrator.rb +184 -0
- data/lib/fractor/workflow/retry_strategy.rb +93 -0
- data/lib/fractor/workflow/structured_logger.rb +30 -0
- data/lib/fractor/workflow/type_compatibility_validator.rb +222 -0
- data/lib/fractor/workflow/visualizer.rb +211 -0
- data/lib/fractor/workflow/workflow_context.rb +132 -0
- data/lib/fractor/workflow/workflow_executor.rb +669 -0
- data/lib/fractor/workflow/workflow_result.rb +55 -0
- data/lib/fractor/workflow/workflow_validator.rb +295 -0
- data/lib/fractor/workflow.rb +333 -0
- data/lib/fractor/wrapped_ractor.rb +66 -91
- data/lib/fractor/wrapped_ractor3.rb +161 -0
- data/lib/fractor/wrapped_ractor4.rb +242 -0
- data/lib/fractor.rb +93 -3
- metadata +192 -6
- data/tests/sample.rb.bak +0 -309
- data/tests/sample_working.rb.bak +0 -209
data/examples/producer_subscriber/README.adoc (new content, @@ -5,88 +5,931 @@):

toc::[]

== Purpose

The Producer-Subscriber example demonstrates hierarchical work decomposition in Fractor, where initial work items generate additional sub-work items dynamically. This showcases how to build multi-phase processing systems where early-stage workers produce work for later-stage workers, creating a tree-like processing structure. This is essential for document processing, recursive data structures, and divide-and-conquer algorithms.
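
In skeleton form (a condensed sketch of the components detailed below, reusing the `Supervisor`, `InitialWork`, `SubWork`, `MultiWorker`, and `create_sub_works` pieces shown later in this README), the flow is: run one supervisor over the initial items, derive sub-work descriptors from its results, then run a second supervisor over the derived items:

[source,ruby]
----
# Condensed two-phase flow; see DocumentProcessor below for the full version.
phase1 = Fractor::Supervisor.new(
  worker_pools: [{ worker_class: MultiWorker, num_workers: 4 }]
)
phase1.add_work_items(documents.map { |doc| InitialWork.new(doc, 0) })
phase1.run

# Producers' results become the subscribers' inputs.
descriptors = create_sub_works(phase1.results)

phase2 = Fractor::Supervisor.new(
  worker_pools: [{ worker_class: MultiWorker, num_workers: 4 }]
)
phase2.add_work_items(descriptors.map { |d| SubWork.new(d[:data], d[:parent_id], d[:depth]) })
phase2.run
----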

== Focus

This example demonstrates:

* **Dynamic work generation** from processing results
* **Two-phase processing** with producer and subscriber stages
* **Hierarchical result structures** from parent-child relationships
* **Work item referencing** using object IDs
* **Multi-level decomposition** patterns
* **Result tree assembly** from distributed processing

== Architecture

=== Two-Phase Processing Overview

[source]
----
Phase 1: Producer Phase (Initial Work Processing)
┌─────────────────────────────────────────────────────────┐
│  Documents: ["Annual Report", "Tech Docs", "Research"]  │
└─────────────────────────────────────────────────────────┘
                          │
                          │ Create InitialWork items
                          ▼
        ┌───────────────────────────────────┐
        │      Supervisor 1 (Phase 1)       │
        │   Worker Pool: MultiWorker × 4    │
        └───────────────────────────────────┘
                          │
              ┌───────────┼───────────┐
              │           │           │
              ▼           ▼           ▼
         ┌─────────┐ ┌─────────┐ ┌─────────┐
         │ Worker1 │ │ Worker2 │ │ Worker3 │
         │ Process │ │ Process │ │ Process │
         │  Doc 1  │ │  Doc 2  │ │  Doc 3  │
         └─────────┘ └─────────┘ └─────────┘
              │           │           │
              ▼           ▼           ▼
         ┌─────────┐ ┌─────────┐ ┌─────────┐
         │Result 1 │ │Result 2 │ │Result 3 │
         │+ IDs for│ │+ IDs for│ │+ IDs for│
         │sub-works│ │sub-works│ │sub-works│
         └─────────┘ └─────────┘ └─────────┘
                          │
                          │ Analyze results
                          │ Generate sub-work descriptors
                          ▼
        ┌───────────────────────────────────┐
        │  Sub-work items created:          │
        │  Doc1-0, Doc1-1, Doc1-2           │
        │  Doc2-0, Doc2-1, Doc2-2           │
        │  Doc3-0, Doc3-1, Doc3-2           │
        │  (each linked to parent via ID)   │
        └───────────────────────────────────┘

Phase 2: Subscriber Phase (Sub-Work Processing)
┌─────────────────────────────────────────────────────────┐
│             Sub-works with parent references            │
└─────────────────────────────────────────────────────────┘
                          │
                          │ Create SubWork items
                          ▼
        ┌───────────────────────────────────┐
        │      Supervisor 2 (Phase 2)       │
        │   Worker Pool: MultiWorker × 4    │
        └───────────────────────────────────┘
                          │
       ┌───────────┼───────────┬────────────┐
       │           │           │            │
       ▼           ▼           ▼            ▼
  ┌─────────┐ ┌─────────┐ ┌─────────┐ ┌─────────┐
  │ Worker1 │ │ Worker2 │ │ Worker3 │ │ Worker4 │
  │ Doc1-0  │ │ Doc1-1  │ │ Doc2-0  │ │ Doc3-0  │
  └─────────┘ └─────────┘ └─────────┘ └─────────┘
       │           │           │            │
       ▼           ▼           ▼            ▼
    All sub-work results with parent_id preserved
                          │
                          │ Build hierarchical tree
                          ▼
        ┌───────────────────────────────────┐
        │  Final Hierarchical Result Tree   │
        │                                   │
        │  Root: Annual Report              │
        │  ├─ Child 1: Doc1-0               │
        │  ├─ Child 2: Doc1-1               │
        │  └─ Child 3: Doc1-2               │
        │  Root: Tech Docs                  │
        │  ├─ Child 1: Doc2-0               │
        │  ...                              │
        └───────────────────────────────────┘
----

=== Work Decomposition Pattern

[source]
----
Initial Document
       │
       │ Phase 1: Producer generates sub-work descriptors
       │
       ▼
┌──────────────┐
│  Document A  │ ──┐
└──────────────┘   │ Generates 3 sections
                   │ (stored as descriptors with parent ID)
                   │
   ┌───────────────┼───────────────┐
   │               │               │
   ▼               ▼               ▼
┌────────┐     ┌────────┐     ┌────────┐
│  A-0   │     │  A-1   │     │  A-2   │
│parent: │     │parent: │     │parent: │
│Doc A ID│     │Doc A ID│     │Doc A ID│
└────────┘     └────────┘     └────────┘
   │               │               │
   │ Phase 2: Subscribers process sections
   │               │               │
   ▼               ▼               ▼
Processed      Processed      Processed
Section 0      Section 1      Section 2
   │               │               │
   │               │               │
   └───────────────┴───────────────┘
                   │
                   │ Tree assembly
                   ▼
           Final Result Tree
----

=== Object Linking Mechanism

[source]
----
┌─────────────────────────────────────────────────────┐
│                  Phase 1 Execution                  │
│                                                     │
│  work1 = InitialWork.new("Doc A")                   │
│  work1.object_id  => 12345                          │
│                                                     │
│  Process work1 → result1                            │
│  result1 stores: work1.object_id (12345)            │
│                                                     │
│  Generate descriptors:                              │
│  { data: "Doc A-0", parent_id: 12345, depth: 1 }    │
│  { data: "Doc A-1", parent_id: 12345, depth: 1 }    │
│  { data: "Doc A-2", parent_id: 12345, depth: 1 }    │
└─────────────────────────────────────────────────────┘
                          │
                          ▼
┌─────────────────────────────────────────────────────┐
│                  Phase 2 Execution                  │
│                                                     │
│  subwork1 = SubWork.new("Doc A-0", 12345, 1)        │
│  subwork2 = SubWork.new("Doc A-1", 12345, 1)        │
│  subwork3 = SubWork.new("Doc A-2", 12345, 1)        │
│                                                     │
│  Process each → results with parent_id: 12345       │
└─────────────────────────────────────────────────────┘
                          │
                          ▼
┌─────────────────────────────────────────────────────┐
│                    Tree Assembly                    │
│                                                     │
│  result_tree[12345] = {                             │
│    data: "Processed: Doc A",                        │
│    children: [                                      │
│      "Sub-processed: Doc A-0 (depth: 1)",           │
│      "Sub-processed: Doc A-1 (depth: 1)",           │
│      "Sub-processed: Doc A-2 (depth: 1)"            │
│    ]                                                │
│  }                                                  │
└─────────────────────────────────────────────────────┘
----

== Key Components

=== InitialWork: Producer Work Unit

The `InitialWork` class represents initial documents:

[source,ruby]
----
class InitialWork < Fractor::Work
  def initialize(data, depth = 0)
    super({
      data: data,   # <1>
      depth: depth  # <2>
    })
  end

  def data
    input[:data]
  end

  def depth
    input[:depth]
  end
end
----
<1> The document data to be processed
<2> Depth level (0 for initial work, increases with decomposition)

Purpose:

* **Root-level work**: Represents top-level items to decompose
* **Depth tracking**: Enables multi-level hierarchies
* **Identity preservation**: Object ID used to link children

=== SubWork: Generated Work Unit

The `SubWork` class represents decomposed sections:

[source,ruby]
----
class SubWork < Fractor::Work
  def initialize(data, parent_id = nil, depth = 0)
    super({
      data: data,           # <1>
      parent_id: parent_id, # <2>
      depth: depth          # <3>
    })
  end

  def parent_id
    input[:parent_id]
  end
end
----
<1> The section data derived from parent
<2> Reference to parent work via `object_id`
<3> Depth level (parent depth + 1)

Purpose:

* **Parent linkage**: Maintains relationship to source document
* **Hierarchical positioning**: Tracks decomposition level
* **Result assembly**: Enables tree reconstruction
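
As a minimal sketch of how the two classes link up (using only the constructors and readers shown above), a child records its parent's `object_id` at creation time:

[source,ruby]
----
parent = InitialWork.new("Doc A")       # depth 0
child  = SubWork.new("Doc A-0",
                     parent.object_id,  # link child to parent
                     parent.depth + 1)  # one level deeper

child.parent_id == parent.object_id # => true
----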

=== MultiWorker: Polymorphic Processor

The `MultiWorker` handles both work types:

[source,ruby]
----
class MultiWorker < Fractor::Worker
  def process(work)
    if work.is_a?(InitialWork)  # <1>
      process_initial_work(work)
    elsif work.is_a?(SubWork)   # <2>
      process_sub_work(work)
    else
      Fractor::WorkResult.new(
        error: "Unknown work type: #{work.class}",
        work: work
      )
    end
  end

  private

  def process_initial_work(work)
    sleep(rand(0.01..0.05))
    processed_data = "Processed: #{work}"

    Fractor::WorkResult.new(
      result: {
        processed_data: processed_data,
        sub_works: [] # <3>
      },
      work: work
    )
  end

  def process_sub_work(work)
    sleep(rand(0.01..0.03))
    processed_data = "Sub-processed: #{work.data} (depth: #{work.depth})"

    Fractor::WorkResult.new(
      result: {
        processed_data: processed_data,
        parent_id: work.parent_id # <4>
      },
      work: work
    )
  end
end
----
<1> Route to initial work processing
<2> Route to sub-work processing
<3> Placeholder for sub-work metadata (populated later)
<4> Preserve parent reference for tree assembly

Design benefits:

* **Single worker type**: Handles all processing stages
* **Type-based routing**: Clean separation of logic
* **Flexible processing**: Different logic per work type
* **Metadata preservation**: Maintains hierarchical links
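
Assuming the worker class can be instantiated directly (in the example, the `Supervisor` normally drives `MultiWorker` instances inside Ractors), the type-based routing can be exercised in isolation:

[source,ruby]
----
# Hypothetical direct calls, for illustration only.
worker = MultiWorker.new

worker.process(InitialWork.new("Doc A")).result[:processed_data]
# => "Processed: InitialWork: data=Doc A, depth=0"

worker.process(SubWork.new("Doc A-0", 12345, 1)).result[:processed_data]
# => "Sub-processed: Doc A-0 (depth: 1)"
----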

=== DocumentProcessor: Two-Phase Orchestrator

The `DocumentProcessor` manages the complete workflow:

[source,ruby]
----
class DocumentProcessor
  def process
    # Phase 1: Process initial documents
    supervisor = Fractor::Supervisor.new(
      worker_pools: [
        { worker_class: MultiWorker, num_workers: @worker_count }
      ]
    )

    initial_work_items = documents.map { |doc| InitialWork.new(doc, 0) } # <1>
    supervisor.add_work_items(initial_work_items)
    supervisor.run # <2>

    # Generate sub-work descriptors
    sub_works = create_sub_works(supervisor.results) # <3>

    # Phase 2: Process generated sub-works
    if !sub_works.empty?
      sub_supervisor = Fractor::Supervisor.new( # <4>
        worker_pools: [
          { worker_class: MultiWorker, num_workers: @worker_count }
        ]
      )

      sub_work_items = sub_works.map do |sw|
        SubWork.new(sw[:data], sw[:parent_id], sw[:depth]) # <5>
      end

      sub_supervisor.add_work_items(sub_work_items)
      sub_supervisor.run # <6>

      # Assemble hierarchical tree
      build_result_tree(supervisor.results, sub_supervisor.results) # <7>
    end

    format_tree # <8>
  end

  private

  def create_sub_works(results_aggregator)
    sub_works = []

    results_aggregator.results.each do |result|
      work = result.work

      next unless work.depth < 2 # <9>

      3.times do |i| # <10>
        sub_data = "#{work.data}-#{i}"
        sub_works << {
          data: sub_data,
          parent_id: work.object_id, # <11>
          depth: work.depth + 1
        }
      end
    end

    sub_works
  end

  def build_result_tree(initial_results, sub_results)
    # Build base tree from initial results
    initial_results.results.each do |result|
      @result_tree[result.work.object_id] = { # <12>
        data: result.result[:processed_data],
        children: []
      }
    end

    # Add sub-results to their parents
    sub_results.results.each do |result|
      parent_id = result.result[:parent_id]
      @result_tree[parent_id][:children] << result.result[:processed_data] # <13>
    end
  end
end
----
<1> Create initial work items (depth 0)
<2> Execute Phase 1 processing
<3> Analyze results and generate sub-work descriptors
<4> Create new supervisor for Phase 2
<5> Convert descriptors to SubWork objects
<6> Execute Phase 2 processing
<7> Assemble results into hierarchical tree
<8> Format tree for display
<9> Depth limit prevents infinite recursion
<10> Generate 3 sub-items per parent (configurable)
<11> Link to parent using object_id
<12> Create tree nodes indexed by object_id
<13> Attach children to their parent nodes

Orchestration features:

* **Sequential phases**: Phase 2 starts after Phase 1 completes
* **Dynamic work creation**: Sub-works generated from results
* **Independent supervisors**: Clean separation of concerns
* **Tree assembly**: Reconstructs hierarchy from flat results

== Usage

.Basic usage
[example]
====
[source,bash]
----
# Run the producer-subscriber example
ruby producer_subscriber.rb
----
====

.Programmatic usage
[example]
====
[source,ruby]
----
require_relative "producer_subscriber"

# Define documents to process
documents = [
  "Annual Report 2025",
  "Technical Documentation",
  "Research Paper"
]

# Create processor with 8 workers
processor = ProducerSubscriber::DocumentProcessor.new(documents, 8)

# Execute two-phase processing
result = processor.process

# Display hierarchical results
puts result
----
====

== Expected Output

[source,text]
----
Starting producer-subscriber example: Document Processing System
This example simulates a document processing system where:
1. Initial documents are broken down into sections
2. Sections are further broken down into paragraphs
3. Paragraphs are processed individually
4. Results are assembled into a hierarchical structure

Using 4 workers to process 3 documents

Processing Results:
===================
Root: Processed: InitialWork: data=Annual Report 2025, depth=0
  ├─ Child 1: Sub-processed: Annual Report 2025-0 (depth: 1)
  ├─ Child 2: Sub-processed: Annual Report 2025-1 (depth: 1)
  └─ Child 3: Sub-processed: Annual Report 2025-2 (depth: 1)

Root: Processed: InitialWork: data=Technical Documentation, depth=0
  ├─ Child 1: Sub-processed: Technical Documentation-0 (depth: 1)
  ├─ Child 2: Sub-processed: Technical Documentation-1 (depth: 1)
  └─ Child 3: Sub-processed: Technical Documentation-2 (depth: 1)

Root: Processed: InitialWork: data=Research Paper, depth=0
  ├─ Child 1: Sub-processed: Research Paper-0 (depth: 1)
  ├─ Child 2: Sub-processed: Research Paper-1 (depth: 1)
  └─ Child 3: Sub-processed: Research Paper-2 (depth: 1)

Processing completed in 0.123456 seconds
----

== Learning Points

=== 1. Producer-Subscriber Pattern

Phase 1 produces work for Phase 2:

[source,ruby]
----
# Phase 1: Producers
initial_results.each do |result|
  # Generate sub-work descriptors
  3.times do |i|
    sub_works << { data: "#{result.data}-#{i}", parent_id: result.work.object_id }
  end
end

# Phase 2: Subscribers
sub_works.each do |descriptor|
  process(SubWork.new(descriptor[:data], descriptor[:parent_id]))
end
----

**Key insight**: Producers don't directly invoke subscribers; they generate work descriptors that are queued and processed independently.

=== 2. Object ID for Parent Linking

Using `object_id` creates stable references:

[source,ruby]
----
# Capture parent ID during Phase 1
parent_work = InitialWork.new("Document")
parent_id = parent_work.object_id # e.g., 12345

# Use in Phase 2
child_work = SubWork.new("Section", parent_id)

# Reassemble in tree
result_tree[parent_id][:children] << child_result
----

**Why object_id works**:

* **Unique**: Each object has a unique identifier
* **Stable**: ID doesn't change during object lifetime
* **Simple**: No need for custom ID generation
* **Fast**: Hash lookup is O(1)

**Caveat**: Object IDs are only valid during the program's execution. For persistent storage, use custom IDs.
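
One possible alternative for durable references (a hypothetical sketch, not part of this example) is to mint a stable ID at construction time instead of relying on `object_id`:

[source,ruby]
----
require "securerandom"

# Hypothetical work class whose identity survives serialization:
# links use a UUID stored in the input hash, not object_id.
class PersistentWork < Fractor::Work
  def initialize(data, parent_ref = nil)
    super({
      data: data,
      id: SecureRandom.uuid,  # stable across runs and serialization
      parent_ref: parent_ref  # another work's :id
    })
  end

  def id
    input[:id]
  end

  def parent_ref
    input[:parent_ref]
  end
end
----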

=== 3. Two-Phase vs. Callback Approach

**Two-Phase** (this example):
[source,ruby]
----
# Phase 1
phase1_results = run_phase1()

# Analyze and generate Phase 2 work
phase2_work = analyze(phase1_results)

# Phase 2
phase2_results = run_phase2(phase2_work)
----

**Callback Approach** (like pipeline_processing):
[source,ruby]
----
supervisor.on_new_result do |result|
  # Immediately generate and queue sub-work
  sub_works.each { |sw| supervisor.add_work_item(sw) }
end
----

**Choose two-phase when**:

* You need to analyze all Phase 1 results before generating Phase 2 work
* Phase 2 work depends on aggregated Phase 1 results
* You want clear separation between phases

**Choose callback when**:

* Work can be generated immediately per result
* No cross-result dependencies
* Continuous streaming processing

=== 4. Depth Limiting

Prevent infinite recursion with depth checks:

[source,ruby]
----
def create_sub_works(results)
  results.each do |result|
    next unless result.work.depth < MAX_DEPTH # Depth limit

    # Generate sub-works
    generate_children(result.work)
  end
end
----

**Without depth limiting**:
[source]
----
Doc → Sections → Paragraphs → Sentences → Words → Characters → ...
(infinite recursion)
----

**With depth limiting (depth < 2)**:
[source]
----
Level 0: Documents
Level 1: Sections (generated)
Level 2: Stop (depth limit reached)
----

=== 5. Hierarchical Result Assembly

Build trees from flat results:

[source,ruby]
----
# Step 1: Create parent nodes
parents.each do |parent|
  tree[parent.id] = { data: parent.data, children: [] }
end

# Step 2: Attach children to parents
children.each do |child|
  tree[child.parent_id][:children] << child.data
end

# Result: Hierarchical structure
{
  doc1_id: { data: "Doc 1", children: ["Section 1", "Section 2"] },
  doc2_id: { data: "Doc 2", children: ["Section 3", "Section 4"] }
}
----

=== 6. Performance Characteristics

**Phase 1**:
[source]
----
Documents: N
Workers: W
Time: N/W (if evenly distributed)
----

**Phase 2**:
[source]
----
Sub-works: N × K (K = children per document)
Workers: W
Time: (N × K)/W
----

**Total Time**:
[source]
----
T_total = N/W + (N × K)/W
        = N(1 + K)/W

Example: 3 docs, 3 children each, 4 workers
T_total = 3(1 + 3)/4 = 12/4 = 3 time units
----
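
The same back-of-envelope formula can be wrapped in a tiny helper for sizing experiments (names here are illustrative):

[source,ruby]
----
# T_total = N(1 + K)/W, from the formula above.
def estimated_time_units(docs, children_per_doc, workers)
  (docs * (1 + children_per_doc)).fdiv(workers)
end

estimated_time_units(3, 3, 4) # => 3.0
----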

== Use Cases and Patterns

=== Document Processing

Process documents into sections and paragraphs:

[source,ruby]
----
# Phase 1: Extract sections
def process_initial_work(document)
  sections = extract_sections(document)
  sections.each do |section|
    sub_works << { data: section, parent_id: document.object_id }
  end
end

# Phase 2: Process sections
def process_sub_work(section)
  process_section_content(section)
end
----

=== Web Crawling

Crawl pages and follow links:

[source,ruby]
----
# Phase 1: Fetch page and extract links
def process_initial_work(url)
  page = fetch_page(url)
  links = extract_links(page)
  links.each do |link|
    sub_works << { data: link, parent_id: url.object_id, depth: url.depth + 1 }
  end
end

# Phase 2: Crawl linked pages
def process_sub_work(link)
  if link.depth < MAX_DEPTH
    # Create more work (recursive crawling)
  end
end
----

=== Directory Tree Processing

Process directories and their contents:

[source,ruby]
----
# Phase 1: List directory contents
def process_initial_work(directory)
  entries = Dir.entries(directory)
  entries.each do |entry|
    sub_works << {
      data: File.join(directory, entry),
      parent_id: directory.object_id,
      depth: directory.depth + 1
    }
  end
end

# Phase 2: Process files/subdirectories
def process_sub_work(path)
  if File.directory?(path) && path.depth < MAX_DEPTH
    # Generate more work for subdirectories
  else
    # Process file
  end
end
----

=== API Data Fetching

Fetch collections and related resources:

[source,ruby]
----
# Phase 1: Fetch collection
def process_initial_work(collection_url)
  collection = api_get(collection_url)
  collection[:items].each do |item|
    sub_works << {
      data: item[:detail_url],
      parent_id: collection_url.object_id
    }
  end
end

# Phase 2: Fetch individual items
def process_sub_work(item_url)
  fetch_item_details(item_url)
end
----

== Advanced Patterns

=== Multi-Level Decomposition

Extend to more than two levels:

[source,ruby]
----
def process
  current_work = [@initial_work]
  depth = 0

  while !current_work.empty? && depth < MAX_DEPTH
    supervisor = create_supervisor
    supervisor.add_work_items(current_work)
    supervisor.run

    # Generate next level
    current_work = create_sub_works(supervisor.results, depth + 1)
    depth += 1

    store_results(supervisor.results, depth)
  end

  build_multi_level_tree
end
----

=== Conditional Decomposition

Generate sub-work based on content:

[source,ruby]
----
def create_sub_works(results)
  sub_works = []

  results.each do |result|
    # Only decompose large documents
    if result.data.size > THRESHOLD
      # Split into smaller chunks
      chunks = split_into_chunks(result.data)
      chunks.each do |chunk|
        sub_works << { data: chunk, parent_id: result.work.object_id }
      end
    end
  end

  sub_works
end
----

=== Fan-Out with Varying Children

Different items produce different numbers of sub-items:

[source,ruby]
----
def create_sub_works(results)
  sub_works = []

  results.each do |result|
    # Number of children depends on content
    num_children = calculate_optimal_split(result.data)

    num_children.times do |i|
      sub_works << {
        data: extract_chunk(result.data, i, num_children),
        parent_id: result.work.object_id
      }
    end
  end

  sub_works
end
----

=== Result Aggregation with Statistics

Collect statistics during tree assembly:

[source,ruby]
----
def build_result_tree_with_stats(initial_results, sub_results)
  stats = { total_nodes: 0, max_depth: 0, avg_children: 0 }

  initial_results.results.each do |result|
    node = {
      data: result.result[:processed_data],
      children: [],
      stats: { processing_time: result.processing_time }
    }

    @result_tree[result.work.object_id] = node
    stats[:total_nodes] += 1
  end

  sub_results.results.each do |result|
    parent_id = result.result[:parent_id]
    @result_tree[parent_id][:children] << result
    stats[:total_nodes] += 1
  end

  stats[:avg_children] = sub_results.size.to_f / initial_results.size
  stats[:max_depth] = calculate_max_depth(@result_tree)

  stats
end
----

== Performance Tuning

=== Worker Allocation

Distribute workers across phases:

[source,ruby]
----
# Option 1: Same workers for both phases
phase1_workers = 8
phase2_workers = 8

# Option 2: More workers for phase with more work
phase1_workers = 4
phase2_workers = 12 # 3 sub-items per initial item

# Option 3: Adaptive based on work ratio
work_ratio = sub_works.size / initial_works.size
phase2_workers = [phase1_workers * work_ratio, MAX_WORKERS].min
----

=== Memory Management

Handle large result sets:

[source,ruby]
----
# Stream results instead of storing all in memory
def process_streaming
  phase1_supervisor.on_result do |result|
    # Process result immediately
    process_result(result)

    # Generate sub-works
    create_and_queue_sub_works(result)

    # Don't store in memory
    result = nil
  end

  phase1_supervisor.run
  phase2_supervisor.run
end
----

=== Batch Sub-Work Generation

Generate sub-works in batches:

[source,ruby]
----
def create_sub_works_batched(results, batch_size = 100)
  results.each_slice(batch_size) do |batch|
    batch_sub_works = []

    batch.each do |result|
      # Generate sub-works for this batch
      batch_sub_works.concat(generate_sub_works(result))
    end

    # Process batch
    yield batch_sub_works if block_given?
  end
end
----

== Next Steps

After understanding producer-subscriber, explore:

* **link:../scatter_gather/README.adoc[Scatter-Gather]**: Dynamic work distribution and collection
* **link:../pipeline_processing/README.adoc[Pipeline Processing]**: Sequential stage transformations
* **link:../hierarchical_hasher/README.adoc[Hierarchical Hasher]**: Map-reduce with hierarchies
* **link:../workflow/README.adoc[Workflow System]**: Complex orchestration patterns