llm-rsa 0.0.2__tar.gz → 0.0.3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
llm_rsa-0.0.3/PKG-INFO ADDED
@@ -0,0 +1,287 @@
1
+ Metadata-Version: 2.4
2
+ Name: llm_rsa
3
+ Version: 0.0.3
4
+ Summary: Recursive self aggregation
5
+ Home-page: https://github.com/risheekkumarb/llm_rsa
6
+ Author: Risheek kumar B
7
+ Author-email: b.risheekkumar@gmail.com
8
+ License: Apache-2.0
9
+ Keywords: nbdev jupyter notebook python
10
+ Classifier: Development Status :: 4 - Beta
11
+ Classifier: Intended Audience :: Developers
12
+ Classifier: Natural Language :: English
13
+ Classifier: Programming Language :: Python :: 3.9
14
+ Classifier: Programming Language :: Python :: 3.10
15
+ Classifier: Programming Language :: Python :: 3.11
16
+ Classifier: Programming Language :: Python :: 3.12
17
+ Classifier: Programming Language :: Python :: 3.13
18
+ Requires-Python: >=3.9
19
+ Description-Content-Type: text/markdown
20
+ License-File: LICENSE
21
+ Requires-Dist: fastcore
22
+ Requires-Dist: litellm
23
+ Requires-Dist: fastprogress
24
+ Provides-Extra: dev
25
+ Dynamic: author
26
+ Dynamic: author-email
27
+ Dynamic: classifier
28
+ Dynamic: description
29
+ Dynamic: description-content-type
30
+ Dynamic: home-page
31
+ Dynamic: keywords
32
+ Dynamic: license
33
+ Dynamic: license-file
34
+ Dynamic: provides-extra
35
+ Dynamic: requires-dist
36
+ Dynamic: requires-python
37
+ Dynamic: summary
38
+
39
+ # RSA - Recursive Self-Aggregation
40
+
41
+
42
+ <!-- WARNING: THIS FILE WAS AUTOGENERATED! DO NOT EDIT! -->
43
+
44
+ RSA implements Recursive Self-Aggregation, a technique for improving LLM
45
+ responses by generating multiple candidate answers and iteratively
46
+ aggregating them. The algorithm samples K candidates from a pool of N
47
+ responses, asks the LLM to synthesize an improved answer, and repeats
48
+ this process across multiple loops to converge on higher-quality
49
+ outputs.
50
+
51
+ ## Developer Guide
52
+
53
+ If you are new to using `nbdev` here are some useful pointers to get you
54
+ started.
55
+
56
+ ### Install in Development mode
57
+
58
+ ``` sh
59
+ # make sure package is installed in development mode
60
+ $ pip install -e .
61
+
62
+ # make changes under nbs/ directory
63
+ # ...
64
+
65
+ # compile to have changes apply to llm_rsa
66
+ $ nbdev_prepare
67
+ ```
68
+
69
+ ## Usage
70
+
71
+ ### Installation
72
+
73
+ Install latest from the GitHub
74
+ [repository](https://github.com/risheekkumarb/llm_rsa):
75
+
76
+ ``` sh
77
+ $ pip install git+https://github.com/risheekkumarb/llm_rsa.git
78
+ ```
79
+
80
+ or from [conda](https://anaconda.org/risheekkumarb/llm_rsa)
81
+
82
+ ``` sh
83
+ $ conda install -c risheekkumarb llm_rsa
84
+ ```
85
+
86
+ or from [pypi](https://pypi.org/project/llm_rsa)
87
+
88
+ ``` sh
89
+ $ pip install llm_rsa
90
+ ```
91
+
92
+ ### Documentation
93
+
94
+ Documentation can be found hosted on this GitHub
95
+ [repository](https://github.com/risheekkumarb/llm_rsa)’s
96
+ [pages](https://risheekkumarb.github.io/llm_rsa/). Additionally you can
97
+ find package manager specific guidelines on
98
+ [conda](https://anaconda.org/risheekkumarb/llm_rsa) and
99
+ [pypi](https://pypi.org/project/llm_rsa) respectively.
100
+
101
+ ## How to use
102
+
103
+ ### Basic Usage
104
+
105
+ Create an RSA instance with your task prompt, then call `run()` to start the
106
+ aggregation:
107
+
108
+ ``` python
109
+ task_prompt = '''Three people check into a hotel room that costs $30. They each contribute $10.
110
+ Later, the manager realizes the room only costs $25 and gives $5 to the bellboy to return.
111
+ The bellboy keeps $2 and gives $1 back to each person.
112
+ So each person paid $9 (total $27), plus the bellboy has $2, which equals $29.
113
+ Where did the extra dollar go?'''
114
+ ```
115
+
116
+ ``` python
117
+ agg_prompt = """Below is a reasoning problem followed by several candidate solutions.
118
+ Your job is to:
119
+ 1. Carefully analyze each candidate's reasoning step-by-step
120
+ 2. Identify which candidates make logical errors or arithmetic mistakes
121
+ 3. Note which approaches lead to correct reasoning
122
+ 4. Synthesize the best reasoning into a single, clear, correct solution
123
+
124
+ Show your work step-by-step, then state your final answer clearly."""
125
+ ```
126
+
127
+ ``` python
128
+ from llm_rsa.core import RSA
129
+
130
+ # Create RSA instance with a reasoning task
131
+ rsa = RSA(
132
+ task_prompt=task_prompt,
133
+ agg_prompt=agg_prompt,
134
+ N=4,
135
+ K=2,
136
+ loops=2
137
+ )
138
+
139
+ # Run the aggregation
140
+ results = rsa.run()
141
+ print(f"Generated {len(rsa.history)} total candidates across {rsa.loops} loops")
142
+ print('llm response: \n', results[-1].response)
143
+ ```
144
+
145
+ <style>
146
+ progress { appearance: none; border: none; border-radius: 4px; width: 300px;
147
+ height: 20px; vertical-align: middle; background: #e0e0e0; }
148
+ progress::-webkit-progress-bar { background: #e0e0e0; border-radius: 4px; }
149
+ progress::-webkit-progress-value { background: #2196F3; border-radius: 4px; }
150
+ progress::-moz-progress-bar { background: #2196F3; border-radius: 4px; }
151
+ progress:not([value]) {
152
+ background: repeating-linear-gradient(45deg, #7e7e7e, #7e7e7e 10px, #5c5c5c 10px, #5c5c5c 20px); }
153
+ progress.progress-bar-interrupted::-webkit-progress-value { background: #F44336; }
154
+ progress.progress-bar-interrupted::-moz-progress-value { background: #F44336; }
155
+ progress.progress-bar-interrupted::-webkit-progress-bar { background: #F44336; }
156
+ progress.progress-bar-interrupted::-moz-progress-bar { background: #F44336; }
157
+ progress.progress-bar-interrupted { background: #F44336; }
158
+ table.fastprogress { border-collapse: collapse; margin: 1em 0; font-size: 0.9em; }
159
+ table.fastprogress th, table.fastprogress td { padding: 8px 12px; border: 1px solid #ddd; text-align: left; }
160
+ table.fastprogress thead tr { background: #f8f9fa; font-weight: bold; }
161
+ table.fastprogress tbody tr:nth-of-type(even) { background: #f8f9fa; }
162
+ </style>
163
+
164
+ ``` html
165
+ <div>
166
+ <progress max="2" value="2"></progress> 100.00% [2/2 00:20&lt;00:00... Loop 2]</div>
167
+ ```
168
+
169
+ Generated 8 total candidates across 2 loops
170
+ llm response:
171
+ ### Analysis of Candidate Reasoning
172
+
173
+ Both **Candidate 1** and **Candidate 2** provide excellent, accurate explanations of the "missing dollar" riddle.
174
+
175
+ 1. **Candidate 1 Analysis:** This candidate correctly identifies the logical fallacy of adding the bellboy's kept money to the amount spent by the guests. They provide a clear "Follow the Money" breakdown showing that the $30 is distributed as $25 (hotel), $2 (bellboy), and $3 (guests). They correctly state that the $27 spent by the guests *already includes* the $2 held by the bellboy.
176
+ 2. **Candidate 2 Analysis:** This candidate identifies the "false premise" and "incorrect logic" of the riddle. Like Candidate 1, they break down the $30 correctly and explain that the riddle incorrectly adds a "cost" ($27) to a "profit" ($2) instead of adding the "cost" ($27) to the "refund" ($3).
177
+
178
+ Both candidates conclude that no money is actually missing and that the riddle is based on an arithmetic trick.
179
+
180
+ ---
181
+
182
+ ### Synthetic Correct Solution
183
+
184
+ The "missing dollar" is a result of a misleading calculation. To resolve the mystery, we must track the money accurately using two different perspectives: **where the money is now** and **the total amount spent vs. kept.**
185
+
186
+ #### 1. Where is the money now? (The $30 Breakdown)
187
+ The original $30 can be accounted for by looking at who currently holds the cash:
188
+ * **$25:** Held by the hotel (the actual cost of the room).
189
+ * **$2:** Held by the bellboy (the amount he kept).
190
+ * **$3:** Held by the three guests ($1 each in their pockets).
191
+ * **Total: $25 + $2 + $3 = $30.**
192
+ Nothing is missing.
193
+
194
+ #### 2. The Fallacy in the Riddle
195
+ The riddle states: *"Each person paid $9 (total $27), plus the bellboy has $2, which equals $29."*
196
+ This is logically incorrect because it **double-counts** the bellboy's money.
197
+
198
+ * **The Net Payment:** The guests spent a total of **$27**.
199
+ * **The Destination of that payment:** Of that $27, **$25** went to the hotel and **$2** went to the bellboy.
200
+ * **The Calculation:** Adding the $2 to the $27 is nonsensical because the $2 is *already part* of the $27.
201
+
202
+ To reach the original $30, you must add the money the guests **kept** to the money they **spent**:
203
+ **$27 (Spent) + $3 (Returned to them) = $30.**
204
+
205
+ ### Final Answer
206
+ The dollar did not go anywhere. The riddle creates an illusion by adding the bellboy's $2 to the $27 spent, when it should be subtracting the $2 from the $27 to find the hotel’s $25, or adding the $3 refund to the $27 to find the original $30.
207
+
208
+ ``` python
209
+ from pydantic import BaseModel
210
+ class Answer(BaseModel):
211
+ answer: str
212
+ confidence: float
213
+
214
+ prompt, response = rsa.aggregate(response_model=Answer)
215
+ print(response)
216
+ ```
217
+
218
+ {"answer":"The mystery of the missing dollar is caused by a logical fallacy known as misdirection. The riddle incorrectly adds the bellboy's $2 to the $27 paid by the guests, creating a mathematically irrelevant number ($29). To solve the puzzle, we simply need to track the original $30 using two balance methods:\n\n1. The Distribution Method (Where is the money now?):\n- $25 is in the hotel's register.\n- $2 is in the bellboy's pocket.\n- $3 is in the guests' pockets ($1 each).\n- Total: $25 + $2 + $3 = $30. \nEverything is accounted for.\n\n2. The Net Expenditure Method (What did the guests pay?):\nThe guests paid $30 and got $3 back, meaning they spent exactly $27. \n- $25 of that $27 went to the hotel room cost.\n- $2 of that $27 went to the bellboy as a tip.\n- Total: $25 + $2 = $27.\n\nThe error in the riddle is adding the $2 to the $27. Because the $2 is already part of the $27, adding them together double-counts the bellboy's tip. To get back to the original $30, you must add the $27 spent to the $3 refund ($27 + $3 = $30). There is no missing dollar.","confidence":1.0}
219
+
220
+ ``` python
221
+ from litellm import completion
222
+
223
+ # Single direct call (baseline)
224
+ response = completion(
225
+ model='openrouter/google/gemini-3-flash-preview',
226
+ messages=[{"role": "user", "content": task_prompt}],
227
+ temperature=1.0
228
+ )
229
+ baseline_answer = response.choices[0].message.content
230
+ print("=== BASELINE (single call) ===")
231
+ print(baseline_answer)
232
+ ```
233
+
234
+ === BASELINE (single call) ===
235
+ This is a classic riddle that relies on a **logical fallacy**—specifically, an error in how the numbers are added together at the end.
236
+
237
+ The "lost" dollar doesn't exist; it only appears to be missing because the math at the end of the story adds two numbers that should actually be **subtracted**.
238
+
239
+ Here is the correct breakdown of the money:
240
+
241
+ ### 1. Follow the Money
242
+ Instead of adding the bellboy's tip to the guests' payment, look at where the original $30 is at the very end:
243
+ * **$25** is in the hotel cash register.
244
+ * **$2** is in the bellboy's pocket.
245
+ * **$3** was returned to the guests ($1 each).
246
+ * **Total: $25 + $2 + $3 = $30.** (The math is perfect).
247
+
248
+ ### 2. The Flaw in the Riddle
249
+ The riddle says: *"Each person paid $9 (total $27), plus the bellboy has $2, which equals $29."*
250
+
251
+ **The error is adding the $2 to the $27.**
252
+ The $27 that the guests spent **already includes** the $2 that the bellboy took.
253
+
254
+ Think of it this way:
255
+ * The guests paid **$27**.
256
+ * Where did that $27 go? **$25** went to the hotel and **$2** went to the bellboy.
257
+ * To reach the original $30, you should add the **$3** they got back, not the $2 the bellboy kept.
258
+
259
+ **The correct equation is:**
260
+ $27 (Paid) + $3 (Refund) = $30.
261
+ *OR*
262
+ $27 (Paid) - $2 (Bellboy's Tip) = $25 (Room Cost).
263
+
264
+ ### Configuration Options
265
+
266
+ | Parameter | Default | Description |
267
+ |----|----|----|
268
+ | `task_prompt` | (required) | The main task/question to solve |
269
+ | `model` | `'openrouter/google/gemini-3-flash-preview'` | LLM model to use (any litellm-compatible model) |
270
+ | `N` | 4 | Population size (candidates per loop) |
271
+ | `K` | 3 | Number of candidates to aggregate |
272
+ | `loops` | 2 | Number of aggregation iterations |
273
+ | `temperature` | 1.0 | LLM sampling temperature |
274
+ | `n_workers` | 4 | Parallel workers for LLM calls |
275
+ | `agg_prompt` | (auto) | Custom aggregation prompt (optional) |
276
+
277
+ ### How RSA Works
278
+
279
+ 1. **Loop 0**: Generate N independent responses to the task prompt
280
+ 2. **Loop 1+**: For each of N new candidates, randomly sample K
281
+ previous candidates and ask the LLM to aggregate them into an
282
+ improved answer
283
+ 3. **Repeat** for the specified number of loops
284
+ 4. **Return** the final pool of aggregated candidates
285
+
286
+ The `history` attribute stores all candidates across all loops, allowing
287
+ you to trace the aggregation process.
@@ -0,0 +1,249 @@
1
+ # RSA - Recursive Self-Aggregation
2
+
3
+
4
+ <!-- WARNING: THIS FILE WAS AUTOGENERATED! DO NOT EDIT! -->
5
+
6
+ RSA implements Recursive Self-Aggregation, a technique for improving LLM
7
+ responses by generating multiple candidate answers and iteratively
8
+ aggregating them. The algorithm samples K candidates from a pool of N
9
+ responses, asks the LLM to synthesize an improved answer, and repeats
10
+ this process across multiple loops to converge on higher-quality
11
+ outputs.
12
+
13
+ ## Developer Guide
14
+
15
+ If you are new to using `nbdev` here are some useful pointers to get you
16
+ started.
17
+
18
+ ### Install in Development mode
19
+
20
+ ``` sh
21
+ # make sure package is installed in development mode
22
+ $ pip install -e .
23
+
24
+ # make changes under nbs/ directory
25
+ # ...
26
+
27
+ # compile to have changes apply to llm_rsa
28
+ $ nbdev_prepare
29
+ ```
30
+
31
+ ## Usage
32
+
33
+ ### Installation
34
+
35
+ Install latest from the GitHub
36
+ [repository](https://github.com/risheekkumarb/llm_rsa):
37
+
38
+ ``` sh
39
+ $ pip install git+https://github.com/risheekkumarb/llm_rsa.git
40
+ ```
41
+
42
+ or from [conda](https://anaconda.org/risheekkumarb/llm_rsa)
43
+
44
+ ``` sh
45
+ $ conda install -c risheekkumarb llm_rsa
46
+ ```
47
+
48
+ or from [pypi](https://pypi.org/project/llm_rsa)
49
+
50
+ ``` sh
51
+ $ pip install llm_rsa
52
+ ```
53
+
54
+ ### Documentation
55
+
56
+ Documentation can be found hosted on this GitHub
57
+ [repository](https://github.com/risheekkumarb/llm_rsa)’s
58
+ [pages](https://risheekkumarb.github.io/llm_rsa/). Additionally you can
59
+ find package manager specific guidelines on
60
+ [conda](https://anaconda.org/risheekkumarb/llm_rsa) and
61
+ [pypi](https://pypi.org/project/llm_rsa) respectively.
62
+
63
+ ## How to use
64
+
65
+ ### Basic Usage
66
+
67
+ Create an RSA instance with your task prompt, then call `run()` to start the
68
+ aggregation:
69
+
70
+ ``` python
71
+ task_prompt = '''Three people check into a hotel room that costs $30. They each contribute $10.
72
+ Later, the manager realizes the room only costs $25 and gives $5 to the bellboy to return.
73
+ The bellboy keeps $2 and gives $1 back to each person.
74
+ So each person paid $9 (total $27), plus the bellboy has $2, which equals $29.
75
+ Where did the extra dollar go?'''
76
+ ```
77
+
78
+ ``` python
79
+ agg_prompt = """Below is a reasoning problem followed by several candidate solutions.
80
+ Your job is to:
81
+ 1. Carefully analyze each candidate's reasoning step-by-step
82
+ 2. Identify which candidates make logical errors or arithmetic mistakes
83
+ 3. Note which approaches lead to correct reasoning
84
+ 4. Synthesize the best reasoning into a single, clear, correct solution
85
+
86
+ Show your work step-by-step, then state your final answer clearly."""
87
+ ```
88
+
89
+ ``` python
90
+ from llm_rsa.core import RSA
91
+
92
+ # Create RSA instance with a reasoning task
93
+ rsa = RSA(
94
+ task_prompt=task_prompt,
95
+ agg_prompt=agg_prompt,
96
+ N=4,
97
+ K=2,
98
+ loops=2
99
+ )
100
+
101
+ # Run the aggregation
102
+ results = rsa.run()
103
+ print(f"Generated {len(rsa.history)} total candidates across {rsa.loops} loops")
104
+ print('llm response: \n', results[-1].response)
105
+ ```
106
+
107
+ <style>
108
+ progress { appearance: none; border: none; border-radius: 4px; width: 300px;
109
+ height: 20px; vertical-align: middle; background: #e0e0e0; }
110
+ progress::-webkit-progress-bar { background: #e0e0e0; border-radius: 4px; }
111
+ progress::-webkit-progress-value { background: #2196F3; border-radius: 4px; }
112
+ progress::-moz-progress-bar { background: #2196F3; border-radius: 4px; }
113
+ progress:not([value]) {
114
+ background: repeating-linear-gradient(45deg, #7e7e7e, #7e7e7e 10px, #5c5c5c 10px, #5c5c5c 20px); }
115
+ progress.progress-bar-interrupted::-webkit-progress-value { background: #F44336; }
116
+ progress.progress-bar-interrupted::-moz-progress-value { background: #F44336; }
117
+ progress.progress-bar-interrupted::-webkit-progress-bar { background: #F44336; }
118
+ progress.progress-bar-interrupted::-moz-progress-bar { background: #F44336; }
119
+ progress.progress-bar-interrupted { background: #F44336; }
120
+ table.fastprogress { border-collapse: collapse; margin: 1em 0; font-size: 0.9em; }
121
+ table.fastprogress th, table.fastprogress td { padding: 8px 12px; border: 1px solid #ddd; text-align: left; }
122
+ table.fastprogress thead tr { background: #f8f9fa; font-weight: bold; }
123
+ table.fastprogress tbody tr:nth-of-type(even) { background: #f8f9fa; }
124
+ </style>
125
+
126
+ ``` html
127
+ <div>
128
+ <progress max="2" value="2"></progress> 100.00% [2/2 00:20&lt;00:00... Loop 2]</div>
129
+ ```
130
+
131
+ Generated 8 total candidates across 2 loops
132
+ llm response:
133
+ ### Analysis of Candidate Reasoning
134
+
135
+ Both **Candidate 1** and **Candidate 2** provide excellent, accurate explanations of the "missing dollar" riddle.
136
+
137
+ 1. **Candidate 1 Analysis:** This candidate correctly identifies the logical fallacy of adding the bellboy's kept money to the amount spent by the guests. They provide a clear "Follow the Money" breakdown showing that the $30 is distributed as $25 (hotel), $2 (bellboy), and $3 (guests). They correctly state that the $27 spent by the guests *already includes* the $2 held by the bellboy.
138
+ 2. **Candidate 2 Analysis:** This candidate identifies the "false premise" and "incorrect logic" of the riddle. Like Candidate 1, they break down the $30 correctly and explain that the riddle incorrectly adds a "cost" ($27) to a "profit" ($2) instead of adding the "cost" ($27) to the "refund" ($3).
139
+
140
+ Both candidates conclude that no money is actually missing and that the riddle is based on an arithmetic trick.
141
+
142
+ ---
143
+
144
+ ### Synthetic Correct Solution
145
+
146
+ The "missing dollar" is a result of a misleading calculation. To resolve the mystery, we must track the money accurately using two different perspectives: **where the money is now** and **the total amount spent vs. kept.**
147
+
148
+ #### 1. Where is the money now? (The $30 Breakdown)
149
+ The original $30 can be accounted for by looking at who currently holds the cash:
150
+ * **$25:** Held by the hotel (the actual cost of the room).
151
+ * **$2:** Held by the bellboy (the amount he kept).
152
+ * **$3:** Held by the three guests ($1 each in their pockets).
153
+ * **Total: $25 + $2 + $3 = $30.**
154
+ Nothing is missing.
155
+
156
+ #### 2. The Fallacy in the Riddle
157
+ The riddle states: *"Each person paid $9 (total $27), plus the bellboy has $2, which equals $29."*
158
+ This is logically incorrect because it **double-counts** the bellboy's money.
159
+
160
+ * **The Net Payment:** The guests spent a total of **$27**.
161
+ * **The Destination of that payment:** Of that $27, **$25** went to the hotel and **$2** went to the bellboy.
162
+ * **The Calculation:** Adding the $2 to the $27 is nonsensical because the $2 is *already part* of the $27.
163
+
164
+ To reach the original $30, you must add the money the guests **kept** to the money they **spent**:
165
+ **$27 (Spent) + $3 (Returned to them) = $30.**
166
+
167
+ ### Final Answer
168
+ The dollar did not go anywhere. The riddle creates an illusion by adding the bellboy's $2 to the $27 spent, when it should be subtracting the $2 from the $27 to find the hotel’s $25, or adding the $3 refund to the $27 to find the original $30.
169
+
170
+ ``` python
171
+ from pydantic import BaseModel
172
+ class Answer(BaseModel):
173
+ answer: str
174
+ confidence: float
175
+
176
+ prompt, response = rsa.aggregate(response_model=Answer)
177
+ print(response)
178
+ ```
179
+
180
+ {"answer":"The mystery of the missing dollar is caused by a logical fallacy known as misdirection. The riddle incorrectly adds the bellboy's $2 to the $27 paid by the guests, creating a mathematically irrelevant number ($29). To solve the puzzle, we simply need to track the original $30 using two balance methods:\n\n1. The Distribution Method (Where is the money now?):\n- $25 is in the hotel's register.\n- $2 is in the bellboy's pocket.\n- $3 is in the guests' pockets ($1 each).\n- Total: $25 + $2 + $3 = $30. \nEverything is accounted for.\n\n2. The Net Expenditure Method (What did the guests pay?):\nThe guests paid $30 and got $3 back, meaning they spent exactly $27. \n- $25 of that $27 went to the hotel room cost.\n- $2 of that $27 went to the bellboy as a tip.\n- Total: $25 + $2 = $27.\n\nThe error in the riddle is adding the $2 to the $27. Because the $2 is already part of the $27, adding them together double-counts the bellboy's tip. To get back to the original $30, you must add the $27 spent to the $3 refund ($27 + $3 = $30). There is no missing dollar.","confidence":1.0}
181
+
182
+ ``` python
183
+ from litellm import completion
184
+
185
+ # Single direct call (baseline)
186
+ response = completion(
187
+ model='openrouter/google/gemini-3-flash-preview',
188
+ messages=[{"role": "user", "content": task_prompt}],
189
+ temperature=1.0
190
+ )
191
+ baseline_answer = response.choices[0].message.content
192
+ print("=== BASELINE (single call) ===")
193
+ print(baseline_answer)
194
+ ```
195
+
196
+ === BASELINE (single call) ===
197
+ This is a classic riddle that relies on a **logical fallacy**—specifically, an error in how the numbers are added together at the end.
198
+
199
+ The "lost" dollar doesn't exist; it only appears to be missing because the math at the end of the story adds two numbers that should actually be **subtracted**.
200
+
201
+ Here is the correct breakdown of the money:
202
+
203
+ ### 1. Follow the Money
204
+ Instead of adding the bellboy's tip to the guests' payment, look at where the original $30 is at the very end:
205
+ * **$25** is in the hotel cash register.
206
+ * **$2** is in the bellboy's pocket.
207
+ * **$3** was returned to the guests ($1 each).
208
+ * **Total: $25 + $2 + $3 = $30.** (The math is perfect).
209
+
210
+ ### 2. The Flaw in the Riddle
211
+ The riddle says: *"Each person paid $9 (total $27), plus the bellboy has $2, which equals $29."*
212
+
213
+ **The error is adding the $2 to the $27.**
214
+ The $27 that the guests spent **already includes** the $2 that the bellboy took.
215
+
216
+ Think of it this way:
217
+ * The guests paid **$27**.
218
+ * Where did that $27 go? **$25** went to the hotel and **$2** went to the bellboy.
219
+ * To reach the original $30, you should add the **$3** they got back, not the $2 the bellboy kept.
220
+
221
+ **The correct equation is:**
222
+ $27 (Paid) + $3 (Refund) = $30.
223
+ *OR*
224
+ $27 (Paid) - $2 (Bellboy's Tip) = $25 (Room Cost).
225
+
226
+ ### Configuration Options
227
+
228
+ | Parameter | Default | Description |
229
+ |----|----|----|
230
+ | `task_prompt` | (required) | The main task/question to solve |
231
+ | `model` | `'openrouter/google/gemini-3-flash-preview'` | LLM model to use (any litellm-compatible model) |
232
+ | `N` | 4 | Population size (candidates per loop) |
233
+ | `K` | 3 | Number of candidates to aggregate |
234
+ | `loops` | 2 | Number of aggregation iterations |
235
+ | `temperature` | 1.0 | LLM sampling temperature |
236
+ | `n_workers` | 4 | Parallel workers for LLM calls |
237
+ | `agg_prompt` | (auto) | Custom aggregation prompt (optional) |
238
+
239
+ ### How RSA Works
240
+
241
+ 1. **Loop 0**: Generate N independent responses to the task prompt
242
+ 2. **Loop 1+**: For each of N new candidates, randomly sample K
243
+ previous candidates and ask the LLM to aggregate them into an
244
+ improved answer
245
+ 3. **Repeat** for the specified number of loops
246
+ 4. **Return** the final pool of aggregated candidates
247
+
248
+ The `history` attribute stores all candidates across all loops, allowing
249
+ you to trace the aggregation process.
@@ -43,12 +43,14 @@ class RSA:
43
43
 
44
44
  # %% ../nbs/00_core.ipynb #3c39e9e6
45
45
  @patch
46
- def _call_llm(self:RSA, prompt):
46
+ def _call_llm(self:RSA, prompt, **kwargs):
47
47
  "Call the LLM with the given prompt and return the response content"
48
48
  response = completion(
49
49
  model=self.model,
50
50
  messages=[{"role": "user", "content": prompt}],
51
51
  temperature=self.temperature,
52
+ num_retries=3,
53
+ **kwargs
52
54
  )
53
55
  return response.choices[0].message.content
54
56
 
@@ -97,11 +99,13 @@ def run(self:RSA):
97
99
  self.history.extend(pool)
98
100
  return pool
99
101
 
100
- # %% ../nbs/00_core.ipynb #b4edf71c
102
+ # %% ../nbs/00_core.ipynb #72cb9b61
101
103
  @patch
102
- def aggregate(self:RSA):
103
- "Final aggregation one LLM call to aggregate all the final loop candidates"
104
+ def aggregate(self:RSA, agg_prompt=None, response_model=None):
105
+ "Final aggregation: one LLM call to aggregate all final loop candidates, with optional structured output"
106
+ agg_prompt = agg_prompt or self.agg_prompt
104
107
  candidates = self.history.filter(lambda x: x.loop_id==(self.loops-1))
105
108
  responses = '\n'.join(f"---- Candidate {i+1} ----\n{c.response}" for i, c in enumerate(candidates))
106
- prompt = f"{self.agg_prompt}\n{self.task_prompt}\n\nCANDIDATE ANSWERS:\n{responses}\n\nProvide the best aggregated answer:"
107
- return prompt, self._call_llm(prompt)
109
+ prompt = f"{agg_prompt}\n{self.task_prompt}\n\nCANDIDATE ANSWERS:\n{responses}\n\nProvide the best aggregated answer:"
110
+ result = self._call_llm(prompt, **({'response_format': response_model} if response_model else {}))
111
+ return prompt, result