reasoning-benchmarks 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1 @@
1
+ recursive-include reasoning_tasks/src *.csv
@@ -0,0 +1,58 @@
1
+ Metadata-Version: 2.1
2
+ Name: reasoning-benchmarks
3
+ Version: 0.1.0
4
+ Summary: Generate argumentation reasoning benchmarks and prompts.
5
+ Classifier: Programming Language :: Python :: 3
6
+ Classifier: Programming Language :: Python :: 3.9
7
+ Classifier: Programming Language :: Python :: 3.10
8
+ Classifier: Programming Language :: Python :: 3.11
9
+ Classifier: Programming Language :: Python :: 3.12
10
+ Classifier: Operating System :: OS Independent
11
+ Classifier: Intended Audience :: Science/Research
12
+ Requires-Python: >=3.9
13
+ Description-Content-Type: text/markdown
14
+ Requires-Dist: pandas
15
+ Requires-Dist: matplotlib
16
+ Requires-Dist: networkx
17
+
18
+ # reasoning-benchmarks
19
+
20
+ `reasoning-benchmarks` generates structured argumentation benchmarks for evaluating reasoning systems.
21
+
22
+ ## Features
23
+
24
+ - Generate chain and star argument graphs.
25
+ - Create prompt text for binary and multi-label reasoning tasks.
26
+ - Export benchmark datasets directly to CSV.
27
+
28
+ ## Installation
29
+
30
+ ```bash
31
+ pip install reasoning-benchmarks
32
+ ```
33
+
34
+ ## Quickstart
35
+
36
+ ```python
37
+ from reasoning_benchmarks import generate_benchmark
38
+
39
+ benchmark = generate_benchmark(
40
+ chain_range=(1, 5),
41
+ star_range=(3, 6),
42
+ shuffled="both",
43
+ percentage_irrelevant=0,
44
+ attack_words=["not credible", "unreliable", "lying"],
45
+ num_variations_per_graph=(1, 1),
46
+ seed=95,
47
+ )
48
+
49
+ print(benchmark.head())
50
+ ```
51
+
52
+ ## Development Build
53
+
54
+ ```bash
55
+ python -m build
56
+ ```
57
+
58
+ The package includes CSV resources under `reasoning_benchmarks/src` and accesses them via `importlib.resources`.
@@ -0,0 +1,41 @@
1
+ # reasoning-benchmarks
2
+
3
+ `reasoning-benchmarks` generates structured argumentation benchmarks for evaluating reasoning systems.
4
+
5
+ ## Features
6
+
7
+ - Generate chain and star argument graphs.
8
+ - Create prompt text for binary and multi-label reasoning tasks.
9
+ - Export benchmark datasets directly to CSV.
10
+
11
+ ## Installation
12
+
13
+ ```bash
14
+ pip install reasoning-benchmarks
15
+ ```
16
+
17
+ ## Quickstart
18
+
19
+ ```python
20
+ from reasoning_benchmarks import generate_benchmark
21
+
22
+ benchmark = generate_benchmark(
23
+ chain_range=(1, 5),
24
+ star_range=(3, 6),
25
+ shuffled="both",
26
+ percentage_irrelevant=0,
27
+ attack_words=["not credible", "unreliable", "lying"],
28
+ num_variations_per_graph=(1, 1),
29
+ seed=95,
30
+ )
31
+
32
+ print(benchmark.head())
33
+ ```
34
+
35
+ ## Development Build
36
+
37
+ ```bash
38
+ python -m build
39
+ ```
40
+
41
+ The package includes CSV resources under `reasoning_benchmarks/src` and accesses them via `importlib.resources`.
@@ -0,0 +1,3 @@
1
+ [build-system]
2
+ requires = ["setuptools>=61", "wheel"]
3
+ build-backend = "setuptools.build_meta"
@@ -0,0 +1 @@
1
+ from .benchmarkgenerator import *
@@ -0,0 +1,358 @@
1
+ from __future__ import annotations
2
+
3
+ from dataclasses import dataclass
4
+ from typing import Callable, Iterable
5
+
6
+
7
@dataclass(eq=False)
class Argument:
    """A single argument (node) of an argumentation framework.

    ``eq=False`` keeps the default identity-based ``__eq__``/``__hash__``,
    so two instances with the same field values are still distinct objects
    and every instance can be used as a dict key or set member.
    """

    # Identifier of the argument.
    ID: int
    # Free-text description; empty by default.
    description: str = ""
    # Label of the argument: True, False, or None when no label is set.
    value: bool | None = None
14
+
15
+
16
class ArgumentGraph:
    """Directed attack graph for abstract argumentation.

    Nodes are arguments, edges are attacks (attacker -> target).

    Attributes:
        arguments: Arguments in insertion order.
        attacks: Recorded ``(attacker, target)`` pairs in insertion order.
    """

    def __init__(self) -> None:
        self.arguments: list[Argument] = []
        self.attacks: list[tuple[Argument, Argument]] = []

    def __repr__(self) -> str:
        return f"ArgumentGraph(arguments={self.arguments}, attacks={self.attacks})"

    def add_argument(self, argument: Argument) -> None:
        """Add an argument node.

        Raises:
            ValueError: If an argument with the same ``ID`` is already present.
        """
        if any(existing.ID == argument.ID for existing in self.arguments):
            raise ValueError(f"Argument ID {argument.ID} already exists.")
        self.arguments.append(argument)

    def get_argument(self, argument_id: int) -> Argument:
        """Return the argument whose ``ID`` equals *argument_id*.

        Raises:
            KeyError: If no such argument has been added.
        """
        for argument in self.arguments:
            if argument.ID == argument_id:
                return argument
        raise KeyError(f"Argument ID {argument_id} does not exist.")

    def add_attack(self, attacker: Argument | int, target: Argument | int) -> None:
        """Add a directed attack edge.

        Parameters can be Argument instances or argument IDs. IDs are looked
        up in the graph (``KeyError`` if unknown); Argument instances are
        stored as given, without checking that they belong to the graph.
        """
        attacker_arg = self._resolve_argument(attacker)
        target_arg = self._resolve_argument(target)
        self.attacks.append((attacker_arg, target_arg))

    def _resolve_argument(self, argument: Argument | int) -> Argument:
        """Return *argument* itself, or look it up when an ID was passed."""
        if isinstance(argument, Argument):
            return argument
        return self.get_argument(argument)

    def solve(self) -> dict[str, list[Argument]]:
        """Compute labels using grounded-style propagation.

        Rules:
        - Start with unattacked arguments as accepted.
        - Arguments attacked by accepted arguments are rejected.
        - Arguments with all attackers rejected become accepted.
        - Remaining arguments are undecided.

        As a side effect every argument's ``value`` is set to ``True``
        (accepted), ``False`` (rejected) or ``None`` (undecided).

        Returns:
            A dict with the keys ``"accepted"``, ``"rejected"`` and
            ``"undecided"``; each list keeps the insertion order of
            ``self.arguments``.
        """
        incoming = {arg: set() for arg in self.arguments}
        outgoing = {arg: set() for arg in self.arguments}

        for attacker, target in self.attacks:
            # Attacks that touch an argument outside the graph are ignored.
            if attacker in outgoing and target in incoming:
                outgoing[attacker].add(target)
                incoming[target].add(attacker)

        accepted = {arg for arg in self.arguments if not incoming[arg]}
        rejected: set[Argument] = set()

        # Alternate the two propagation rules until neither adds a label.
        changed = True
        while changed:
            changed = False

            newly_rejected = {
                target
                for accepted_arg in accepted
                for target in outgoing[accepted_arg]
            } - rejected
            if newly_rejected:
                rejected |= newly_rejected
                changed = True

            newly_accepted = {
                arg
                for arg in self.arguments
                if arg not in accepted
                and arg not in rejected
                and incoming[arg]
                and incoming[arg].issubset(rejected)
            }
            if newly_accepted:
                accepted |= newly_accepted
                changed = True

        undecided = set(self.arguments) - accepted - rejected

        for arg in self.arguments:
            if arg in accepted:
                arg.value = True
            elif arg in rejected:
                arg.value = False
            else:
                arg.value = None

        return {
            "accepted": [arg for arg in self.arguments if arg in accepted],
            "rejected": [arg for arg in self.arguments if arg in rejected],
            "undecided": [arg for arg in self.arguments if arg in undecided],
        }

    def _build_degree_maps(self) -> tuple[
        dict[Argument, list[Argument]],
        dict[Argument, list[Argument]],
    ]:
        """Return (outgoing, incoming) adjacency lists for all arguments.

        Attacks whose attacker or target is not in the graph are skipped;
        duplicate attacks are kept, so they show up as a degree above one.
        """
        outgoing: dict[Argument, list[Argument]] = {arg: [] for arg in self.arguments}
        incoming: dict[Argument, list[Argument]] = {arg: [] for arg in self.arguments}
        for attacker, target in self.attacks:
            if attacker in outgoing and target in incoming:
                outgoing[attacker].append(target)
                incoming[target].append(attacker)
        return outgoing, incoming

    def is_chain(self) -> bool:
        """Return True if the graph is a strictly linear chain.

        A chain is a single directed path a_n → a_{n-1} → … → a_1 where
        every argument attacks exactly the preceding one and nothing else.
        An empty graph, and a single argument without attacks, count as
        chains.

        Requirements
        ------------
        - Every argument has at most one attacker (in-degree ≤ 1).
        - Every argument attacks at most one other (out-degree ≤ 1).
        - Exactly one argument is unattacked (the chain head).
        - Exactly one argument attacks nothing (the chain tail).
        - Exactly n − 1 attacks are recorded.
        - Following the attacks from the head visits all n arguments.
        """
        n = len(self.arguments)
        if n == 0:
            return True
        if n == 1:
            return len(self.attacks) == 0

        outgoing, incoming = self._build_degree_maps()

        if any(len(targets) > 1 for targets in outgoing.values()):
            return False
        if any(len(attackers) > 1 for attackers in incoming.values()):
            return False

        sources = [arg for arg in self.arguments if not incoming[arg]]
        sinks = [arg for arg in self.arguments if not outgoing[arg]]
        if len(sources) != 1 or len(sinks) != 1:
            return False

        # Every recorded attack has to be one of the n − 1 path edges; this
        # also rejects attacks that reference arguments outside the graph.
        if len(self.attacks) != n - 1:
            return False

        # The edge count alone cannot rule out a short path next to disjoint
        # cycles (a cycle of m nodes has m edges, so the total is still
        # n − 1). Walk from the head and require that the path covers every
        # argument. The walk terminates: the head has no attacker and every
        # other argument has exactly one, so no argument can be entered twice.
        current = sources[0]
        visited = 1
        while outgoing[current]:
            current = outgoing[current][0]
            visited += 1
        return visited == n

    def is_star_chain(self) -> bool:
        """Return True if the graph is a star-chain.

        A star-chain has one central argument that is attacked by multiple
        independent chains and attacks nothing itself.

        Example::

            attack(b, a), attack(c, b)   # chain 1: c → b → a
            attack(d, a)                 # chain 2: d → a
            attack(e, a), attack(f, e)   # chain 3: f → e → a

        Requirements
        ------------
        - Exactly one center: out-degree = 0, in-degree ≥ 2.
        - Every non-center argument: out-degree = 1, in-degree ≤ 1.
        - Every non-center argument can reach the center without cycling
          (detects disconnected sub-cycles in the non-center subgraph).
        """
        n = len(self.arguments)
        if n < 3:
            return False

        outgoing, incoming = self._build_degree_maps()

        centers = [arg for arg in self.arguments if not outgoing[arg]]
        if len(centers) != 1:
            return False

        center = centers[0]
        if len(incoming[center]) < 2:
            return False

        non_center = [arg for arg in self.arguments if arg is not center]

        if any(len(outgoing[arg]) != 1 for arg in non_center):
            return False
        if any(len(incoming[arg]) > 1 for arg in non_center):
            return False

        # Follow each non-center path; it must reach the center within n steps.
        # Exceeding n steps implies a cycle that never terminates at the center.
        for start in non_center:
            current = start
            for _ in range(n):
                targets = outgoing[current]
                if not targets:
                    break
                current = targets[0]
                if current is center:
                    break
            else:
                return False  # cycle detected — center never reached

        return True

    def _status_of(self, argument: Argument) -> str:
        """Map ``argument.value`` to "accepted", "rejected" or "undecided"."""
        if argument.value is True:
            return "accepted"
        if argument.value is False:
            return "rejected"
        return "undecided"

    def _node_label(
        self,
        argument: Argument,
        *,
        include_description: bool,
        include_status: bool,
        status_labels: dict[str, str],
    ) -> str:
        """Build the " | "-separated label: ID, then description and status if requested."""
        parts = [f"{argument.ID}"]
        if include_description:
            parts.append(argument.description)
        if include_status:
            parts.append(status_labels[self._status_of(argument)])
        return " | ".join(parts)

    def print_graph(
        self,
        style: str = "ascii",
        include_description: bool = True,
        include_status: bool = True,
        status_labels: dict[str, str] | None = None,
        sort_key: Callable[[Argument], object] | None = None,
        only_ids: Iterable[int] | None = None,
        return_string: bool = False,
    ) -> str | None:
        """Render the graph as text.

        Supported styles:
        - ascii
        - adjacency
        - mermaid

        Args:
            style: One of the supported styles above.
            include_description: Add each argument's description to its label.
            include_status: Add each argument's status text to its label.
            status_labels: Text for the "accepted", "rejected" and
                "undecided" statuses; upper-case names are used when omitted.
            sort_key: Ordering of the arguments; defaults to sorting by ID.
            only_ids: If given, only arguments with these IDs (and attacks
                between them) are rendered.
            return_string: Return the text instead of printing it.

        Returns:
            The rendered text when *return_string* is true, otherwise None.

        Raises:
            ValueError: If *style* is not a supported style.
        """
        if status_labels is None:
            status_labels = {
                "accepted": "ACCEPTED",
                "rejected": "REJECTED",
                "undecided": "UNDECIDED",
            }

        allowed_styles = {"ascii", "adjacency", "mermaid"}
        if style not in allowed_styles:
            raise ValueError(f"style must be one of {sorted(allowed_styles)}")

        selected = list(self.arguments)
        if only_ids is not None:
            id_filter = set(only_ids)
            selected = [arg for arg in selected if arg.ID in id_filter]

        if sort_key is None:
            selected.sort(key=lambda arg: arg.ID)
        else:
            selected.sort(key=sort_key)

        # Only attacks with both endpoints among the selected arguments are shown.
        selected_set = set(selected)
        selected_attacks = [
            (attacker, target)
            for attacker, target in self.attacks
            if attacker in selected_set and target in selected_set
        ]

        if style == "adjacency":
            outgoing = {arg: [] for arg in selected}
            for attacker, target in selected_attacks:
                outgoing[attacker].append(target)

            lines: list[str] = []
            for arg in selected:
                node_text = self._node_label(
                    arg,
                    include_description=include_description,
                    include_status=include_status,
                    status_labels=status_labels,
                )
                lines.append(node_text)

                targets = outgoing[arg]
                if targets:
                    target_ids = ", ".join(str(t.ID) for t in sorted(targets, key=lambda x: x.ID))
                    lines.append(f" attacks -> {target_ids}")
                else:
                    lines.append(" attacks -> -")

            rendered = "\n".join(lines)

        elif style == "mermaid":
            lines = ["graph TD"]
            for arg in selected:
                label = self._node_label(
                    arg,
                    include_description=include_description,
                    include_status=include_status,
                    status_labels=status_labels,
                )
                # Double quotes would end the quoted node label early.
                safe_label = label.replace('"', "'")
                lines.append(f" A{arg.ID}[\"{safe_label}\"]")

            for attacker, target in selected_attacks:
                lines.append(f" A{attacker.ID} --> A{target.ID}")

            rendered = "\n".join(lines)

        else:
            lines = ["Arguments:"]
            for arg in selected:
                lines.append(
                    f" [{arg.ID}] " + self._node_label(
                        arg,
                        include_description=include_description,
                        include_status=include_status,
                        status_labels=status_labels,
                    )
                )

            lines.append("Attacks:")
            if selected_attacks:
                for attacker, target in sorted(selected_attacks, key=lambda pair: (pair[0].ID, pair[1].ID)):
                    lines.append(f" [{attacker.ID}] -> [{target.ID}]")
            else:
                lines.append(" (none)")

            rendered = "\n".join(lines)

        if return_string:
            return rendered

        print(rendered)
        return None


# Backward-compatible alias for the previous lower-camel-case class name.
argumentGraph = ArgumentGraph
358
+