llm-feedback-control 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,106 @@
1
+ """llm-feedback-control — get reliable, checkable structured output from a small
2
+ local language model by wrapping it in ordinary deterministic code.
3
+
4
+ WHAT IT DOES, concretely
5
+ ------------------------
6
+ You hand it a process described in plain English::
7
+
8
+ "A claim enters Intake. From Intake it goes to Triage. Triage goes to
9
+ FastTrack or to Investigation. ..."
10
+
11
+ and it:
12
+ 1. turns that into a state machine (states + transitions);
13
+ 2. computes *provable* facts about it — which steps are dead ends, whether
14
+ there are loops, which steps can't be reached;
15
+ 3. writes a report in which every statement is backed by one of those
16
+ checked facts (so it can't quietly make things up);
17
+ 4. knows its own limits: if the text isn't actually a finite step-by-step
18
+ process (e.g. "prices drift up as confidence grows"), it REFUSES instead
19
+ of inventing a fake state machine; and if the model's first pass missed
20
+ part of the process, it loops to fill the gaps — or refuses if it can't.
21
+
22
+ WHY "FEEDBACK CONTROL" (the analogy, explained)
23
+ -----------------------------------------------
24
+ The design is borrowed from electronics. A raw LLM is like a very high-gain
25
+ amplifier: hugely powerful, but left to run "open-loop" it overshoots — fluent,
26
+ yet it drifts and hallucinates. Engineers tame such an amplifier by adding a
27
+ *feedback loop*: feed the output back, compare it to a stable reference, and
28
+ trade some raw power for precision and stability. This library is that feedback
29
+ loop for an LLM. The "reference" is plain, deterministic code — graph checks
30
+ and schema rules the model's output is measured against.
31
+
32
+ Two kinds of feedback, in plain terms:
33
+
34
+ - NEGATIVE feedback = the stabilising checks (``run_audit``):
35
+ * decide first whether the text is even the kind of thing we can analyse
36
+ exactly, and refuse the fuzzy ones;
37
+ * force the model's answer into a strict shape (with a no-model fallback);
38
+ * compute the provable graph facts;
39
+ * say "I can't do this exactly" rather than guess.
40
+ - POSITIVE feedback = the gap-filling loop (``extract_iterative``):
41
+ re-ask the model about anything the text mentions that's missing from
42
+ its answer, repeating until nothing is missing (a "fixed point") — and
43
+ refuse if it never settles.
44
+
45
+ Zero third-party runtime dependencies. The deterministic core runs with no model
46
+ at all; an LLM is a pure upgrade and is fully injectable (pass ``generate=``).
47
+
48
+ Quickstart (works with no model)::
49
+
50
+ from llm_feedback_control import run_audit
51
+ r = run_audit("A claim enters Intake. From Intake it goes to Triage. "
52
+ "Triage goes to FastTrack or to Investigation.")
53
+ print(r["result"]); print(r["report_facts"])
54
+ """
55
+ from .llm import gen, gen_ceiling, info, doctor, BackendError
56
+ from .auditor import (
57
+ run_audit,
58
+ regime_gate,
59
+ gate_heuristic,
60
+ extract_workflow,
61
+ exact_analysis,
62
+ graph_facts,
63
+ transfer_operator,
64
+ fp_orbit,
65
+ grounded_report,
66
+ valid,
67
+ fallback_extract,
68
+ norm,
69
+ )
70
+ from .feedback import (
71
+ extract_iterative,
72
+ consistency_gaps,
73
+ candidate_states,
74
+ candidate_trans,
75
+ )
76
+
77
+ __version__ = "0.1.0"
78
+
79
+ __all__ = [
80
+ # headline
81
+ "run_audit",
82
+ "extract_iterative",
83
+ # negative-feedback pipeline parts
84
+ "regime_gate",
85
+ "gate_heuristic",
86
+ "extract_workflow",
87
+ "exact_analysis",
88
+ "graph_facts",
89
+ "transfer_operator",
90
+ "fp_orbit",
91
+ "grounded_report",
92
+ "valid",
93
+ "fallback_extract",
94
+ "norm",
95
+ # positive-feedback parts
96
+ "consistency_gaps",
97
+ "candidate_states",
98
+ "candidate_trans",
99
+ # client
100
+ "gen",
101
+ "gen_ceiling",
102
+ "info",
103
+ "doctor",
104
+ "BackendError",
105
+ "__version__",
106
+ ]
@@ -0,0 +1,98 @@
1
+ """Command-line entry point: python -m llm_feedback_control / lfc
2
+
3
+ lfc "A claim enters Intake. From Intake it goes to Triage." # audit text
4
+ lfc --check # backend doctor
5
+ lfc --demo # M1/M2/M3 demos
6
+ lfc # quick sample run
7
+
8
+ The audit runs with no model at all (deterministic regex extraction + exact
9
+ graph analysis); if an LLM backend is reachable it is used automatically and
10
+ the extraction quality goes up. Run ``lfc --check`` to see what's available.
11
+ """
12
+ import argparse
13
+ import json
14
+ import sys
15
+
16
+ from . import __version__, run_audit, doctor, info
17
+
18
+ try:
19
+ sys.stdout.reconfigure(encoding="utf-8")
20
+ except Exception:
21
+ pass
22
+
23
+ SAMPLE = ("A claim enters Intake. From Intake it goes to Triage. Triage goes to "
24
+ "FastTrack or to Investigation. FastTrack goes to Payout. Investigation "
25
+ "goes to Payout or to Denied. Payout goes to Closed. Denied goes to Closed.")
26
+
27
+
28
def _print_audit(r):
    """Print an audit result dict in the CLI's human-readable layout.

    ``r`` must contain ``gate`` (with ``verdict`` and ``reason``) and
    ``result``; the ``extraction``, ``report_facts`` and ``report_english``
    sections are printed only when present.
    """
    gate = r["gate"]
    print("gate :", gate["verdict"], "|", gate["reason"])

    if "extraction" in r:
        extraction = r["extraction"]
        print(f"extracted : via={extraction['via']} states={extraction['states']}")
        print(f" transitions={extraction['transitions']}")

    if "report_facts" in r:
        print("facts :")
        fact_lines = r["report_facts"].rstrip().splitlines()
        for fact in fact_lines:
            print(" " + fact)

    english = r.get("report_english")
    if english:
        # The LLM paraphrase is clipped to 400 characters.
        print("grounded :", english[:400])

    print("result :", r["result"])
41
+
42
+
43
def _print_doctor():
    """Print a backend diagnosis built from ``doctor()`` and ``info()``.

    Reports whether Ollama is reachable, which models are available, whether
    the small model is present, and (when the ceiling backend is OpenAI)
    whether an API key is set.
    """
    d = doctor()
    print("llm-feedback-control doctor")
    print(" config :", info())

    if not d["ollama_reachable"]:
        print(f" ollama : NOT reachable at {d['ollama_host']}")
        print(" -> The deterministic pipeline still works (regex extraction + exact")
        print(" graph analysis). For the full LLM path, install Ollama")
        print(f" (https://ollama.com) and `ollama pull {d['small_model']}`,")
        print(" or set CEILING_BACKEND=openai with OPENAI_API_KEY.")
    else:
        print(f" ollama : reachable at {d['ollama_host']}")
        pulled = ', '.join(d['models_available'])
        print(f" models : {pulled or '(none pulled)'}")
        if not d["small_model_present"]:
            print(f" -> small model '{d['small_model']}' NOT pulled. Run: "
                  f"ollama pull {d['small_model']}")
        else:
            print(f" -> small model '{d['small_model']}' is present. Full LLM path enabled.")

    if d["ceiling_backend"] == "openai":
        if d["openai_key_set"]:
            key_status = "OPENAI_API_KEY set"
        else:
            key_status = "OPENAI_API_KEY MISSING"
        print(" openai :", key_status)
63
+
64
+
65
def main(argv=None):
    """Run the ``lfc`` command line and return the process exit code.

    Args:
        argv: Argument list to parse; ``None`` lets argparse read
            ``sys.argv[1:]``.

    Returns:
        ``0`` on every path handled here (argparse itself exits on
        ``--version`` or on invalid arguments).

    Modes, checked in this order: ``--check`` prints the backend doctor,
    ``--demo`` runs the auditor demos, otherwise the given text (or the
    built-in ``SAMPLE``) is audited and printed either as JSON (``--json``)
    or in the human-readable layout.
    """
    ap = argparse.ArgumentParser(
        prog="lfc",
        description="LLM feedback control: audit a process description into an "
                    "exact, grounded, refusable report.")
    ap.add_argument("text", nargs="?", help="process description to audit "
                                            "(omit to run a built-in sample)")
    ap.add_argument("--check", action="store_true", help="probe the LLM backend and exit")
    ap.add_argument("--demo", action="store_true", help="run the M1/M2/M3 demos and exit")
    ap.add_argument("--json", action="store_true", help="print the raw audit dict as JSON")
    ap.add_argument("--version", action="version", version=f"llm-feedback-control {__version__}")
    args = ap.parse_args(argv)

    if args.check:
        _print_doctor()
        return 0
    if args.demo:
        # Imported lazily so the demo code is only loaded when requested.
        from .auditor import main as demo_main
        demo_main()
        return 0

    text = args.text or SAMPLE
    if not args.text:
        # In --json mode stdout must contain nothing but the JSON document,
        # otherwise `lfc --json | jq` (or json.loads on the output) fails; the
        # notice is therefore routed to stderr there and to stdout otherwise.
        notice_stream = sys.stderr if args.json else sys.stdout
        print("(no text given — auditing a built-in sample; pass your own as an argument)\n",
              file=notice_stream)
    r = run_audit(text)
    if args.json:
        print(json.dumps(r, indent=2))
    else:
        _print_audit(r)
    return 0
95
+
96
+
97
+ if __name__ == "__main__":
98
+ raise SystemExit(main())
@@ -0,0 +1,337 @@
1
+ """The LLM-feedback-control pipeline, end-to-end and self-contained.
2
+
3
+ A small LLM is wrapped in a deterministic feedback network so the system knows
4
+ what it can compute exactly, does so, and refuses the rest:
5
+
6
+ English text
7
+ -> extract finite transition system (LLM + schema + deterministic fallback)
8
+ -> regime gate (hybrid: heuristic + LLM tie-break)
9
+ -> exact analysis (standard graph facts + an optional
10
+ finite-field spectral fingerprint)
11
+ -> readout contract + injectivity (refuse non-injective lifts)
12
+ -> grounded report (every claim backed by a trace fact)
13
+
14
+ This is the NEGATIVE-feedback half (gate / ground / refuse). The bounded
15
+ POSITIVE-feedback loop (iterate-to-fixed-point re-extraction) lives in
16
+ feedback.py.
17
+
18
+ Every entry point that can use an LLM takes an injectable ``generate`` callable
19
+ (``f(prompt, fmt=None) -> str``); it defaults to the local-Ollama client in
20
+ ``llm.py``. If no model is reachable, the pipeline degrades to the deterministic
21
+ path automatically — so ``run_audit`` returns a real result with no model at all.
22
+
23
+ Demos (run ``python -m llm_feedback_control.auditor``):
24
+ M1 process auditor on a real workflow (exact trace + grounded report)
25
+ M2 gate refusal on belief/continuous input ("model-only, refused")
26
+ M3 non-injective readout refusal (the no-hallucinated-synthesis guard)
27
+ plus a HARDENING test of the gate on deliberately ambiguous / mixed inputs.
28
+ """
29
+ import sys, json, re
30
+
31
+ from .llm import gen
32
+
33
+ try:
34
+ sys.stdout.reconfigure(encoding="utf-8")
35
+ except Exception:
36
+ pass
37
+
38
+
39
def norm(s):
    """Return ``s`` as a comparison key: stringified, lowercased, and with
    every character other than ``a-z`` / ``0-9`` removed."""
    lowered = str(s).lower()
    return re.sub(r"[^a-z0-9]", "", lowered)
41
+
42
+
43
+ # === exact engine: F_p fp_orbit + graph facts (self-contained) ============
44
+ PRIMES = (2, 3, 5, 7)
45
+
46
+
47
def fp_orbit(M, x0, p, max_steps=20000):
    """Iterate ``x -> M @ x (mod p)`` from ``x0`` until a vector repeats.

    Returns ``(transient, period, cycle)`` where ``transient`` is the step at
    which the repeated vector was first seen, ``period`` is the cycle length
    and ``cycle`` lists the vectors on the cycle. If no repeat is found
    within ``max_steps`` iterations the result is ``(None, None, [])``.
    """
    dim = len(x0)
    current = [value % p for value in x0]
    first_seen = {}
    trajectory = []
    step = 0
    while step < max_steps:
        key = tuple(current)
        if key in first_seen:
            entered = first_seen[key]
            return entered, step - entered, trajectory[entered:]
        first_seen[key] = step
        trajectory.append(current)
        # Matrix-vector product over the integers, reduced mod p per entry.
        current = [
            sum(M[row][col] * current[col] for col in range(dim)) % p
            for row in range(dim)
        ]
        step += 1
    return None, None, []
57
+
58
+
59
def transfer_operator(states, trans, p):
    """Build the transition-count matrix of a graph, reduced mod ``p``.

    Entry ``M[j][i]`` counts (mod ``p``) the transitions from ``states[i]`` to
    ``states[j]``. Transitions whose endpoints are not both in ``states`` are
    ignored. Returns ``(M, idx)`` with ``idx`` mapping state name -> index.
    """
    idx = {name: position for position, name in enumerate(states)}
    size = len(states)
    M = [[0 for _ in range(size)] for _ in range(size)]
    for src, dst in trans:
        if src not in idx or dst not in idx:
            continue
        row, col = idx[dst], idx[src]  # flow src -> dst
        M[row][col] = (M[row][col] + 1) % p
    return M, idx
66
+
67
+
68
def graph_facts(states, trans):
    """Compute the checkable graph facts of a transition system.

    Args:
        states: State names; ``states[0]`` is treated as the start state.
        trans: Iterable of ``(source, target)`` pairs. Pairs whose source is
            not in ``states`` are ignored; targets outside ``states`` are
            treated as having no outgoing edges.

    Returns:
        A dict with ``terminal_states`` (sorted states with no outgoing
        transition), ``unreachable_states`` (sorted states not reachable from
        the start) and ``has_cycle`` (whether any directed cycle exists).

    Both traversals are iterative, so long chains of states cannot exhaust
    the interpreter's recursion limit.
    """
    # Adjacency lists in one pass over the transitions, preserving their order.
    out = {s: [] for s in states}
    for a, b in trans:
        if a in out:
            out[a].append(b)

    terminals = sorted(s for s in states if not out[s])

    # Reachability from the start state (explicit stack).
    start = states[0] if states else None
    seen = set()
    if start is not None:
        seen = {start}
        stack = [start]
        while stack:
            u = stack.pop()
            for v in out.get(u, ()):
                if v not in seen:
                    seen.add(v)
                    stack.append(v)
    unreachable = sorted(s for s in states if s not in seen)

    # Cycle detection: three-colour DFS with an explicit stack of
    # (node, successor-iterator) frames; an edge into a GREY node (one that
    # is still on the stack) closes a cycle.
    WHITE, GREY, BLACK = 0, 1, 2
    color = {s: WHITE for s in states}
    has_cycle = False
    for root in states:
        if color[root] != WHITE:
            continue
        color[root] = GREY
        frames = [(root, iter(out[root]))]
        while frames:
            node, successors = frames[-1]
            for nxt in successors:
                shade = color.get(nxt)  # None for targets outside `states`
                if shade == GREY:
                    has_cycle = True
                elif shade == WHITE:
                    color[nxt] = GREY
                    frames.append((nxt, iter(out[nxt])))
                    break
            else:
                # All successors handled: the node is finished.
                color[node] = BLACK
                frames.pop()
    return dict(terminal_states=terminals, unreachable_states=unreachable,
                has_cycle=has_cycle)
97
+
98
+
99
def exact_analysis(states, trans):
    """Run the per-prime orbit analysis plus the graph facts.

    For every prime in ``PRIMES`` the transfer operator is iterated from a
    unit vector on the first state. Each entry records the transient, the
    period, the first cycle vector (``mode``), whether that mode is all zero
    (``bad_prime``) and whether the sum-readout separates the distinct cycle
    vectors (``readout_injective``, the M3 guard).

    Returns ``{"primes": [...], "facts": graph_facts(...)}``; with no states
    the prime list is empty.
    """
    if not states:
        return {"primes": [], "facts": graph_facts(states, trans)}

    size = len(states)
    launch = [1] + [0] * (size - 1)  # launch at the start state
    results = []
    for p in PRIMES:
        M = transfer_operator(states, trans, p)[0]
        transient, period, cycle = fp_orbit(M, launch, p)
        if cycle:
            mode = cycle[0]
            # Readout = coordinate sum mod p; injective iff distinct cycle
            # vectors give distinct readouts.
            readout_values = {sum(vec) % p for vec in cycle}
            cycle_vectors = {tuple(vec) for vec in cycle}
            injective = len(readout_values) == len(cycle_vectors)
        else:
            mode = [0] * size
            injective = True
        is_bad = all(component == 0 for component in mode)
        results.append({
            "prime": p,
            "transient": transient,
            "period": period,
            "mode": mode,
            "bad_prime": is_bad,
            "readout_injective": injective,
        })
    return {"primes": results, "facts": graph_facts(states, trans)}
117
+
118
+
119
+ # === regime gate (hybrid: heuristic + LLM tie-break) ======================
120
+ CONT_BELIEF = ["continuous", "continuously", "drift", "rises", "grows", "increase",
121
+ "rate", "percent", "gradually", "slowly", "temperature", "price",
122
+ "demand", "confidence", "trust", "trusts", "trustworthy", "feels",
123
+ "happier", "sentiment", "usually", "accumulat", "improves", "volume"]
124
+ FINITE_CUES = ["goes to", "moves from", "enters", "starts in", "opens in", "proceed to",
125
+ "escalates", "commits", "rolls back", "either", " or ", "then",
126
+ "if approved", "if rejected", "if unresolved", "retry", "fails"]
127
+
128
+
129
def gate_heuristic(text):
    """Count cue-phrase occurrences in ``text`` (case-insensitive substrings).

    Returns ``(fin, cont)``: total hits of ``FINITE_CUES`` and of
    ``CONT_BELIEF`` respectively.
    """
    lowered = text.lower()

    def hits(cues):
        total = 0
        for cue in cues:
            total += lowered.count(cue)
        return total

    cont = hits(CONT_BELIEF)
    fin = hits(FINITE_CUES)
    return fin, cont
134
+
135
+
136
def regime_gate(text, use_llm=True, generate=None):
    """Classify ``text`` as "finite_structural", "model_only" or "mixed".

    A clear margin in the cue counts is decided by the heuristic alone.
    Otherwise, when ``use_llm`` is true, the model (``generate`` or the
    default client) is asked for a label; any failure or unrecognised label
    falls through to a heuristic fallback.

    Returns a dict with ``verdict``, ``reason`` and ``source``
    ("heuristic" or "llm").
    """
    ask = generate or gen
    fin, cont = gate_heuristic(text)
    counts = f"(fin={fin},cont={cont})"

    # Clear case: a margin of two or more, unless both cue families are
    # strongly present.
    both_strong = fin > 0 and cont > 0 and min(fin, cont) >= 2
    if abs(fin - cont) >= 2 and not both_strong:
        if fin > cont:
            verdict = "finite_structural"
        else:
            verdict = "model_only"
        return {"verdict": verdict, "reason": f"heuristic {counts}", "source": "heuristic"}

    # Ambiguous / mixed: let the model adjudicate (best effort).
    if use_llm:
        try:
            raw = ask('Classify the description into exactly one label: '
                      '"finite_structural" (a finite set of states and transitions), '
                      '"model_only" (continuous/probabilistic/belief-driven, no finite state machine), '
                      'or "mixed" (both). Return JSON {"label": "..."}. '
                      f'Description: "{text}"', fmt="json")
            label = json.loads(raw).get("label", "").strip()
            if label in ("finite_structural", "model_only", "mixed"):
                return {"verdict": label, "reason": f"LLM tie-break {counts}", "source": "llm"}
        except Exception:
            # Unreachable model or malformed reply: use the fallback below.
            pass

    # Fallback: both cue families present -> mixed; else the larger count wins.
    if fin > 0 and cont > 0:
        return {"verdict": "mixed", "reason": f"both cues present {counts}",
                "source": "heuristic"}
    if fin >= cont:
        verdict = "finite_structural"
    else:
        verdict = "model_only"
    return {"verdict": verdict, "reason": f"heuristic-fallback {counts}",
            "source": "heuristic"}
167
+
168
+
169
+ # === extraction (LLM + schema + deterministic fallback) ===================
170
def valid(o):
    """Return True iff ``o`` has the extracted-graph schema.

    The schema is a dict whose ``states`` is a list of strings and whose
    ``transitions`` is a list of two-element lists of strings.

    State names and transition endpoints must be strings because the rest of
    the pipeline joins them into report text and uses them as dict/set keys;
    a model reply containing numbers, nulls or nested objects is rejected
    here so callers can fall back to the deterministic extraction.
    """
    if not isinstance(o, dict):
        return False
    states = o.get("states")
    transitions = o.get("transitions")
    if not isinstance(states, list) or not isinstance(transitions, list):
        return False
    if not all(isinstance(s, str) for s in states):
        return False
    return all(
        isinstance(t, list) and len(t) == 2 and all(isinstance(e, str) for e in t)
        for t in transitions
    )
174
+
175
+
176
def fallback_extract(text):
    """Deterministic (no-model) regex extraction of states and transitions.

    State names are capitalised alphanumeric tokens of at least two
    characters. Recognised phrasings:

    * ``X goes to Y`` / ``X moves to Y`` / ``X to Y``, optionally followed by
      ``or Z`` / ``or to Z`` (adds ``X -> Z`` as well). Chains such as
      ``Tier1 to Tier2 to Tier3`` yield every hop, because the scan resumes
      at the target of each match so a target can also act as a source.
    * ``From X it goes to Y`` / ``From X, it moves to Y`` (same optional
      ``or`` branch).
    * ``enters X`` / ``starts in X`` / ``opens in X`` (adds the state only).

    Returns:
        ``{"states": [...], "transitions": [[a, b], ...]}`` with states
        ordered by first appearance in ``text`` and transitions sorted.
    """
    name = r"[A-Z][A-Za-z0-9]+"
    or_branch = rf"(?:\s+or(?: to)?\s+({name}))?"
    direct = re.compile(rf"({name})\s+(?:goes to|moves to|to)\s+({name})" + or_branch)
    anaphoric = re.compile(
        rf"[Ff]rom\s+({name}),?\s+it\s+(?:goes to|moves to)\s+({name})" + or_branch)

    st, tr = set(), set()

    def record(m):
        a, b, c = m.group(1), m.group(2), m.group(3)
        st.update((a, b))
        tr.add((a, b))
        if c:
            st.add(c)
            tr.add((a, c))

    # Resume each search at the matched target (not past it): a plain
    # finditer consumes the target, so "A to B to C" would lose B -> C.
    pos = 0
    while True:
        m = direct.search(text, pos)
        if m is None:
            break
        record(m)
        pos = m.start(2)

    for m in anaphoric.finditer(text):
        record(m)

    for m in re.finditer(r"(?:enters|starts in|opens in)\s+([A-Z][A-Za-z0-9]+)", text):
        st.add(m.group(1))

    # Order states by FIRST APPEARANCE in the text, not alphabetically: the
    # graph analysis treats states[0] as the start state.
    first = {}
    for m in re.finditer(name, text):
        first.setdefault(m.group(0), len(first))
    states = sorted(st, key=lambda s: first.get(s, len(first)))
    return {"states": states, "transitions": [list(t) for t in sorted(tr)]}
193
+
194
+
195
def extract_workflow(text, generate=None):
    """Extract a finite state machine from ``text``.

    The model (``generate`` or the default client) is asked for JSON; a
    schema-valid reply with at least one state is returned as
    ``(graph, "llm")``. Any exception or an unusable reply yields
    ``(fallback_extract(text), "fallback")`` instead.
    """
    ask = generate or gen
    machine = None
    try:
        reply = ask('Extract the finite state machine. Return ONLY JSON '
                    '{"states":[...],"transitions":[["FROM","TO"],...]} using exact state '
                    f'names from the text. Text: "{text}"', fmt="json")
        candidate = json.loads(reply)
        if valid(candidate) and candidate["states"]:
            machine = candidate
    except Exception:
        # No model / bad JSON: fall back to the deterministic extractor.
        machine = None
    if machine is not None:
        return machine, "llm"
    return fallback_extract(text), "fallback"
210
+
211
+
212
+ # === grounded report ======================================================
213
def grounded_report(states, trace, llm=True, generate=None):
    """Render the verified facts, plus an optional LLM paraphrase of them.

    Args:
        states: State names to list in the report.
        trace: Analysis dict with ``facts`` (graph facts) and ``primes``
            (per-prime entries carrying ``bad_prime`` and
            ``readout_injective``).
        llm: When true, ask the model to paraphrase the fact list.
        generate: Optional ``f(prompt) -> str`` backend; defaults to ``gen``.

    Returns:
        ``(deterministic, english)``. ``english`` is ``""`` when ``llm`` is
        false, and a fixed notice when the model call raises.
    """
    ask = generate or gen
    facts = trace["facts"]
    bad = []
    noninj = []
    for entry in trace["primes"]:
        if entry["bad_prime"]:
            bad.append(entry["prime"])
    for entry in trace["primes"]:
        if not entry["readout_injective"]:
            noninj.append(entry["prime"])

    fact_lines = [
        f"- States ({len(states)}): {', '.join(states)}",
        f"- Terminal states: {', '.join(facts['terminal_states']) or 'none'}",
        f"- Unreachable from start: {', '.join(facts['unreachable_states']) or 'none'}",
        f"- Contains a cycle (loop): {facts['has_cycle']}",
        f"- Bad primes (mode annihilates): {bad or 'none'}",
        f"- Non-injective readout at primes (lift REFUSED): {noninj or 'none'}",
    ]
    deterministic = "\n".join(fact_lines) + "\n"

    if not llm:
        return deterministic, ""
    try:
        english = ask("Write two plain sentences describing this process using ONLY "
                      "these verified facts. Name only the listed states; invent nothing.\n"
                      + deterministic).strip()
    except Exception:
        english = "(LLM rewrite unavailable; deterministic facts above are authoritative.)"
    return deterministic, english
235
+
236
+
237
+ # === end-to-end audit =====================================================
238
def run_audit(text, verbose=True, generate=None):
    """Run gate -> extract -> exact analysis -> grounded report on ``text``.

    Returns a dict that always holds ``text``, ``gate`` and ``result``. When
    the gate verdict is "model_only", or no states could be extracted,
    ``result`` is a "REFUSED: ..." message and later sections are absent;
    otherwise ``extraction``, ``trace``, ``report_facts`` and
    ``report_english`` are filled in and ``result`` starts with "OK".

    ``generate`` selects the LLM backend for every stage. ``verbose`` is
    accepted for interface compatibility and is not read by this function.
    """
    gate = regime_gate(text, generate=generate)
    out = {"text": text, "gate": gate}
    verdict = gate["verdict"]

    if verdict == "model_only":
        out["result"] = "REFUSED: model-only regime; no exact finite-structural analysis."
        return out

    graph, how = extract_workflow(text, generate=generate)
    states = graph["states"]
    out["extraction"] = {"via": how, "states": states, "transitions": graph["transitions"]}
    if not states:
        out["result"] = "REFUSED: no finite structure could be extracted."
        return out

    edges = [tuple(pair) for pair in graph["transitions"]]
    trace = exact_analysis(states, edges)
    out["trace"] = trace

    det, eng = grounded_report(states, trace, llm=True, generate=generate)
    out["report_facts"] = det
    out["report_english"] = eng

    if verdict == "mixed":
        out["result"] = "OK" + " (mixed: finite part analysed, continuous part deferred)"
    else:
        out["result"] = "OK"
    return out
260
+
261
+
262
+ # === demos ================================================================
263
def banner(t):
    """Print ``t`` framed by two 74-character ``=`` rules, after a blank line."""
    rule = "=" * 74
    print(f"\n{rule}\n{t}\n{rule}")
265
+
266
+
267
def demo_M1():
    """Demo M1: run the full audit on an order workflow and print each section."""
    banner("M1 — process auditor (full pipeline, exact trace + grounded report)")
    sentences = (
        "A customer order enters Review. If approved it goes to Packing. If "
        "rejected it goes to Refund. Packing goes to Shipped. Shipped goes to "
        "Closed. Refund goes to Closed."
    )
    r = run_audit(sentences)
    gate = r["gate"]
    extraction = r["extraction"]
    print("gate :", gate["verdict"], "|", gate["reason"])
    print("extracted:", extraction["via"], extraction["states"])
    print("report facts:\n" + r["report_facts"])
    # The paraphrase is clipped to 300 characters.
    print("grounded english:", r.get("report_english", "")[:300])
    print("result :", r["result"])
278
+
279
+
280
def demo_M2():
    """Demo M2: audit a belief/continuous description and print the gate outcome."""
    banner("M2 — gate refusal on belief/continuous input")
    text = "The market price drifts until confidence improves, then buyers slowly return."
    r = run_audit(text)
    gate = r["gate"]
    print("input :", text)
    print("gate :", gate["verdict"], "|", gate["reason"])
    print("result :", r["result"])
287
+
288
+
289
def demo_M3():
    """Demo M3: analyse an undirected 4-cycle and print, per prime, whether
    the mode is degenerate or the sum-readout is non-injective."""
    banner("M3 — non-injective readout refusal (no hallucinated synthesis)")
    # A symmetric 4-cycle: every edge is listed in both directions.
    states = ["S0", "S1", "S2", "S3"]
    forward = [("S0", "S1"), ("S1", "S2"), ("S2", "S3"), ("S3", "S0")]
    backward = [("S1", "S0"), ("S2", "S1"), ("S3", "S2"), ("S0", "S3")]
    trans = forward + backward  # undirected 4-cycle
    trace = exact_analysis(states, trans)

    refused = []
    for pp in trace["primes"]:
        if pp["bad_prime"]:
            tag = "BAD-PRIME"
        elif not pp["readout_injective"]:
            tag = "READOUT NON-INJECTIVE -> LIFT REFUSED"
        else:
            tag = "ok"
        print(f" prime {pp['prime']}: period={pp['period']} mode={pp['mode']} -> {tag}")
        if not pp["readout_injective"] or pp["bad_prime"]:
            refused.append(pp["prime"])
    print(f" => CRT synthesis runs ONLY over primes with injective readouts; "
          f"refused/degenerate primes excluded: {refused}")
304
+
305
+
306
def test_gate_hard():
    """Hardening demo: compare the heuristic-only verdict with the hybrid
    gate's verdict on a small labelled corpus and print both accuracies."""
    banner("Q2 HARDENING — gate on ambiguous / mixed inputs (hybrid vs heuristic-only)")
    corpus = [
        ("After validation the system either commits or rolls back.", "finite_structural"),
        ("The retry counter increments each cycle until it reaches the limit, then the job fails.", "finite_structural"),
        ("Orders move from Review to Packing, and packing time grows as volume increases.", "mixed"),
        ("If the customer trusts the brand, they proceed to Checkout; otherwise they leave.", "mixed"),
        ("The model's confidence rises with each correct prediction.", "model_only"),
        ("A request goes to Pending, then to Approved or Denied.", "finite_structural"),
        ("Sentiment improves gradually as more reviews accumulate.", "model_only"),
        ("A ticket escalates from Tier1 to Tier2 to Tier3 if unresolved.", "finite_structural"),
    ]
    heuristic_hits = 0
    hybrid_hits = 0
    for text, truth in corpus:
        fin, cont = gate_heuristic(text)
        # Heuristic-only verdict: the larger count wins, a tie means "mixed".
        if fin > cont:
            h_pred = "finite_structural"
        elif cont > fin:
            h_pred = "model_only"
        else:
            h_pred = "mixed"
        hyb = regime_gate(text, use_llm=True)["verdict"]
        if h_pred == truth:
            heuristic_hits += 1
        if hyb == truth:
            hybrid_hits += 1
        print(f" truth={truth:<17} heuristic={h_pred:<17} hybrid={hyb:<17} | {text[:42]}")
    total = len(corpus)
    print(f"\n heuristic-only accuracy: {heuristic_hits}/{total} hybrid(LLM) accuracy: {hybrid_hits}/{total}")
326
+
327
+
328
def main():
    """Run the M1, M2 and M3 demos and the gate hardening test, in that order."""
    for stage in (demo_M1, demo_M2, demo_M3, test_gate_hard):
        stage()
    print("\n(all stages self-contained; nothing imported from any external solver)")
334
+
335
+
336
+ if __name__ == "__main__":
337
+ main()
@@ -0,0 +1,151 @@
1
+ """The bounded POSITIVE-feedback loop (the op-amp "close the loop").
2
+
3
+ Negative feedback (gate / ground / refuse) is validated in auditor.py: it
4
+ stabilises, but it checks FORM, not COMPLETENESS — a one-shot extraction can
5
+ silently drop a branch and the system still says "OK".
6
+
7
+ This adds the regenerative loop that fixes that, using a reference that needs
8
+ NONE of the special mathematics — just deterministic text<->graph consistency:
9
+
10
+ extract (LLM) -> consistency_gaps(text, graph) [deterministic reference]
11
+ ^ |
12
+ |____ re-prompt with the gaps <----' (positive feedback: amplify coverage)
13
+
14
+ Bounded by: a FIXED-POINT test (stop when the graph stops changing / no gaps)
15
+ and an iteration cap with a REFUSAL clamp (if it can't converge, say so — do
16
+ NOT report a confident-but-incomplete result). That refusal clamp is the
17
+ stability bound that keeps the regenerative loop from running away.
18
+
19
+ The reference is plain regex graph consistency — so the "LLM feedback control /
20
+ refusal-as-stabilizer" discipline stands on its own, with no special math.
21
+ The LLM backend is injectable via ``generate`` (defaults to llm.gen).
22
+ """
23
+ import re, json
24
+ from .llm import gen
25
+ from .auditor import valid, fallback_extract, norm
26
+
27
+ STOP = {"If", "The", "A", "An", "After", "Once", "When", "Otherwise", "It", "Then"}
28
+
29
+
30
def candidate_states(text):
    """Collect capitalised tokens that the text uses like state names.

    A token qualifies when it follows a movement phrase ("goes to", "enters",
    "into", ...) or precedes a movement verb ("goes", "moves", "closes",
    "enters", "ends"). Tokens listed in ``STOP`` are dropped.
    """
    as_target = re.findall(
        r"(?:goes to|moves to|move to|back to|to|enters|starts in|opens in|into)\s+([A-Z][A-Za-z0-9]+)",
        text)
    as_source = re.findall(
        r"\b([A-Z][A-Za-z0-9]+)\s+(?:goes|moves|closes|enters|ends)",
        text)
    found = set(as_target)
    found.update(as_source)
    return {token for token in found if token not in STOP}
37
+
38
+
39
def candidate_trans(text):
    """Collect the ``(source, target)`` transitions the text states explicitly.

    Matches ``X goes to Y`` / ``X moves to Y`` / ``X move to Y``, optionally
    followed by ``or Z`` / ``or to Z`` (which adds ``(X, Z)`` too).

    A match whose subject is a ``STOP`` word (e.g. "It goes to Review or
    Refund") is skipped entirely: the subject is not a state, so neither
    branch names a real source. Keeping only the ``or`` branch would report a
    gap that no extraction could ever satisfy.

    Returns:
        A set of ``(source, target)`` tuples.
    """
    tr = set()
    for m in re.finditer(r"([A-Z][A-Za-z0-9]+)\s+(?:goes to|moves to|move to)\s+([A-Z][A-Za-z0-9]+)"
                         r"(?:\s+or(?: to)?\s+([A-Z][A-Za-z0-9]+))?", text):
        a, b, c = m.group(1), m.group(2), m.group(3)
        if a in STOP:
            continue
        tr.add((a, b))
        if c:
            tr.add((a, c))
    return tr
47
+
48
+
49
def consistency_gaps(text, graph):
    """Deterministic reference: list what the TEXT mentions that the GRAPH lacks.

    Names are compared after ``norm``. Returns ``(missing_states,
    missing_transitions)``, both sorted lists.
    """
    known_states = set()
    for state in graph.get("states", []):
        known_states.add(norm(state))
    known_edges = set()
    for src, dst in graph.get("transitions", []):
        known_edges.add((norm(src), norm(dst)))

    absent_states = {s for s in candidate_states(text) if norm(s) not in known_states}
    absent_edges = {
        (src, dst)
        for src, dst in candidate_trans(text)
        if (norm(src), norm(dst)) not in known_edges
    }
    return sorted(absent_states), sorted(absent_edges)
57
+
58
+
59
def extract_iterative(text, max_iters=4, verbose=True, generate=None):
    """Positive-feedback extraction: re-prompt on deterministic gaps.

    An initial graph is extracted (model reply if schema-valid and non-empty,
    else ``fallback_extract``). Then, for up to ``max_iters`` rounds, the
    gaps between text and graph are computed and the model is asked for a
    corrected machine. The loop stops at a gap-free graph, when a valid
    reply leaves the graph unchanged, or when the model call / parsing
    raises.

    Returns:
        ``(graph, initial, history, converged)``: the final graph, a deep
        copy of the iteration-0 graph, one history tuple per round
        ``(iter, n_states, n_transitions, missing_states,
        missing_transitions)``, and whether the final graph has no gaps.

    ``generate`` selects the LLM backend (default ``gen``); ``verbose``
    prints the per-round progress.
    """
    ask = generate or gen

    def signature(machine):
        # Name-normalised, order-independent fingerprint of a graph.
        return (tuple(sorted(norm(s) for s in machine["states"])),
                tuple(sorted((norm(a), norm(b)) for a, b in machine["transitions"])))

    # Iteration 0: plain (open-loop) extraction.
    try:
        reply = ask('Extract the finite state machine. Return ONLY JSON '
                    '{"states":[...],"transitions":[["FROM","TO"],...]} using exact state '
                    f'names from the text. Text: "{text}"', fmt="json")
        graph = json.loads(reply)
        usable = valid(graph) and graph.get("states")
        if not usable:
            graph = fallback_extract(text)
    except Exception:
        graph = fallback_extract(text)

    initial = json.loads(json.dumps(graph))  # iter-0 (open-loop) snapshot
    history = []
    for it in range(max_iters):
        miss_s, miss_t = consistency_gaps(text, graph)
        before = signature(graph)
        history.append((it, len(graph["states"]), len(graph["transitions"]), miss_s, miss_t))
        if verbose:
            print(f" iter {it}: states={graph['states']}")
            print(f" gaps -> missing states {miss_s or '∅'}, missing transitions {miss_t or '∅'}")
        if not (miss_s or miss_t):
            # FIXED POINT: converged with no gaps.
            return graph, initial, history, True

        # Positive feedback: re-prompt with the deterministic gaps.
        gaps_txt = f"missing states: {miss_s}; missing transitions: {miss_t}"
        try:
            current = json.dumps({"states": graph["states"], "transitions": graph["transitions"]})
            reply = ask('Here is a state machine you extracted, and a list of items the '
                        'source text mentions that are MISSING from it. Return the COMPLETE '
                        'corrected machine as JSON {"states":[...],"transitions":[["FROM","TO"],...]}, '
                        'adding the missing items (and their transitions) using exact names. '
                        f'Current: {current}. '
                        f'Missing per the text: {gaps_txt}. Source text: "{text}"', fmt="json")
            revised = json.loads(reply)
            if valid(revised) and revised.get("states"):
                # Adopt the reply, but stop if it did not change anything
                # (the loop is stalling rather than converging).
                stalled = signature(revised) == before
                graph = revised
                if stalled:
                    break
        except Exception:
            break

    # Loop ended without a clean fixed point: report residual gaps honestly
    # (REFUSAL CLAMP, the stability bound).
    residual_states, residual_trans = consistency_gaps(text, graph)
    converged = not residual_states and not residual_trans
    return graph, initial, history, converged
115
+
116
+
117
def main():
    """Demo: run ``extract_iterative`` on a sample order workflow and print
    whether it converged or which gaps remain."""
    rule = "=" * 74
    print(rule)
    print("POSITIVE-FEEDBACK EXTRACTION (op-amp 'close the loop'); reference = plain")
    print("text<->graph consistency, NO special math involved")
    print(rule)
    text = ("A customer order enters Review. If approved it goes to Packing. If "
            "rejected it goes to Refund. Packing goes to Shipped. Shipped goes to "
            "Closed. Refund goes to Closed.")
    print(f"\nText: {text}\n")
    graph, _initial, history, converged = extract_iterative(text)
    print("\n" + "-" * 74)
    print(f"iterations run: {len(history)}")
    if not converged:
        ms, mt = consistency_gaps(text, graph)
        print("DID NOT CONVERGE within the cap. REFUSAL CLAMP fires:")
        print(f" residual gaps: missing states {ms}, missing transitions {mt}")
        print(" -> the system refuses to report a confident-but-incomplete result")
        print(" (the stability bound that keeps positive feedback from faking 'OK').")
    else:
        print(f"CONVERGED to a fixed point with NO residual gaps. Final states: {graph['states']}")
        print(" -> the dropped branch was recovered by the regenerative loop, then the")
        print(" fixed-point test (negative-feedback reference) stopped it cleanly.")
    print("\n" + rule)
    print("PRINCIPLE DEMONSTRATED")
    print(rule)
    summary = [
        "- POSITIVE feedback (regenerative re-extraction) recovered coverage that the",
        "one-shot negative-feedback pass could not — it amplified toward completeness.",
        "- It was made SAFE by two negative-feedback bounds: a deterministic fixed-point",
        "reference (text<->graph consistency) and a refusal clamp on non-convergence.",
        "- The reference uses ZERO special mathematics — just regex graph consistency.",
    ]
    print("\n".join(summary))
148
+
149
+
150
+ if __name__ == "__main__":
151
+ main()
@@ -0,0 +1,143 @@
1
+ """LLM client for llm-feedback-control.
2
+
3
+ Two backends behind a tiny interface:
4
+ * gen() — a local Ollama model (default phi3:mini, the "small model")
5
+ * gen_ceiling() — a stronger reference model: a larger Ollama model OR the
6
+ OpenAI API (used only as a quality CEILING in experiments)
7
+
8
+ Everything is configurable by environment variable so the same code runs
9
+ locally and on a remote box (e.g. EC2) without edits:
10
+
11
+ OLLAMA_HOST default http://localhost:11434
12
+ LFC_MODEL default phi3:mini (the small model under test)
13
+ LFC_CEILING default llama3.1:8b (a bigger local Ollama model)
14
+ CEILING_BACKEND "ollama" (default) or "openai"
15
+ OPENAI_API_KEY required iff CEILING_BACKEND=openai
16
+ OPENAI_MODEL default gpt-4o-mini
17
+
18
+ Only the standard library is used (urllib + json) — no SDK dependency, and the
19
+ whole package has **zero third-party runtime dependencies**.
20
+
21
+ You are never locked to Ollama: every high-level entry point in this package
22
+ (`run_audit`, `extract_iterative`, `regime_gate`, ...) accepts an injectable
23
+ ``generate`` callable, so you can plug in OpenAI, Anthropic, a local server, or
24
+ any function ``f(prompt, fmt=None) -> str``. The functions in this module are
25
+ just the convenient defaults.
26
+ """
27
+ import os
28
+ import json
29
+ import urllib.request
30
+ import urllib.error
31
+
32
def _normalize_host(raw):
    """Turn an OLLAMA_HOST value into a usable base URL.

    OLLAMA_HOST is also read by Ollama itself, where it is routinely set
    without a scheme (e.g. ``127.0.0.1:11434``). urllib rejects such a value
    with "unknown url type", so a missing scheme is filled in with ``http://``.
    An unset or blank value falls back to the local default. Trailing slashes
    are dropped so paths can be appended with a single ``/``.
    """
    host = (raw or "").strip().rstrip("/")
    if not host:
        host = "http://localhost:11434"
    if "://" not in host:
        host = "http://" + host
    return host


# Configuration is read once, at import time. ``or`` (rather than a .get()
# default) makes a variable that is set but empty behave like an unset one.
OLLAMA_HOST = _normalize_host(os.environ.get("OLLAMA_HOST"))
MODEL = os.environ.get("LFC_MODEL") or "phi3:mini"
CEILING_MODEL = os.environ.get("LFC_CEILING") or "llama3.1:8b"
# Lower-cased so "OpenAI" / "OPENAI" select the OpenAI backend as intended.
CEILING_BACKEND = (os.environ.get("CEILING_BACKEND") or "ollama").strip().lower()
OPENAI_MODEL = os.environ.get("OPENAI_MODEL") or "gpt-4o-mini"
37
+
38
+
39
class BackendError(RuntimeError):
    """Signals that no LLM backend could be reached.

    The exception message carries step-by-step guidance on how to fix the
    setup.

    Callers of the high-level pipeline (`run_audit`, `extract_iterative`, ...)
    normally never see this: the pipeline catches it and switches to the
    deterministic path, so an absent model degrades gracefully instead of
    crashing. It surfaces only when `gen` / `gen_ceiling` are called directly.
    """
47
+
48
+
49
def _ollama(prompt, fmt, model, timeout):
    """POST one non-streaming generate request to the Ollama server.

    Args:
        prompt: the prompt text.
        fmt: value for Ollama's ``format`` field (e.g. ``"json"``); omitted
            from the request when falsy.
        model: name of the Ollama model to run.
        timeout: socket timeout in seconds, passed to ``urlopen``.

    Returns:
        The ``response`` field of the JSON reply, or ``""`` if it is absent.

    Raises:
        BackendError: the server could not be reached, timed out, dropped the
            connection, or answered with an HTTP error status.
    """
    body = {"model": model, "prompt": prompt, "stream": False,
            "options": {"temperature": 0.0}}  # greedy decode
    if fmt:
        body["format"] = fmt
    req = urllib.request.Request(f"{OLLAMA_HOST}/api/generate",
                                 data=json.dumps(body).encode(),
                                 headers={"Content-Type": "application/json"})
    try:
        with urllib.request.urlopen(req, timeout=timeout) as r:
            return json.loads(r.read()).get("response", "")
    except urllib.error.HTTPError as e:
        # The server IS reachable but refused the request (typically the model
        # has not been pulled), so "could not reach" would be misleading.
        raise BackendError(
            f"The Ollama server at {OLLAMA_HOST} answered HTTP {e.code} "
            f"({e.reason}) for model {model!r}.\n"
            f" - If the model is not installed, pull it: ollama pull {model}\n"
            f" - Or pass your own generate=... callable."
        ) from e
    except OSError as e:
        # OSError rather than URLError: urlopen only wraps errors raised while
        # SENDING the request. A timeout or reset while waiting for / reading
        # the reply surfaces as a bare TimeoutError / ConnectionError, which
        # would otherwise bypass the BackendError fallback path.
        raise BackendError(
            f"Could not reach an Ollama server at {OLLAMA_HOST} ({e}).\n"
            f" - Install Ollama: https://ollama.com\n"
            f" - Start it and pull the small model: ollama pull {MODEL}\n"
            f" - Or point OLLAMA_HOST at a running server.\n"
            f" - Or use OpenAI: set CEILING_BACKEND=openai and OPENAI_API_KEY,\n"
            f" or pass your own generate=... callable.\n"
            f" (The deterministic pipeline still works with NO model at all — "
            f"run_audit() falls back to a regex extractor + exact graph analysis.)"
        ) from e
71
+
72
+
73
def gen(prompt, fmt=None, model=None, timeout=600):
    """Run *prompt* through a local Ollama model and return its text.

    Pass ``fmt="json"`` to force valid JSON output. Decoding is greedy
    (temperature 0) so results are reproducible. When *model* is not given,
    the configured small model (``MODEL``) is used.

    Raises :class:`BackendError` (with actionable guidance) if no server is
    reachable.
    """
    chosen_model = model if model else MODEL
    return _ollama(prompt, fmt, chosen_model, timeout)
80
+
81
+
82
def _gen_openai(prompt, model=None, timeout=120, fmt="json"):
    """Send *prompt* to the OpenAI chat-completions API using only the stdlib.

    Args:
        prompt: sent as a single user message.
        model: OpenAI model name; defaults to ``OPENAI_MODEL``.
        timeout: socket timeout in seconds, passed to ``urlopen``.
        fmt: when truthy (the default, ``"json"``), JSON mode is requested via
            ``response_format``; when falsy, plain text is requested. Note
            that OpenAI's JSON mode expects the prompt itself to mention JSON.

    Returns:
        The content of the first choice's message.

    Raises:
        BackendError: the API key is missing or empty, the API could not be
            reached or timed out, or it answered with an HTTP error status.
    """
    # .get + truthiness: an exported-but-empty key is as unusable as no key.
    key = os.environ.get("OPENAI_API_KEY")
    if not key:
        raise BackendError(
            "CEILING_BACKEND=openai but OPENAI_API_KEY is not set in the "
            "environment."
        )
    body = {"model": model or OPENAI_MODEL,
            "messages": [{"role": "user", "content": prompt}],
            "temperature": 0}
    if fmt:
        body["response_format"] = {"type": "json_object"}
    req = urllib.request.Request("https://api.openai.com/v1/chat/completions",
                                 data=json.dumps(body).encode(),
                                 headers={"Content-Type": "application/json",
                                          "Authorization": f"Bearer {key}"})
    try:
        with urllib.request.urlopen(req, timeout=timeout) as r:
            return json.loads(r.read())["choices"][0]["message"]["content"]
    except urllib.error.HTTPError as e:
        # The API was reached but rejected the call (bad key, bad model, ...).
        raise BackendError(
            f"The OpenAI API rejected the request (HTTP {e.code} {e.reason})."
        ) from e
    except OSError as e:
        # OSError also covers timeouts / resets raised while reading the
        # reply, which urlopen does not wrap in URLError.
        raise BackendError(f"Could not reach the OpenAI API ({e}).") from e


def gen_ceiling(prompt, fmt="json", timeout=600):
    """Generate from the CEILING model (a stronger reference).

    The backend is chosen by ``CEILING_BACKEND``: the OpenAI API when it is
    ``"openai"``, otherwise the larger local Ollama model ``CEILING_MODEL``.
    *fmt* is honoured by both backends.

    Raises :class:`BackendError` if the selected backend is unavailable.
    """
    if CEILING_BACKEND == "openai":
        return _gen_openai(prompt, timeout=timeout, fmt=fmt)
    return gen(prompt, fmt=fmt, model=CEILING_MODEL, timeout=timeout)
110
+
111
+
112
def info():
    """Return a one-line summary of the configured small and ceiling models."""
    small_part = "small={} @ {}".format(MODEL, OLLAMA_HOST)
    ceiling_part = "ceiling={} (backend={})".format(CEILING_MODEL, CEILING_BACKEND)
    return " | ".join((small_part, ceiling_part))
115
+
116
+
117
def _with_default_tag(name):
    """Return *name* with Ollama's implicit ``:latest`` tag made explicit."""
    # Only the last path segment can carry a tag; an earlier ':' would be a
    # registry port (e.g. "registry:5000/ns/model").
    leaf = name.rsplit("/", 1)[-1]
    return name if ":" in leaf else f"{name}:latest"


def doctor():
    """Probe the configured backend and report what's available.

    Returns a status dict with the configured host, model and backend names,
    whether ``OPENAI_API_KEY`` is set, whether the Ollama server answered, the
    model names it lists, and whether the configured small model is among
    them. On failure the dict also carries an ``"error"`` string.

    Never raises — safe to call before anything is set up. Used by
    ``python -m llm_feedback_control --check``.
    """
    status = {
        "ollama_host": OLLAMA_HOST,
        "small_model": MODEL,
        "ceiling_backend": CEILING_BACKEND,
        "ceiling_model": CEILING_MODEL,
        "openai_key_set": bool(os.environ.get("OPENAI_API_KEY")),
    }
    try:
        req = urllib.request.Request(f"{OLLAMA_HOST}/api/tags")
        with urllib.request.urlopen(req, timeout=5) as r:
            # "or []" so a null "models" field is an empty list, not a crash
            # that would be misreported as an unreachable server.
            models = json.loads(r.read()).get("models") or []
        names = [m.get("name") or m.get("model") for m in models]
        # Compare full name:tag pairs. A substring test on the bare name gave
        # false positives ("phi3" matched "phi3.5:latest", and "phi3:mini"
        # matched when only "phi3:medium" was installed).
        wanted = _with_default_tag(MODEL)
        status["ollama_reachable"] = True
        status["models_available"] = names
        status["small_model_present"] = any(
            _with_default_tag(n) == wanted for n in names if n)
    except Exception as e:  # noqa: BLE001 — doctor must never raise
        status["ollama_reachable"] = False
        status["models_available"] = []
        status["small_model_present"] = False
        status["error"] = str(e)
    return status
@@ -0,0 +1,262 @@
1
+ Metadata-Version: 2.4
2
+ Name: llm-feedback-control
3
+ Version: 0.1.0
4
+ Summary: Reliable, checkable structured output from a small local LLM, by wrapping it in a deterministic feedback loop: a regime gate + exact graph analysis + explicit refusal, plus a bounded re-extraction loop. Zero runtime dependencies; runs with no model at all.
5
+ Author-email: Edward Chalk <edward.chalk@sapientronic.ai>
6
+ License: llm-feedback-control
7
+
8
+ Copyright (c) 2026 Edward Chalk (sapientronic.ai)
9
+
10
+ Permission is hereby granted, free of charge, to any person obtaining a copy
11
+ of this software and associated documentation files (the "Software"), to use,
12
+ copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
13
+ the Software, and to permit persons to whom the Software is furnished to do
14
+ so, subject to the following conditions:
15
+
16
+ 1. The above copyright notice and this permission notice shall be included
17
+ in all copies or substantial portions of the Software.
18
+
19
+ 2. Attribution. Any publication, presentation, derivative work, or product
20
+ that uses or builds on this Software must include visible attribution to
21
+ Edward Chalk and sapientronic.ai. The phrase "Built with llm-feedback-control
22
+ by Edward Chalk (sapientronic.ai)" or equivalent is acceptable.
23
+
24
+ 3. The Software is provided "AS IS", without warranty of any kind, express
25
+ or implied, including but not limited to the warranties of merchantability,
26
+ fitness for a particular purpose, and noninfringement. In no event shall
27
+ the authors or copyright holders be liable for any claim, damages, or
28
+ other liability, whether in an action of contract, tort, or otherwise,
29
+ arising from, out of, or in connection with the Software or the use or
30
+ other dealings in the Software.
31
+
32
+ This license is modeled on the MIT License with an explicit attribution
33
+ clause (clause 2).
34
+
35
+ Project-URL: Homepage, https://github.com/pcoz/llm-feedback-control
36
+ Project-URL: Repository, https://github.com/pcoz/llm-feedback-control
37
+ Project-URL: Issues, https://github.com/pcoz/llm-feedback-control/issues
38
+ Project-URL: Changelog, https://github.com/pcoz/llm-feedback-control/blob/main/CHANGELOG.md
39
+ Keywords: llm,feedback-control,structured-extraction,state-machine,workflow,hallucination,reliability,ollama,small-language-model,auditable,refusal
40
+ Classifier: Development Status :: 4 - Beta
41
+ Classifier: Intended Audience :: Developers
42
+ Classifier: Intended Audience :: Science/Research
43
+ Classifier: License :: OSI Approved :: MIT License
44
+ Classifier: Operating System :: OS Independent
45
+ Classifier: Programming Language :: Python :: 3
46
+ Classifier: Programming Language :: Python :: 3.8
47
+ Classifier: Programming Language :: Python :: 3.9
48
+ Classifier: Programming Language :: Python :: 3.10
49
+ Classifier: Programming Language :: Python :: 3.11
50
+ Classifier: Programming Language :: Python :: 3.12
51
+ Classifier: Programming Language :: Python :: 3.13
52
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
53
+ Classifier: Topic :: Software Development :: Libraries :: Python Modules
54
+ Classifier: Topic :: Text Processing :: Linguistic
55
+ Requires-Python: >=3.8
56
+ Description-Content-Type: text/markdown
57
+ License-File: LICENSE
58
+ Provides-Extra: aws
59
+ Requires-Dist: boto3>=1.26; extra == "aws"
60
+ Provides-Extra: dev
61
+ Requires-Dist: pytest>=7.0; extra == "dev"
62
+ Requires-Dist: build>=1.0; extra == "dev"
63
+ Requires-Dist: twine>=4.0; extra == "dev"
64
+ Dynamic: license-file
65
+
66
+ # llm-feedback-control
67
+
68
+ **Get reliable, checkable structured output from a small, local language model —
69
+ by wrapping it in ordinary deterministic code.**
70
+
71
+ [![CI](https://github.com/pcoz/llm-feedback-control/actions/workflows/ci.yml/badge.svg)](https://github.com/pcoz/llm-feedback-control/actions/workflows/ci.yml)
72
+
73
+ ---
74
+
75
+ ## What it actually does
76
+
77
+ You hand it a process written in plain English:
78
+
79
+ > "A claim enters Intake. From Intake it goes to Triage. Triage goes to FastTrack
80
+ > or to Investigation. FastTrack goes to Payout. Investigation goes to Payout or
81
+ > to Denied. Payout goes to Closed. Denied goes to Closed."
82
+
83
+ and it:
84
+
85
+ 1. **turns that into a state machine** — the steps (states) and the arrows between
86
+ them (transitions);
87
+ 2. **computes provable facts** about it — which steps are dead ends, whether
88
+ there are loops, which steps can't be reached from the start;
89
+ 3. **writes a report where every statement is backed by one of those checked
90
+ facts** — so it can't quietly make things up;
91
+ 4. **knows its own limits.** If the text isn't actually a finite step-by-step
92
+ process (e.g. *"prices drift up as confidence grows"*), it **refuses** instead
93
+ of inventing a fake state machine. And if the model's first pass missed part of
94
+ the process, it **loops to fill the gaps** — or refuses if it can't.
95
+
96
+ The point: you get **higher-quality, auditable structured output from a *small*
97
+ model**, trading a few extra passes (latency) for accuracy — no extra parameters,
98
+ no special mathematics, no cloud. It runs on a laptop, and the deterministic parts
99
+ run **with no model at all**.
100
+
101
+ ## Quickstart (works with no model)
102
+
103
+ ```bash
104
+ pip install llm-feedback-control # zero dependencies — pulls nothing else
105
+ ```
106
+
107
+ ```python
108
+ from llm_feedback_control import run_audit
109
+
110
+ r = run_audit("A claim enters Intake. From Intake it goes to Triage. "
111
+ "Triage goes to FastTrack or to Investigation.")
112
+ print(r["result"]) # OK
113
+ print(r["report_facts"]) # terminals, loops, unreachable steps — all checked
114
+ ```
115
+
116
+ That already works on a bare install: with no model reachable it uses a
117
+ deterministic regex extractor plus exact graph analysis. **Plug in a model and the
118
+ extraction quality goes up — nothing else changes.**
119
+
120
+ From the command line:
121
+
122
+ ```bash
123
+ lfc "A ticket opens in New. New goes to Assigned. Assigned goes to Resolved."
124
+ lfc --check # tells you exactly what backend is available and what to do
125
+ lfc --demo # runs the three worked demos
126
+ ```
127
+
128
+ ### Add a model (optional, recommended)
129
+
130
+ The library is **not tied to any provider.** Three ways to give it a model:
131
+
132
+ ```bash
133
+ # 1. Local, free, private — install Ollama (https://ollama.com), then:
134
+ ollama pull phi3:mini
135
+
136
+ # 2. OpenAI (stdlib HTTP, no SDK):
137
+ export CEILING_BACKEND=openai OPENAI_API_KEY=sk-...
138
+ ```
139
+
140
+ ```python
141
+ # 3. Bring your own: pass any callable f(prompt, fmt=None) -> str
142
+ def my_llm(prompt, fmt=None):
143
+ ... # call Anthropic, a local server, anything
144
+ run_audit(text, generate=my_llm)
145
+ ```
146
+
147
+ Run `lfc --check` any time to see what's wired up.
148
+
149
+ ## How it works — "feedback control", explained
150
+
151
+ The design is borrowed from **electronics.** A raw LLM is like a very high-gain
152
+ amplifier: hugely powerful, but left to run "open-loop" it overshoots — fluent,
153
+ yet it drifts and hallucinates. Engineers tame such an amplifier by adding a
154
+ **feedback loop**: feed the output back, compare it against a stable reference,
155
+ and trade some raw power for precision and stability. This library is that
156
+ feedback loop for an LLM. The "reference" is plain deterministic code — graph
157
+ checks and schema rules — that the model's output is measured against.
158
+
159
+ There are two kinds of feedback, and the library uses both:
160
+
161
+ ### Negative feedback — the stabilizing checks (`run_audit`)
162
+
163
+ This is the half that *grounds and refuses*. In plain terms:
164
+
165
+ | step | what it means |
166
+ |---|---|
167
+ | **regime gate** | First decide whether the text is even the kind of thing we can analyse exactly (a finite, step-by-step process) versus something fuzzy and continuous. Refuse the fuzzy ones. |
168
+ | **extraction + schema** | Ask the model for the state machine, but force the answer into a strict shape — and fall back to a deterministic regex extractor if it won't comply (or if there's no model). |
169
+ | **exact analysis** | Compute provable facts about the graph: dead ends, loops, unreachable steps. (Plus an *optional* finite-field "spectral fingerprint" — see below.) |
170
+ | **grounded report** | Write the summary using only those verified facts, naming only real states. |
171
+ | **explicit refusal** | When the input is out of regime, or a result can't be made exact, say so — don't guess. |
172
+
173
+ ### Positive feedback — the gap-filling loop (`extract_iterative`)
174
+
175
+ A one-shot extraction often silently **drops a branch** — the model says "OK"
176
+ while quietly missing *Investigation → Denied*. Positive feedback fixes that: it
177
+ **re-asks the model about anything the source text mentions that's missing from
178
+ the answer**, and repeats until nothing is missing (a *fixed point*).
179
+
180
+ Positive feedback is where capability *and* instability both live, so it's bounded
181
+ by two negative-feedback safeguards: a deterministic consistency check (does the
182
+ graph cover everything the text mentions?) and a **refusal clamp** — if it can't
183
+ converge within a few passes, it refuses to report a confident-but-incomplete
184
+ result rather than running away. This **refusal-as-stabilizer** is what makes the
185
+ regenerative loop safe.
186
+
187
+ ## What's measured so far
188
+
189
+ Indicative results, not benchmarks — small corpora, a 3.8B local model
190
+ (`phi3:mini`), greedy decoding. See [`docs/results.md`](docs/results.md) for the
191
+ full tables and method.
192
+
193
+ **Headline (run on EC2 against a ~28 GB ceiling model, mixtral 8x7B):** on a
194
+ messy, branchy, distractor-laden workflow corpus, the small model **+ the feedback
195
+ loop essentially matches a model ~7× its size.**
196
+
197
+ | configuration | states F1 | transitions F1 |
198
+ |---|---|---|
199
+ | small model (phi3:mini), one-shot | 0.98 | 0.89 |
200
+ | **small model + feedback loop** | **1.00** | **0.90** |
201
+ | big ceiling model (mixtral, ~28 GB), one-shot | 1.00 | 0.91 |
202
+
203
+ → the loop recovers **100%** of the small→big gap on states and **77%** on
204
+ transitions — and on several individual workflows the closed-loop small model
205
+ *beat* the big model, because the deterministic reference catches edges that raw
206
+ fluency invents or drops.
207
+
208
+ Other measured pieces: state-extraction precision/recall ≈ 1.00 / 0.92; the
209
+ regime gate scores 1.00 precision/recall separating finite from continuous on a
210
+ clean corpus (it's brittle on deliberately *mixed* inputs — an open problem).
211
+
212
+ ## Documentation
213
+
214
+ | doc | contents |
215
+ |---|---|
216
+ | [`docs/index.md`](docs/index.md) | overview and where to start |
217
+ | [`docs/architecture.md`](docs/architecture.md) | the op-amp model in depth; the pipeline; refusal-as-stabilizer |
218
+ | [`docs/usage.md`](docs/usage.md) | install, the API, the CLI, configuration, bring-your-own-backend |
219
+ | [`docs/results.md`](docs/results.md) | the measured results, method, and honest scope |
220
+ | [`docs/api.md`](docs/api.md) | reference for every public function |
221
+ | [`docs/faq.md`](docs/faq.md) | "do I need a GPU?", "what models?", "does it work offline?" … |
222
+
223
+ ## Repository layout
224
+
225
+ ```
226
+ src/llm_feedback_control/ the package (zero-dependency, pure standard library)
227
+ llm.py the LLM client + injectable backend + a doctor()
228
+ auditor.py the negative-feedback pipeline (run_audit)
229
+ feedback.py the bounded positive-feedback loop (extract_iterative)
230
+ __main__.py the `lfc` command-line tool
231
+ experiments/ repro scripts for the measured results (not shipped)
232
+ aws/ optional: run a large ceiling model on EC2 (not shipped)
233
+ docs/ the documentation suite
234
+ tests/ deterministic tests (no model / no network)
235
+ ```
236
+
237
+ ## Honest scope
238
+
239
+ - **A reliability architecture, not a model improvement.** The win is "the system
240
+ knows what it can compute exactly and refuses the rest" — orthogonal to model
241
+ scale. It helps on the *structured / verifiable slice* (workflows, state
242
+ machines, configs), not open-ended generation.
243
+ - **It uses no special mathematics.** The deterministic reference is plain
244
+ graph/text consistency. (The finite-field "spectral fingerprint" is an *optional*
245
+ extra exact check, honestly redundant with graph analysis for most workflow
246
+ audits — keep it or ignore it.)
247
+ - **Needs a deterministic reference.** Where there's nothing to check against, the
248
+ gate (correctly) refuses to claim exactness.
249
+ - **Results are indicative.** Small corpora; treat the numbers as direction, not
250
+ guarantees.
251
+
252
+ ## Origin
253
+
254
+ This project is the practical, validated spin-off of an internal research
255
+ investigation. The investigation's grander mathematical claims did not hold up
256
+ under measurement; this engineering architecture — LLM feedback control with
257
+ refusal-as-stabilizer — is the part that did. It stands on its own.
258
+
259
+ ## License
260
+
261
+ MIT with an attribution clause — see [`LICENSE`](LICENSE).
262
+ Built with llm-feedback-control by Edward Chalk (sapientronic.ai).
@@ -0,0 +1,11 @@
1
+ llm_feedback_control/__init__.py,sha256=75Udenah_ANeGOwYttX_-5kWtFr_r-WxQhet4HPQ4a8,3723
2
+ llm_feedback_control/__main__.py,sha256=L_1H35DBTkyPO-6OsjUuMPt2jNwTVbSsCmmG5vYAMR0,3969
3
+ llm_feedback_control/auditor.py,sha256=EjbIZbqKaEvUvXnnrBXJ-cjO99Xw7ioxLWgkgEvZfRY,15491
4
+ llm_feedback_control/feedback.py,sha256=gQqfembMfGGTlgF6HcYOXkQAlEM2l39xK9MKdZDV0AA,7570
5
+ llm_feedback_control/llm.py,sha256=Pm5V7Km43AXcKykW4aDX28mPrK8hbqhygg1zl49q5Do,6290
6
+ llm_feedback_control-0.1.0.dist-info/licenses/LICENSE,sha256=ITnVOjCtoAKd-QQxtLh77T94TSkmzIddIxyEHWtWSlQ,1434
7
+ llm_feedback_control-0.1.0.dist-info/METADATA,sha256=vY7GFnHYzPgAVZDFLNjv8RwOtZlwgo9R1SZpwK3eT0s,13086
8
+ llm_feedback_control-0.1.0.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
9
+ llm_feedback_control-0.1.0.dist-info/entry_points.txt,sha256=FqOQG5kIa81IkFvvuD_y5Nk3I2DQCvtTQSC5TeMhOV4,59
10
+ llm_feedback_control-0.1.0.dist-info/top_level.txt,sha256=dhKfG5rAR6PqKr4uSAslVgGcmQq19gzvPN_S2UKp1SE,21
11
+ llm_feedback_control-0.1.0.dist-info/RECORD,,
@@ -0,0 +1,5 @@
1
+ Wheel-Version: 1.0
2
+ Generator: setuptools (82.0.1)
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
5
+
@@ -0,0 +1,2 @@
1
+ [console_scripts]
2
+ lfc = llm_feedback_control.__main__:main
@@ -0,0 +1,28 @@
1
+ llm-feedback-control
2
+
3
+ Copyright (c) 2026 Edward Chalk (sapientronic.ai)
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to use,
7
+ copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
8
+ the Software, and to permit persons to whom the Software is furnished to do
9
+ so, subject to the following conditions:
10
+
11
+ 1. The above copyright notice and this permission notice shall be included
12
+ in all copies or substantial portions of the Software.
13
+
14
+ 2. Attribution. Any publication, presentation, derivative work, or product
15
+ that uses or builds on this Software must include visible attribution to
16
+ Edward Chalk and sapientronic.ai. The phrase "Built with llm-feedback-control
17
+ by Edward Chalk (sapientronic.ai)" or equivalent is acceptable.
18
+
19
+ 3. The Software is provided "AS IS", without warranty of any kind, express
20
+ or implied, including but not limited to the warranties of merchantability,
21
+ fitness for a particular purpose, and noninfringement. In no event shall
22
+ the authors or copyright holders be liable for any claim, damages, or
23
+ other liability, whether in an action of contract, tort, or otherwise,
24
+ arising from, out of, or in connection with the Software or the use or
25
+ other dealings in the Software.
26
+
27
+ This license is modeled on the MIT License with an explicit attribution
28
+ clause (clause 2).
@@ -0,0 +1 @@
1
+ llm_feedback_control