nfield 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (69) hide show
  1. nfield/__init__.py +102 -0
  2. nfield/_version.py +17 -0
  3. nfield/assembly/__init__.py +49 -0
  4. nfield/assembly/_blackboard.py +488 -0
  5. nfield/assembly/_quality.py +254 -0
  6. nfield/assembly/_trie.py +376 -0
  7. nfield/cli/__init__.py +22 -0
  8. nfield/cli/_app.py +235 -0
  9. nfield/config.py +207 -0
  10. nfield/engine/__init__.py +39 -0
  11. nfield/engine/_async.py +610 -0
  12. nfield/engine/_sync.py +248 -0
  13. nfield/exceptions.py +251 -0
  14. nfield/export.py +145 -0
  15. nfield/extraction/__init__.py +55 -0
  16. nfield/extraction/_papt.py +335 -0
  17. nfield/extraction/_prompt.py +461 -0
  18. nfield/extraction/_sfep.py +572 -0
  19. nfield/io.py +122 -0
  20. nfield/pipeline/__init__.py +62 -0
  21. nfield/pipeline/_coverage.py +88 -0
  22. nfield/pipeline/_state.py +135 -0
  23. nfield/pipeline/_structure.py +653 -0
  24. nfield/pipeline/s0_resources.py +63 -0
  25. nfield/pipeline/s1_schema.py +97 -0
  26. nfield/pipeline/s2a_structure.py +54 -0
  27. nfield/pipeline/s2b_prepass.py +342 -0
  28. nfield/pipeline/s2c_packing.py +898 -0
  29. nfield/pipeline/s3_excerpt.py +130 -0
  30. nfield/pipeline/s4_extract.py +348 -0
  31. nfield/pipeline/s5_validate.py +132 -0
  32. nfield/pipeline/s5b_recover.py +418 -0
  33. nfield/pipeline/s6_assemble.py +157 -0
  34. nfield/providers/__init__.py +45 -0
  35. nfield/providers/_base.py +240 -0
  36. nfield/providers/_protocol.py +79 -0
  37. nfield/providers/_reasoning.py +108 -0
  38. nfield/providers/_registry.py +171 -0
  39. nfield/providers/_token_budget.py +77 -0
  40. nfield/providers/groq/__init__.py +10 -0
  41. nfield/providers/groq/_provider.py +287 -0
  42. nfield/providers/openai/__init__.py +10 -0
  43. nfield/providers/openai/_provider.py +300 -0
  44. nfield/py.typed +0 -0
  45. nfield/retrieval/__init__.py +41 -0
  46. nfield/retrieval/_bmx.py +213 -0
  47. nfield/retrieval/_chunker.py +495 -0
  48. nfield/retrieval/_glean.py +470 -0
  49. nfield/retrieval/_morphology.py +272 -0
  50. nfield/retrieval/_retarget.py +145 -0
  51. nfield/retrieval/_tokenize.py +62 -0
  52. nfield/schema/__init__.py +49 -0
  53. nfield/schema/_deps.py +154 -0
  54. nfield/schema/_difficulty.py +193 -0
  55. nfield/schema/_flatten.py +492 -0
  56. nfield/schema/_preflight.py +393 -0
  57. nfield/schema/_tau.py +173 -0
  58. nfield/schema/_types.py +282 -0
  59. nfield/types.py +223 -0
  60. nfield/validation/__init__.py +71 -0
  61. nfield/validation/_grounding.py +294 -0
  62. nfield/validation/_normalize.py +132 -0
  63. nfield/validation/_retry.py +576 -0
  64. nfield/validation/_type_check.py +334 -0
  65. nfield-0.1.0.dist-info/METADATA +201 -0
  66. nfield-0.1.0.dist-info/RECORD +69 -0
  67. nfield-0.1.0.dist-info/WHEEL +4 -0
  68. nfield-0.1.0.dist-info/entry_points.txt +2 -0
  69. nfield-0.1.0.dist-info/licenses/LICENSE +192 -0
nfield/__init__.py ADDED
@@ -0,0 +1,102 @@
1
+ """nfield - N-field structured extraction from documents with LLMs.
2
+
3
+ Extract hundreds of structured fields from any document without the format tax.
4
+
5
+ Quickstart:
6
+ >>> from nfield import nfield
7
+ >>> # result = nfield(document, MySchema, "groq/llama-3.1-8b")
8
+ >>> # result.data, result.metadata, result.status
9
+
10
+ Every public name is imported lazily, so ``import nfield`` stays fast and
11
+ never fails because an optional provider SDK (e.g. groq) is not installed.
12
+ """
13
+
14
+ from __future__ import annotations
15
+
16
+ import importlib
17
+ from typing import TYPE_CHECKING
18
+
19
+ from ._version import __version__
20
+
21
+ if TYPE_CHECKING:
22
+ from .config import ExtractionConfig
23
+ from .engine import AsyncNField, NField, nfield, nfield_async
24
+ from .exceptions import (
25
+ AssemblyError,
26
+ ExtractionError,
27
+ NFieldError,
28
+ ProviderError,
29
+ SchemaError,
30
+ ValidationError,
31
+ )
32
+ from .export import result_to_dataframe, results_to_csv, results_to_dataframe
33
+ from .io import load_document, load_results, load_schema, save_results
34
+ from .providers import from_model
35
+ from .types import ExtractionResult, ExtractionStatus, FieldResult, Metadata
36
+
37
+ __all__ = [
38
+ "AssemblyError",
39
+ "AsyncNField",
40
+ "ExtractionConfig",
41
+ "ExtractionError",
42
+ "ExtractionResult",
43
+ "ExtractionStatus",
44
+ "FieldResult",
45
+ "Metadata",
46
+ "NField",
47
+ "NFieldError",
48
+ "ProviderError",
49
+ "SchemaError",
50
+ "ValidationError",
51
+ "__version__",
52
+ "from_model",
53
+ "load_document",
54
+ "load_results",
55
+ "load_schema",
56
+ "nfield",
57
+ "nfield_async",
58
+ "result_to_dataframe",
59
+ "results_to_csv",
60
+ "results_to_dataframe",
61
+ "save_results",
62
+ ]
63
+
64
+ _dynamic_imports: dict[str, str] = {
65
+ # Entry-point functions and engine classes
66
+ "nfield": ".engine",
67
+ "nfield_async": ".engine",
68
+ "NField": ".engine",
69
+ "AsyncNField": ".engine",
70
+ # Filesystem helpers (load inputs, persist results)
71
+ "load_document": ".io",
72
+ "load_schema": ".io",
73
+ "save_results": ".io",
74
+ "load_results": ".io",
75
+ # Tabular export (optional pandas dependency)
76
+ "results_to_dataframe": ".export",
77
+ "result_to_dataframe": ".export",
78
+ "results_to_csv": ".export",
79
+ # Provider factory
80
+ "from_model": ".providers",
81
+ # Config
82
+ "ExtractionConfig": ".config",
83
+ # Types
84
+ "ExtractionResult": ".types",
85
+ "FieldResult": ".types",
86
+ "Metadata": ".types",
87
+ "ExtractionStatus": ".types",
88
+ # Exceptions
89
+ "NFieldError": ".exceptions",
90
+ "SchemaError": ".exceptions",
91
+ "ProviderError": ".exceptions",
92
+ "ExtractionError": ".exceptions",
93
+ "ValidationError": ".exceptions",
94
+ "AssemblyError": ".exceptions",
95
+ }
96
+
97
+
98
def __getattr__(name: str) -> object:
    """Resolve a lazily exported public name on first access (PEP 562).

    Python calls this hook only when normal module attribute lookup fails.
    The submodule that defines *name* is looked up in ``_dynamic_imports``
    and imported on demand. The resolved object is then stored in the module
    namespace, so later accesses are served by normal lookup instead of
    re-entering this hook and ``importlib.import_module`` every time.

    Args:
        name: Attribute being looked up on this module.

    Returns:
        The object called *name* from the submodule registered for it.

    Raises:
        AttributeError: If *name* is not one of the lazily exported names.
    """
    try:
        module_path = _dynamic_imports[name]
    except KeyError:
        raise AttributeError(f"module {__name__!r} has no attribute {name!r}") from None

    value = getattr(importlib.import_module(module_path, package=__name__), name)
    # Cache the resolved object so the next lookup bypasses __getattr__.
    globals()[name] = value
    return value
nfield/_version.py ADDED
@@ -0,0 +1,17 @@
1
+ """Single source of version truth.
2
+
3
+ The version is read from the installed distribution's metadata (which hatch-vcs
4
+ derives from git tags at build time). If the package is not installed, falls back to "0.0.0+unknown".
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
# Version string reported whenever the installed distribution metadata
# cannot be consulted.
_FALLBACK_VERSION = "0.0.0+unknown"

try:
    from importlib.metadata import PackageNotFoundError, version

    try:
        # Ask the installed distribution metadata for the real version.
        __version__ = version("nfield")
    except PackageNotFoundError:
        # No installed distribution named "nfield" was found.
        __version__ = _FALLBACK_VERSION
except ImportError:
    # importlib.metadata could not be imported on this interpreter.
    __version__ = _FALLBACK_VERSION
nfield/assembly/__init__.py ADDED
@@ -0,0 +1,49 @@
1
+ """nfield assembly module - JSON assembly, blackboard, and quality scoring.
2
+
3
+ Public surface
4
+ --------------
5
+ * :func:`assemble_json` - assemble flat SFEP results into a nested JSON dict.
6
+ * :func:`parse_path_segments` - parse dot-notation path into segment list.
7
+ * :class:`RadixTrie` - low-level trie for custom assembly workflows.
8
+ * :class:`Blackboard` - per-field state machine for extraction state tracking.
9
+ * :class:`FieldState` - enum of the 6 blackboard field states.
10
+ * :func:`compute_quality_score` - compute quality metrics from blackboard state.
11
+ * :class:`QualityReport` - immutable quality metrics dataclass.
12
+ """
13
+
14
+ from __future__ import annotations
15
+
16
+ import importlib
17
+ from typing import TYPE_CHECKING
18
+
19
+ if TYPE_CHECKING:
20
+ from ._blackboard import Blackboard, FieldState
21
+ from ._quality import QualityReport, compute_quality_score
22
+ from ._trie import RadixTrie, assemble_json, parse_path_segments
23
+
24
+ __all__ = [
25
+ "Blackboard",
26
+ "FieldState",
27
+ "QualityReport",
28
+ "RadixTrie",
29
+ "assemble_json",
30
+ "compute_quality_score",
31
+ "parse_path_segments",
32
+ ]
33
+
34
+ _dynamic_imports: dict[str, str] = {
35
+ "Blackboard": "._blackboard",
36
+ "FieldState": "._blackboard",
37
+ "QualityReport": "._quality",
38
+ "compute_quality_score": "._quality",
39
+ "RadixTrie": "._trie",
40
+ "assemble_json": "._trie",
41
+ "parse_path_segments": "._trie",
42
+ }
43
+
44
+
45
def __getattr__(name: str) -> object:
    """Resolve a lazily exported assembly name on first access (PEP 562).

    Python calls this hook only when normal module attribute lookup fails.
    The private submodule that defines *name* is looked up in
    ``_dynamic_imports`` and imported on demand. The resolved object is then
    stored in the module namespace, so later accesses are served by normal
    lookup instead of re-entering this hook each time.

    Args:
        name: Attribute being looked up on this module.

    Returns:
        The object called *name* from the submodule registered for it.

    Raises:
        AttributeError: If *name* is not one of the lazily exported names.
    """
    try:
        module_path = _dynamic_imports[name]
    except KeyError:
        raise AttributeError(f"module {__name__!r} has no attribute {name!r}") from None

    value = getattr(importlib.import_module(module_path, package=__name__), name)
    # Cache the resolved object so the next lookup bypasses __getattr__.
    globals()[name] = value
    return value
nfield/assembly/_blackboard.py ADDED
@@ -0,0 +1,488 @@
1
+ """Blackboard state machine for per-field extraction state tracking.
2
+
3
+ The Blackboard is the shared data structure that accumulates extraction
4
+ results across all capacity leaves and retry rounds. It tracks the state
5
+ of each field throughout the pipeline using a finite state machine with
6
+ 6 states per field.
7
+
8
+ State transitions
9
+ -----------------
10
+
11
+ EMPTY ──mark_pending()──► PENDING ──write()──► FILLED   (write() on EMPTY goes straight to FILLED)
12
+
13
+ mark_failed() ─────┼───► FAILED
14
+ mark_needs_revalidation() ► NEEDS_REVALIDATION
15
+ write() (conflict) ─────► CONFLICT
16
+
17
+ Notes
18
+ -----
19
+ * ``write_raw()`` is the dep-change-safe variant: it never overwrites a
20
+ FILLED field. The stored value is kept and the field is flagged
21
+ ``NEEDS_REVALIDATION``, because the dependency change may have made it stale.
22
+ * Cross-leaf conflict detection: if two leaves extract different non-None
23
+ values for the same field, the state transitions to ``CONFLICT`` and
24
+ both values are stored for reporting.
25
+ * A ``FAILED`` field can be recovered to ``FILLED`` by a later ``write()``. A
26
+ ``CONFLICT`` field only leaves that state via ``mark_needs_revalidation()`` or ``reopen_for_retry()``.
27
+ """
28
+
29
+ from __future__ import annotations
30
+
31
+ from enum import Enum
32
+ from typing import Any
33
+
34
+ from nfield.exceptions import AssemblyError
35
+
36
+ __all__ = [
37
+ "Blackboard",
38
+ "FieldState",
39
+ ]
40
+
41
+
42
+ # ---------------------------------------------------------------------------
43
+ # FieldState enum
44
+ # ---------------------------------------------------------------------------
45
+
46
+
47
class FieldState(Enum):
    """Lifecycle state of one field on the extraction blackboard.

    Members are declared in lifecycle order; iterating the enum yields them
    in this order.

    Attributes:
        EMPTY: No extraction output has touched the field yet.
        PENDING: Extraction for the field is in flight or not yet confirmed.
        FILLED: The field holds a validated (at least type-valid) value.
        FAILED: Extraction failed and no retry has recovered the field.
        CONFLICT: Two or more leaves produced different values for the field.
        NEEDS_REVALIDATION: The field is flagged for human or semantic review.

    Example:
        >>> FieldState.FILLED.value
        'filled'
    """

    EMPTY = "empty"
    PENDING = "pending"
    FILLED = "filled"
    FAILED = "failed"
    CONFLICT = "conflict"
    NEEDS_REVALIDATION = "needs_revalidation"
69
+
70
+
71
+ # ---------------------------------------------------------------------------
72
+ # Blackboard
73
+ # ---------------------------------------------------------------------------
74
+ #
75
+ # State transitions are enforced inline in each write/mark_* method below, not via
76
+ # a transition table: the real rules are value-dependent (a same-value re-write is a
77
+ # no-op, a different value escalates to CONFLICT, a transient flag tags a FAILED),
78
+ # which a flat state->states table cannot express. The legal moves, for reference:
79
+ # EMPTY -> PENDING | FILLED | FAILED
80
+ # PENDING -> FILLED | FAILED
81
+ # FILLED -> CONFLICT | FAILED | NEEDS_REVALIDATION (reopen_for_retry leaves FILLED unchanged)
82
+ # FAILED -> FILLED | NEEDS_REVALIDATION (+ reopen_for_retry -> PENDING)
83
+ # CONFLICT -> NEEDS_REVALIDATION (+ reopen_for_retry -> PENDING)
84
+ # NEEDS_REVALIDATION -> (terminal) (+ reopen_for_retry -> PENDING)
85
+
86
+
87
class Blackboard:
    """Per-field state machine tracking extraction results across all leaves.

    The Blackboard is initialized with the complete list of field paths
    from Stage 1 and accumulates values written by Stage 4 (extraction)
    and Stage 5 (validation + retry).

    Attributes:
        _states: Mapping of field path to current FieldState.
        _values: Mapping of field path to current typed value.
        _errors: Mapping of failed field paths to error messages.
        _conflict_values: Mapping of conflicted field paths to all seen values.
        _call_failed: Paths whose failure was flagged as a transient call error.

    Example:
        >>> bb = Blackboard(["name", "age"])
        >>> bb.write("name", "Alice")
        >>> bb.get_filled()
        {'name': 'Alice'}
        >>> bb.get_missing()
        ['age']
    """

    def __init__(self, paths: list[str]) -> None:
        """Initialise a Blackboard for the given field paths.

        Args:
            paths: All field paths from the flattened schema (Stage 1 output).
                All paths start in ``EMPTY`` state.

        Raises:
            ValueError: If *paths* contains duplicates. The message lists each
                duplicated path once, in order of first appearance.
        """
        if len(paths) != len(set(paths)):
            # Count in one pass so each duplicate is reported once and the
            # error path stays linear in the number of paths.
            counts: dict[str, int] = {}
            for p in paths:
                counts[p] = counts.get(p, 0) + 1
            duplicates = [p for p, n in counts.items() if n > 1]
            raise ValueError(f"Blackboard paths must be unique; duplicates: {duplicates}")
        self._states: dict[str, FieldState] = dict.fromkeys(paths, FieldState.EMPTY)
        self._values: dict[str, Any] = {}
        self._errors: dict[str, str] = {}
        self._conflict_values: dict[str, list[Any]] = {}
        # Paths whose FAILED state is a transient API/call failure (the call never
        # returned), tracked apart from a genuine "absent in document" failure.
        self._call_failed: set[str] = set()

    # ------------------------------------------------------------------
    # Write operations
    # ------------------------------------------------------------------

    def write(self, path: str, value: Any) -> None:
        """Write a value for a field, transitioning its state.

        Transitions:
            * ``EMPTY`` / ``PENDING`` / ``FAILED`` → ``FILLED``. For a
              ``FAILED`` field this is a recovery, so its recorded error and
              transient-failure flag are cleared.
            * ``FILLED`` with same value → no-op
            * ``FILLED`` with new value → ``CONFLICT`` (both values are kept)
            * ``CONFLICT`` → stays ``CONFLICT``; an unseen value is added to
              the conflict list
            * ``NEEDS_REVALIDATION`` → write is ignored

        Args:
            path: Dot-notation field path.
            value: Typed Python value from the SFEP parser.

        Raises:
            AssemblyError: If the path is not registered in this blackboard.
        """
        self._require_path(path)
        state = self._states[path]

        if state == FieldState.FILLED:
            existing = self._values.get(path)
            if existing == value:
                return  # Same value from a second leaf - no conflict
            # Different value from a second leaf - conflict.
            # NOTE(review): the module notes describe conflicts between
            # non-None values, but ``None`` is compared like any other value
            # here - confirm that is intended.
            self._conflict_values.setdefault(path, [existing])
            if value not in self._conflict_values[path]:
                self._conflict_values[path].append(value)
            self._states[path] = FieldState.CONFLICT
            return

        if state == FieldState.CONFLICT:
            # Already conflicted - accumulate additional values
            if value not in self._conflict_values.get(path, []):
                self._conflict_values.setdefault(path, []).append(value)
            return

        if state == FieldState.NEEDS_REVALIDATION:
            # Terminal state - no further writes
            return

        # EMPTY / PENDING / FAILED → FILLED
        self._values[path] = value
        self._states[path] = FieldState.FILLED
        self._call_failed.discard(path)
        # A recovered field must not keep reporting its old failure through
        # get_error(); reopen_for_retry() clears it the same way.
        self._errors.pop(path, None)

    def write_raw(self, path: str, value: Any) -> None:
        """Dependency-change-safe write: does not overwrite a FILLED field.

        Used when updating dependency values that may propagate to dependent
        fields. Prevents overwriting a valid extracted value with a stale
        dependency update.

        If the field is ``EMPTY`` or ``PENDING``, the value is stored and the
        field becomes ``FILLED``. If the field is already ``FILLED``, the write
        is silently discarded and the field is flagged ``NEEDS_REVALIDATION``
        (since its dependency changed, its value may be stale). ``FAILED``,
        ``CONFLICT`` and ``NEEDS_REVALIDATION`` fields are left untouched.

        Args:
            path: Dot-notation field path.
            value: New typed Python value.

        Raises:
            AssemblyError: If the path is not registered in this blackboard.
        """
        self._require_path(path)
        state = self._states[path]

        if state == FieldState.FILLED:
            # Dependency changed while this field already has a value -
            # flag for revalidation without overwriting
            self._states[path] = FieldState.NEEDS_REVALIDATION
            return

        if state in (FieldState.FAILED, FieldState.CONFLICT, FieldState.NEEDS_REVALIDATION):
            return  # Cannot update terminal/conflict states

        # EMPTY / PENDING → write normally
        self._values[path] = value
        self._states[path] = FieldState.FILLED

    # ------------------------------------------------------------------
    # State transitions
    # ------------------------------------------------------------------

    def mark_failed(self, path: str, error: str, *, transient: bool = False) -> None:
        """Transition a field to ``FAILED`` state with an error message.

        Applies to ``EMPTY``, ``PENDING``, ``FILLED`` and ``FAILED`` fields;
        a ``CONFLICT`` or ``NEEDS_REVALIDATION`` field is left unchanged.

        Args:
            path: Dot-notation field path.
            error: Human-readable description of the failure.
            transient: ``True`` when the failure is a call/API error (the request
                never returned) rather than the field being absent from the
                document. Tracked separately so reporting and recovery can tell a
                network blip from genuinely missing data.

        Raises:
            AssemblyError: If the path is not registered.
        """
        self._require_path(path)
        state = self._states[path]
        if state not in (
            FieldState.EMPTY,
            FieldState.PENDING,
            FieldState.FILLED,
            FieldState.FAILED,
        ):
            return  # Cannot transition from CONFLICT or NEEDS_REVALIDATION to FAILED
        self._states[path] = FieldState.FAILED
        self._errors[path] = error
        # The latest failure decides whether the path counts as a call failure.
        if transient:
            self._call_failed.add(path)
        else:
            self._call_failed.discard(path)

    def mark_needs_revalidation(self, path: str) -> None:
        """Transition a field to ``NEEDS_REVALIDATION`` state.

        The transition is unconditional: it applies whatever the field's
        current state is.

        Args:
            path: Dot-notation field path.

        Raises:
            AssemblyError: If the path is not registered.
        """
        self._require_path(path)
        self._states[path] = FieldState.NEEDS_REVALIDATION

    def mark_pending(self, path: str) -> None:
        """Transition a field from ``EMPTY`` to ``PENDING`` state.

        Used to indicate that extraction for this field is in-flight. Fields
        in any other state are left unchanged.

        Args:
            path: Dot-notation field path.

        Raises:
            AssemblyError: If the path is not registered.
        """
        self._require_path(path)
        if self._states[path] == FieldState.EMPTY:
            self._states[path] = FieldState.PENDING

    def reopen_for_retry(self, path: str) -> bool:
        """Reopen a FAILED / CONFLICT / NEEDS_REVALIDATION field for re-extraction.

        A controlled escape hatch for the retry orchestrator: it moves a field
        that the normal FSM treats as settled (or terminal) back to ``PENDING`` so
        a subsequent :meth:`write` can record a fresh value. Clears the field's
        prior error and any stored conflicting values, since the retry supersedes
        them. The last stored value is kept, so :meth:`get_value` still returns
        it. Fields in ``EMPTY``/``PENDING``/``FILLED`` are left unchanged.

        Args:
            path: Dot-notation field path.

        Returns:
            ``True`` if the field was reopened, ``False`` if its state was not
            eligible (so the caller knows whether a retry will be applied).

        Raises:
            AssemblyError: If the path is not registered.
        """
        self._require_path(path)
        if self._states[path] in (
            FieldState.FAILED,
            FieldState.CONFLICT,
            FieldState.NEEDS_REVALIDATION,
        ):
            self._states[path] = FieldState.PENDING
            self._errors.pop(path, None)
            self._conflict_values.pop(path, None)
            return True
        return False

    # ------------------------------------------------------------------
    # Read operations
    # ------------------------------------------------------------------

    def get_missing(self) -> list[str]:
        """Return paths of fields still in ``EMPTY`` state after extraction.

        Returns:
            Sorted list of dot-notation paths that were never extracted.

        Example:
            >>> bb = Blackboard(["a", "b"])
            >>> bb.write("a", 1)
            >>> bb.get_missing()
            ['b']
        """
        return sorted(p for p, s in self._states.items() if s == FieldState.EMPTY)

    def get_conflicts(self) -> list[str]:
        """Return paths of fields in ``CONFLICT`` state.

        Returns:
            Sorted list of dot-notation paths with conflicting values.

        Example:
            >>> bb = Blackboard(["x"])
            >>> bb.write("x", 1)
            >>> bb.write("x", 2)
            >>> bb.get_conflicts()
            ['x']
        """
        return sorted(p for p, s in self._states.items() if s == FieldState.CONFLICT)

    def get_needs_revalidation(self) -> list[str]:
        """Return paths of fields flagged for revalidation.

        Returns:
            Sorted list of dot-notation paths in NEEDS_REVALIDATION state.
        """
        return sorted(p for p, s in self._states.items() if s == FieldState.NEEDS_REVALIDATION)

    def get_failed(self) -> list[str]:
        """Return paths of fields in ``FAILED`` state.

        Returns:
            Sorted list of dot-notation paths that failed extraction.
        """
        return sorted(p for p, s in self._states.items() if s == FieldState.FAILED)

    def get_call_failed(self) -> list[str]:
        """Return paths whose ``FAILED`` state is a transient call/API failure.

        These are fields the model never got a chance to answer (the request
        failed), as opposed to fields it answered ``NULL`` (absent from the
        document). Used to report API failures distinctly from missing data.

        Returns:
            Sorted list of dot-notation paths still FAILED due to a call error.
        """
        # Filter on the current state: a flagged path may have left FAILED since.
        return sorted(p for p in self._call_failed if self._states.get(p) == FieldState.FAILED)

    def get_filled(self) -> dict[str, Any]:
        """Return fields that hold a real (non-``None``) extracted value.

        ``None`` is excluded on purpose: the recovery pass marks tree-backtracked
        "confirmed absent" fields ``FILLED`` with ``None`` (:meth:`write_raw`), but
        such a field has no value - it was confirmed missing, not extracted. Counting
        it as filled would overstate the extraction rate, so it is omitted here and
        therefore counted as missing by the quality metrics.

        Returns:
            Dict of ``{path: value}`` for ``FILLED`` fields whose value is not ``None``.

        Example:
            >>> bb = Blackboard(["name", "nickname"])
            >>> bb.write("name", "Alice")
            >>> bb.write_raw("nickname", None)  # confirmed absent
            >>> bb.get_filled()
            {'name': 'Alice'}
        """
        return {
            p: self._values[p]
            for p, s in self._states.items()
            if s == FieldState.FILLED and self._values.get(p) is not None
        }

    def get_value(self, path: str) -> Any:
        """Return the last value written for a field, regardless of its state.

        Unlike :meth:`get_filled`, this returns the stored value even for a ``FAILED``
        or ``NEEDS_REVALIDATION`` field, so a retry can show the model the exact value
        its previous attempt produced. Returns ``None`` if nothing was ever written.
        Unregistered paths also return ``None`` rather than raising.

        Args:
            path: Dot-notation field path.

        Returns:
            The stored value, or ``None`` if the field has no recorded value.
        """
        return self._values.get(path)

    def get_conflict_values(self, path: str) -> list[Any]:
        """Return all conflicting values seen for a field.

        Args:
            path: Dot-notation field path.

        Returns:
            A new list of all values written to a CONFLICT field (empty if the
            path has no recorded conflict).
        """
        return list(self._conflict_values.get(path, []))

    def get_state(self, path: str) -> FieldState:
        """Return the current state of a field.

        Args:
            path: Dot-notation field path.

        Returns:
            Current :class:`FieldState` for the path.

        Raises:
            AssemblyError: If the path is not registered.
        """
        self._require_path(path)
        return self._states[path]

    def get_error(self, path: str) -> str | None:
        """Return the error message recorded for a failed field.

        Args:
            path: Dot-notation field path.

        Returns:
            Error message string, or ``None`` if no error is recorded for the
            path (it never failed, or the failure was cleared by a recovering
            :meth:`write` or by :meth:`reopen_for_retry`).
        """
        return self._errors.get(path)

    def all_paths(self) -> list[str]:
        """Return all registered field paths in sorted order.

        Returns:
            Sorted list of all paths registered at construction.
        """
        return sorted(self._states)

    def summary(self) -> dict[str, int]:
        """Return a count of fields in each state.

        Returns:
            Dict mapping state name to field count. Every state is present,
            including those with a count of zero.

        Example:
            >>> bb = Blackboard(["a", "b", "c"])
            >>> bb.write("a", 1)
            >>> bb.mark_failed("b", "parse error")
            >>> bb.summary()
            {'empty': 1, 'pending': 0, 'filled': 1, 'failed': 1, 'conflict': 0, 'needs_revalidation': 0}
        """
        counts: dict[str, int] = {s.value: 0 for s in FieldState}
        for state in self._states.values():
            counts[state.value] += 1
        return counts

    # ------------------------------------------------------------------
    # Private
    # ------------------------------------------------------------------

    def _require_path(self, path: str) -> None:
        """Assert that *path* is registered in this blackboard.

        Args:
            path: Path to check.

        Raises:
            AssemblyError: If the path was not registered at construction.
        """
        if path not in self._states:
            raise AssemblyError(
                f"Unknown field path {path!r} - "
                "path must be registered at Blackboard construction",
                path=path,
            )