biblicus 1.0.0__py3-none-any.whl → 1.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- biblicus/__init__.py +5 -5
- biblicus/analysis/__init__.py +1 -1
- biblicus/analysis/base.py +10 -10
- biblicus/analysis/markov.py +78 -68
- biblicus/analysis/models.py +47 -47
- biblicus/analysis/profiling.py +58 -48
- biblicus/analysis/topic_modeling.py +56 -51
- biblicus/cli.py +224 -177
- biblicus/{recipes.py → configuration.py} +14 -14
- biblicus/constants.py +2 -2
- biblicus/context_engine/assembler.py +49 -19
- biblicus/context_engine/retrieval.py +46 -42
- biblicus/corpus.py +116 -108
- biblicus/errors.py +3 -3
- biblicus/evaluation.py +27 -25
- biblicus/extraction.py +103 -98
- biblicus/extraction_evaluation.py +26 -26
- biblicus/extractors/deepgram_stt.py +7 -7
- biblicus/extractors/docling_granite_text.py +11 -11
- biblicus/extractors/docling_smol_text.py +11 -11
- biblicus/extractors/markitdown_text.py +4 -4
- biblicus/extractors/openai_stt.py +7 -7
- biblicus/extractors/paddleocr_vl_text.py +20 -18
- biblicus/extractors/pipeline.py +8 -8
- biblicus/extractors/rapidocr_text.py +3 -3
- biblicus/extractors/unstructured_text.py +3 -3
- biblicus/hooks.py +4 -4
- biblicus/knowledge_base.py +33 -31
- biblicus/models.py +78 -78
- biblicus/retrieval.py +47 -40
- biblicus/retrievers/__init__.py +50 -0
- biblicus/retrievers/base.py +65 -0
- biblicus/{backends → retrievers}/embedding_index_common.py +44 -41
- biblicus/{backends → retrievers}/embedding_index_file.py +87 -58
- biblicus/{backends → retrievers}/embedding_index_inmemory.py +88 -59
- biblicus/retrievers/hybrid.py +301 -0
- biblicus/{backends → retrievers}/scan.py +83 -73
- biblicus/{backends → retrievers}/sqlite_full_text_search.py +115 -101
- biblicus/{backends → retrievers}/tf_vector.py +87 -77
- biblicus/text/prompts.py +16 -8
- biblicus/text/tool_loop.py +63 -5
- {biblicus-1.0.0.dist-info → biblicus-1.1.0.dist-info}/METADATA +30 -21
- biblicus-1.1.0.dist-info/RECORD +91 -0
- biblicus/backends/__init__.py +0 -50
- biblicus/backends/base.py +0 -65
- biblicus/backends/hybrid.py +0 -292
- biblicus-1.0.0.dist-info/RECORD +0 -91
- {biblicus-1.0.0.dist-info → biblicus-1.1.0.dist-info}/WHEEL +0 -0
- {biblicus-1.0.0.dist-info → biblicus-1.1.0.dist-info}/entry_points.txt +0 -0
- {biblicus-1.0.0.dist-info → biblicus-1.1.0.dist-info}/licenses/LICENSE +0 -0
- {biblicus-1.0.0.dist-info → biblicus-1.1.0.dist-info}/top_level.txt +0 -0
biblicus/__init__.py
CHANGED
|
@@ -16,13 +16,13 @@ from .context_engine import (
|
|
|
16
16
|
from .corpus import Corpus
|
|
17
17
|
from .knowledge_base import KnowledgeBase
|
|
18
18
|
from .models import (
|
|
19
|
+
ConfigurationManifest,
|
|
19
20
|
CorpusConfig,
|
|
20
21
|
Evidence,
|
|
21
22
|
IngestResult,
|
|
22
23
|
QueryBudget,
|
|
23
|
-
RecipeManifest,
|
|
24
24
|
RetrievalResult,
|
|
25
|
-
|
|
25
|
+
RetrievalSnapshot,
|
|
26
26
|
)
|
|
27
27
|
|
|
28
28
|
__all__ = [
|
|
@@ -42,9 +42,9 @@ __all__ = [
|
|
|
42
42
|
"IngestResult",
|
|
43
43
|
"KnowledgeBase",
|
|
44
44
|
"QueryBudget",
|
|
45
|
-
"
|
|
45
|
+
"ConfigurationManifest",
|
|
46
46
|
"RetrievalResult",
|
|
47
|
-
"
|
|
47
|
+
"RetrievalSnapshot",
|
|
48
48
|
]
|
|
49
49
|
|
|
50
|
-
__version__ = "1.
|
|
50
|
+
__version__ = "1.1.0"
|
biblicus/analysis/__init__.py
CHANGED
|
@@ -41,5 +41,5 @@ def get_analysis_backend(analysis_id: str) -> CorpusAnalysisBackend:
|
|
|
41
41
|
backend_class = registry.get(analysis_id)
|
|
42
42
|
if backend_class is None:
|
|
43
43
|
known = ", ".join(sorted(registry))
|
|
44
|
-
raise KeyError(f"Unknown analysis
|
|
44
|
+
raise KeyError(f"Unknown analysis retriever '{analysis_id}'. Known retrievers: {known}")
|
|
45
45
|
return backend_class()
|
biblicus/analysis/base.py
CHANGED
|
@@ -10,7 +10,7 @@ from typing import Dict
|
|
|
10
10
|
from pydantic import BaseModel
|
|
11
11
|
|
|
12
12
|
from ..corpus import Corpus
|
|
13
|
-
from ..models import
|
|
13
|
+
from ..models import ExtractionSnapshotReference
|
|
14
14
|
|
|
15
15
|
|
|
16
16
|
class CorpusAnalysisBackend(ABC):
|
|
@@ -28,21 +28,21 @@ class CorpusAnalysisBackend(ABC):
|
|
|
28
28
|
self,
|
|
29
29
|
corpus: Corpus,
|
|
30
30
|
*,
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
31
|
+
configuration_name: str,
|
|
32
|
+
configuration: Dict[str, object],
|
|
33
|
+
extraction_snapshot: ExtractionSnapshotReference,
|
|
34
34
|
) -> BaseModel:
|
|
35
35
|
"""
|
|
36
36
|
Run an analysis pipeline for a corpus.
|
|
37
37
|
|
|
38
38
|
:param corpus: Corpus to analyze.
|
|
39
39
|
:type corpus: Corpus
|
|
40
|
-
:param
|
|
41
|
-
:type
|
|
42
|
-
:param
|
|
43
|
-
:type
|
|
44
|
-
:param
|
|
45
|
-
:type
|
|
40
|
+
:param configuration_name: Human-readable configuration name.
|
|
41
|
+
:type configuration_name: str
|
|
42
|
+
:param configuration: Analysis configuration values.
|
|
43
|
+
:type configuration: dict[str, object]
|
|
44
|
+
:param extraction_snapshot: Extraction snapshot reference for text inputs.
|
|
45
|
+
:type extraction_snapshot: biblicus.models.ExtractionSnapshotReference
|
|
46
46
|
:return: Analysis output model.
|
|
47
47
|
:rtype: pydantic.BaseModel
|
|
48
48
|
"""
|
biblicus/analysis/markov.py
CHANGED
|
@@ -23,23 +23,23 @@ from ..context import (
|
|
|
23
23
|
fit_context_pack_to_token_budget,
|
|
24
24
|
)
|
|
25
25
|
from ..corpus import Corpus
|
|
26
|
-
from ..models import Evidence,
|
|
26
|
+
from ..models import Evidence, ExtractionSnapshotReference, QueryBudget, RetrievalResult
|
|
27
27
|
from ..retrieval import hash_text
|
|
28
28
|
from ..text.annotate import TextAnnotateRequest, apply_text_annotate
|
|
29
29
|
from ..text.extract import TextExtractRequest, apply_text_extract
|
|
30
30
|
from ..time import utc_now_iso
|
|
31
31
|
from .base import CorpusAnalysisBackend
|
|
32
32
|
from .models import (
|
|
33
|
-
|
|
33
|
+
AnalysisConfigurationManifest,
|
|
34
34
|
AnalysisRunInput,
|
|
35
35
|
AnalysisRunManifest,
|
|
36
36
|
MarkovAnalysisArtifactsGraphVizConfig,
|
|
37
|
+
MarkovAnalysisConfiguration,
|
|
37
38
|
MarkovAnalysisDecodedPath,
|
|
38
39
|
MarkovAnalysisModelFamily,
|
|
39
40
|
MarkovAnalysisObservation,
|
|
40
41
|
MarkovAnalysisObservationsEncoder,
|
|
41
42
|
MarkovAnalysisOutput,
|
|
42
|
-
MarkovAnalysisRecipeConfig,
|
|
43
43
|
MarkovAnalysisReport,
|
|
44
44
|
MarkovAnalysisSegment,
|
|
45
45
|
MarkovAnalysisSegmentationMethod,
|
|
@@ -107,67 +107,72 @@ class MarkovBackend(CorpusAnalysisBackend):
|
|
|
107
107
|
self,
|
|
108
108
|
corpus: Corpus,
|
|
109
109
|
*,
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
110
|
+
configuration_name: str,
|
|
111
|
+
configuration: Dict[str, object],
|
|
112
|
+
extraction_snapshot: ExtractionSnapshotReference,
|
|
113
113
|
) -> BaseModel:
|
|
114
114
|
"""
|
|
115
115
|
Run Markov analysis for a corpus.
|
|
116
116
|
|
|
117
117
|
:param corpus: Corpus to analyze.
|
|
118
118
|
:type corpus: Corpus
|
|
119
|
-
:param
|
|
120
|
-
:type
|
|
121
|
-
:param
|
|
122
|
-
:type
|
|
123
|
-
:param
|
|
124
|
-
:type
|
|
119
|
+
:param configuration_name: Human-readable configuration name.
|
|
120
|
+
:type configuration_name: str
|
|
121
|
+
:param configuration: Analysis configuration values.
|
|
122
|
+
:type configuration: dict[str, object]
|
|
123
|
+
:param extraction_snapshot: Extraction snapshot reference for text inputs.
|
|
124
|
+
:type extraction_snapshot: biblicus.models.ExtractionSnapshotReference
|
|
125
125
|
:return: Markov analysis output model.
|
|
126
126
|
:rtype: pydantic.BaseModel
|
|
127
127
|
"""
|
|
128
128
|
parsed_config = (
|
|
129
|
-
|
|
130
|
-
if isinstance(
|
|
131
|
-
else
|
|
129
|
+
configuration
|
|
130
|
+
if isinstance(configuration, MarkovAnalysisConfiguration)
|
|
131
|
+
else MarkovAnalysisConfiguration.model_validate(configuration)
|
|
132
132
|
)
|
|
133
133
|
return _run_markov(
|
|
134
134
|
corpus=corpus,
|
|
135
|
-
|
|
135
|
+
configuration_name=configuration_name,
|
|
136
136
|
config=parsed_config,
|
|
137
|
-
|
|
137
|
+
extraction_snapshot=extraction_snapshot,
|
|
138
138
|
)
|
|
139
139
|
|
|
140
140
|
|
|
141
141
|
def _run_markov(
|
|
142
142
|
*,
|
|
143
143
|
corpus: Corpus,
|
|
144
|
-
|
|
145
|
-
config:
|
|
146
|
-
|
|
144
|
+
configuration_name: str,
|
|
145
|
+
config: MarkovAnalysisConfiguration,
|
|
146
|
+
extraction_snapshot: ExtractionSnapshotReference,
|
|
147
147
|
) -> MarkovAnalysisOutput:
|
|
148
|
-
|
|
148
|
+
configuration_manifest = _create_configuration_manifest(
|
|
149
|
+
name=configuration_name,
|
|
150
|
+
config=config,
|
|
151
|
+
)
|
|
149
152
|
catalog = corpus.load_catalog()
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
+
snapshot_id = _analysis_snapshot_id(
|
|
154
|
+
configuration_id=configuration_manifest.configuration_id,
|
|
155
|
+
extraction_snapshot=extraction_snapshot,
|
|
153
156
|
catalog_generated_at=catalog.generated_at,
|
|
154
157
|
)
|
|
155
158
|
run_manifest = AnalysisRunManifest(
|
|
156
|
-
|
|
157
|
-
|
|
159
|
+
snapshot_id=snapshot_id,
|
|
160
|
+
configuration=configuration_manifest,
|
|
158
161
|
corpus_uri=catalog.corpus_uri,
|
|
159
162
|
catalog_generated_at=catalog.generated_at,
|
|
160
163
|
created_at=utc_now_iso(),
|
|
161
|
-
input=AnalysisRunInput(
|
|
164
|
+
input=AnalysisRunInput(extraction_snapshot=extraction_snapshot),
|
|
162
165
|
artifact_paths=[],
|
|
163
166
|
stats={},
|
|
164
167
|
)
|
|
165
|
-
run_dir = corpus.analysis_run_dir(
|
|
168
|
+
run_dir = corpus.analysis_run_dir(
|
|
169
|
+
analysis_id=MarkovBackend.analysis_id, snapshot_id=snapshot_id
|
|
170
|
+
)
|
|
166
171
|
run_dir.mkdir(parents=True, exist_ok=True)
|
|
167
172
|
|
|
168
173
|
documents, text_report = _collect_documents(
|
|
169
174
|
corpus=corpus,
|
|
170
|
-
|
|
175
|
+
extraction_snapshot=extraction_snapshot,
|
|
171
176
|
config=config.text_source,
|
|
172
177
|
)
|
|
173
178
|
segments = _segment_documents(documents=documents, config=config)
|
|
@@ -251,17 +256,17 @@ def _run_markov(
|
|
|
251
256
|
output = MarkovAnalysisOutput(
|
|
252
257
|
analysis_id=MarkovBackend.analysis_id,
|
|
253
258
|
generated_at=utc_now_iso(),
|
|
254
|
-
|
|
259
|
+
snapshot=run_manifest,
|
|
255
260
|
report=report,
|
|
256
261
|
)
|
|
257
262
|
(run_dir / "output.json").write_text(output.model_dump_json(indent=2) + "\n", encoding="utf-8")
|
|
258
263
|
return output
|
|
259
264
|
|
|
260
265
|
|
|
261
|
-
def
|
|
262
|
-
*, name: str, config:
|
|
263
|
-
) ->
|
|
264
|
-
|
|
266
|
+
def _create_configuration_manifest(
|
|
267
|
+
*, name: str, config: MarkovAnalysisConfiguration
|
|
268
|
+
) -> AnalysisConfigurationManifest:
|
|
269
|
+
configuration_payload = json.dumps(
|
|
265
270
|
{
|
|
266
271
|
"analysis_id": MarkovBackend.analysis_id,
|
|
267
272
|
"name": name,
|
|
@@ -269,9 +274,9 @@ def _create_recipe_manifest(
|
|
|
269
274
|
},
|
|
270
275
|
sort_keys=True,
|
|
271
276
|
)
|
|
272
|
-
|
|
273
|
-
return
|
|
274
|
-
|
|
277
|
+
configuration_id = hash_text(configuration_payload)
|
|
278
|
+
return AnalysisConfigurationManifest(
|
|
279
|
+
configuration_id=configuration_id,
|
|
275
280
|
analysis_id=MarkovBackend.analysis_id,
|
|
276
281
|
name=name,
|
|
277
282
|
created_at=utc_now_iso(),
|
|
@@ -279,22 +284,25 @@ def _create_recipe_manifest(
|
|
|
279
284
|
)
|
|
280
285
|
|
|
281
286
|
|
|
282
|
-
def
|
|
283
|
-
*,
|
|
287
|
+
def _analysis_snapshot_id(
|
|
288
|
+
*,
|
|
289
|
+
configuration_id: str,
|
|
290
|
+
extraction_snapshot: ExtractionSnapshotReference,
|
|
291
|
+
catalog_generated_at: str,
|
|
284
292
|
) -> str:
|
|
285
|
-
run_seed = f"{
|
|
293
|
+
run_seed = f"{configuration_id}:{extraction_snapshot.as_string()}:{catalog_generated_at}"
|
|
286
294
|
return hash_text(run_seed)
|
|
287
295
|
|
|
288
296
|
|
|
289
297
|
def _collect_documents(
|
|
290
298
|
*,
|
|
291
299
|
corpus: Corpus,
|
|
292
|
-
|
|
300
|
+
extraction_snapshot: ExtractionSnapshotReference,
|
|
293
301
|
config: MarkovAnalysisTextSourceConfig,
|
|
294
302
|
) -> Tuple[List[_Document], MarkovAnalysisTextCollectionReport]:
|
|
295
|
-
manifest = corpus.
|
|
296
|
-
extractor_id=
|
|
297
|
-
|
|
303
|
+
manifest = corpus.load_extraction_snapshot_manifest(
|
|
304
|
+
extractor_id=extraction_snapshot.extractor_id,
|
|
305
|
+
snapshot_id=extraction_snapshot.snapshot_id,
|
|
298
306
|
)
|
|
299
307
|
warnings: List[str] = []
|
|
300
308
|
errors: List[str] = []
|
|
@@ -302,9 +310,9 @@ def _collect_documents(
|
|
|
302
310
|
skipped_items = 0
|
|
303
311
|
empty_texts = 0
|
|
304
312
|
|
|
305
|
-
run_root = corpus.
|
|
306
|
-
extractor_id=
|
|
307
|
-
|
|
313
|
+
run_root = corpus.extraction_snapshot_dir(
|
|
314
|
+
extractor_id=extraction_snapshot.extractor_id,
|
|
315
|
+
snapshot_id=extraction_snapshot.snapshot_id,
|
|
308
316
|
)
|
|
309
317
|
for item_result in manifest.items:
|
|
310
318
|
if item_result.status != "extracted" or item_result.final_text_relpath is None:
|
|
@@ -342,7 +350,7 @@ def _collect_documents(
|
|
|
342
350
|
|
|
343
351
|
|
|
344
352
|
def _segment_documents(
|
|
345
|
-
*, documents: Sequence[_Document], config:
|
|
353
|
+
*, documents: Sequence[_Document], config: MarkovAnalysisConfiguration
|
|
346
354
|
) -> List[MarkovAnalysisSegment]:
|
|
347
355
|
segments: List[MarkovAnalysisSegment] = []
|
|
348
356
|
method = config.segmentation.method
|
|
@@ -469,7 +477,7 @@ def _fixed_window_segments(
|
|
|
469
477
|
|
|
470
478
|
|
|
471
479
|
def _llm_segments(
|
|
472
|
-
*, item_id: str, text: str, config:
|
|
480
|
+
*, item_id: str, text: str, config: MarkovAnalysisConfiguration
|
|
473
481
|
) -> List[MarkovAnalysisSegment]:
|
|
474
482
|
llm_config = config.segmentation.llm
|
|
475
483
|
if llm_config is None:
|
|
@@ -499,7 +507,7 @@ def _llm_segments(
|
|
|
499
507
|
|
|
500
508
|
|
|
501
509
|
def _span_markup_segments(
|
|
502
|
-
*, item_id: str, text: str, config:
|
|
510
|
+
*, item_id: str, text: str, config: MarkovAnalysisConfiguration
|
|
503
511
|
) -> List[MarkovAnalysisSegment]:
|
|
504
512
|
markup_config = config.segmentation.span_markup
|
|
505
513
|
if markup_config is None:
|
|
@@ -561,7 +569,7 @@ def _span_markup_segments(
|
|
|
561
569
|
|
|
562
570
|
|
|
563
571
|
def _verify_end_label(
|
|
564
|
-
*, text: str, config:
|
|
572
|
+
*, text: str, config: MarkovAnalysisConfiguration
|
|
565
573
|
) -> Optional[Dict[str, object]]:
|
|
566
574
|
markup_config = config.segmentation.span_markup
|
|
567
575
|
if markup_config is None or markup_config.end_label_verifier is None:
|
|
@@ -585,7 +593,7 @@ def _apply_start_end_labels(
|
|
|
585
593
|
*,
|
|
586
594
|
item_id: str,
|
|
587
595
|
payloads: Sequence[Dict[str, object]],
|
|
588
|
-
config:
|
|
596
|
+
config: MarkovAnalysisConfiguration,
|
|
589
597
|
) -> List[MarkovAnalysisSegment]:
|
|
590
598
|
markup_config = config.segmentation.span_markup
|
|
591
599
|
if markup_config is None:
|
|
@@ -670,7 +678,7 @@ def _sequence_lengths(segments: Sequence[MarkovAnalysisSegment]) -> List[int]:
|
|
|
670
678
|
|
|
671
679
|
|
|
672
680
|
def _build_observations(
|
|
673
|
-
*, segments: Sequence[MarkovAnalysisSegment], config:
|
|
681
|
+
*, segments: Sequence[MarkovAnalysisSegment], config: MarkovAnalysisConfiguration
|
|
674
682
|
) -> List[MarkovAnalysisObservation]:
|
|
675
683
|
observations: List[MarkovAnalysisObservation] = []
|
|
676
684
|
for segment in segments:
|
|
@@ -765,13 +773,15 @@ def _topic_document_id(*, item_id: str, segment_index: int) -> str:
|
|
|
765
773
|
def _apply_topic_modeling(
|
|
766
774
|
*,
|
|
767
775
|
observations: Sequence[MarkovAnalysisObservation],
|
|
768
|
-
config:
|
|
776
|
+
config: MarkovAnalysisConfiguration,
|
|
769
777
|
) -> Tuple[List[MarkovAnalysisObservation], Optional[TopicModelingReport]]:
|
|
770
778
|
topic_config = config.topic_modeling
|
|
771
779
|
if not topic_config.enabled:
|
|
772
780
|
return list(observations), None
|
|
773
|
-
if topic_config.
|
|
774
|
-
raise ValueError(
|
|
781
|
+
if topic_config.configuration is None:
|
|
782
|
+
raise ValueError(
|
|
783
|
+
"topic_modeling.configuration is required when topic_modeling.enabled is true"
|
|
784
|
+
)
|
|
775
785
|
|
|
776
786
|
documents: List[TopicModelingDocument] = []
|
|
777
787
|
for observation in observations:
|
|
@@ -793,7 +803,7 @@ def _apply_topic_modeling(
|
|
|
793
803
|
|
|
794
804
|
report = run_topic_modeling_for_documents(
|
|
795
805
|
documents=documents,
|
|
796
|
-
config=topic_config.
|
|
806
|
+
config=topic_config.configuration,
|
|
797
807
|
)
|
|
798
808
|
|
|
799
809
|
topic_lookup: Dict[str, Tuple[int, str]] = {}
|
|
@@ -830,7 +840,7 @@ def _apply_topic_modeling(
|
|
|
830
840
|
|
|
831
841
|
|
|
832
842
|
def _encode_observations(
|
|
833
|
-
*, observations: Sequence[MarkovAnalysisObservation], config:
|
|
843
|
+
*, observations: Sequence[MarkovAnalysisObservation], config: MarkovAnalysisConfiguration
|
|
834
844
|
) -> Tuple[object, List[int]]:
|
|
835
845
|
lengths = _sequence_lengths(
|
|
836
846
|
[
|
|
@@ -960,7 +970,7 @@ def _tfidf_encode(
|
|
|
960
970
|
|
|
961
971
|
|
|
962
972
|
def _fit_and_decode(
|
|
963
|
-
*, observations: object, lengths: List[int], config:
|
|
973
|
+
*, observations: object, lengths: List[int], config: MarkovAnalysisConfiguration
|
|
964
974
|
) -> Tuple[List[int], List[MarkovAnalysisTransition], int]:
|
|
965
975
|
def normalize_startprob(values: Sequence[float]) -> List[float]:
|
|
966
976
|
cleaned = [float(value) if math.isfinite(float(value)) else 0.0 for value in values]
|
|
@@ -1135,7 +1145,7 @@ def _build_states(
|
|
|
1135
1145
|
def _state_naming_context_pack(
|
|
1136
1146
|
*,
|
|
1137
1147
|
states: Sequence[MarkovAnalysisState],
|
|
1138
|
-
config:
|
|
1148
|
+
config: MarkovAnalysisConfiguration,
|
|
1139
1149
|
position_stats: Optional[Dict[int, Dict[str, float]]] = None,
|
|
1140
1150
|
) -> Tuple[ContextPack, ContextPackPolicy]:
|
|
1141
1151
|
naming = config.report.state_naming
|
|
@@ -1165,8 +1175,8 @@ def _state_naming_context_pack(
|
|
|
1165
1175
|
text=f"State {state.state_id}:\n{hint_text}",
|
|
1166
1176
|
stage="state-naming",
|
|
1167
1177
|
stage_scores=None,
|
|
1168
|
-
|
|
1169
|
-
|
|
1178
|
+
configuration_id="state-naming",
|
|
1179
|
+
snapshot_id="state-naming",
|
|
1170
1180
|
hash=None,
|
|
1171
1181
|
)
|
|
1172
1182
|
)
|
|
@@ -1184,8 +1194,8 @@ def _state_naming_context_pack(
|
|
|
1184
1194
|
text=text,
|
|
1185
1195
|
stage="state-naming",
|
|
1186
1196
|
stage_scores=None,
|
|
1187
|
-
|
|
1188
|
-
|
|
1197
|
+
configuration_id="state-naming",
|
|
1198
|
+
snapshot_id="state-naming",
|
|
1189
1199
|
hash=None,
|
|
1190
1200
|
)
|
|
1191
1201
|
)
|
|
@@ -1193,9 +1203,9 @@ def _state_naming_context_pack(
|
|
|
1193
1203
|
retrieval_result = RetrievalResult(
|
|
1194
1204
|
query_text="state-naming",
|
|
1195
1205
|
budget=QueryBudget(max_total_items=max(len(evidence), 1)),
|
|
1196
|
-
|
|
1197
|
-
|
|
1198
|
-
|
|
1206
|
+
snapshot_id="state-naming",
|
|
1207
|
+
configuration_id="state-naming",
|
|
1208
|
+
retriever_id="state-naming",
|
|
1199
1209
|
generated_at=utc_now_iso(),
|
|
1200
1210
|
evidence=evidence,
|
|
1201
1211
|
stats={},
|
|
@@ -1279,7 +1289,7 @@ def _assign_state_names(
|
|
|
1279
1289
|
*,
|
|
1280
1290
|
states: Sequence[MarkovAnalysisState],
|
|
1281
1291
|
decoded_paths: Sequence[MarkovAnalysisDecodedPath],
|
|
1282
|
-
config:
|
|
1292
|
+
config: MarkovAnalysisConfiguration,
|
|
1283
1293
|
) -> List[MarkovAnalysisState]:
|
|
1284
1294
|
naming = config.report.state_naming
|
|
1285
1295
|
if naming is None or not naming.enabled:
|