biblicus-0.7.0-py3-none-any.whl → biblicus-0.9.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
biblicus/__init__.py CHANGED
@@ -27,4 +27,4 @@ __all__ = [
     "RetrievalRun",
 ]
 
-__version__ = "0.7.0"
+__version__ = "0.9.0"

@@ -0,0 +1,40 @@
+"""
+Analysis backend registry for Biblicus.
+"""
+
+from __future__ import annotations
+
+from typing import Dict, Type
+
+from .base import CorpusAnalysisBackend
+from .topic_modeling import TopicModelingBackend
+
+
+def available_analysis_backends() -> Dict[str, Type[CorpusAnalysisBackend]]:
+    """
+    Return the registered analysis backends.
+
+    :return: Mapping of analysis identifiers to backend classes.
+    :rtype: dict[str, Type[CorpusAnalysisBackend]]
+    """
+    return {
+        TopicModelingBackend.analysis_id: TopicModelingBackend,
+    }
+
+
+def get_analysis_backend(analysis_id: str) -> CorpusAnalysisBackend:
+    """
+    Instantiate an analysis backend by identifier.
+
+    :param analysis_id: Analysis backend identifier.
+    :type analysis_id: str
+    :return: Analysis backend instance.
+    :rtype: CorpusAnalysisBackend
+    :raises KeyError: If the analysis backend identifier is unknown.
+    """
+    registry = available_analysis_backends()
+    backend_class = registry.get(analysis_id)
+    if backend_class is None:
+        known = ", ".join(sorted(registry))
+        raise KeyError(f"Unknown analysis backend '{analysis_id}'. Known backends: {known}")
+    return backend_class()
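
The registry is a plain dict keyed by each backend's analysis_id, so callers can enumerate backends or instantiate one by name. A minimal usage sketch; the import path biblicus.analysis and the "topic_modeling" identifier string are assumptions, since neither the added file's path nor the value of TopicModelingBackend.analysis_id appears in this diff:

    # Hypothetical usage; module path and identifier value are assumed.
    from biblicus.analysis import available_analysis_backends, get_analysis_backend

    print(sorted(available_analysis_backends()))      # list registered identifiers
    backend = get_analysis_backend("topic_modeling")  # raises KeyError if unknown
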

@@ -0,0 +1,49 @@
+"""
+Analysis backend interface for Biblicus.
+"""
+
+from __future__ import annotations
+
+from abc import ABC, abstractmethod
+from typing import Dict
+
+from pydantic import BaseModel
+
+from ..corpus import Corpus
+from ..models import ExtractionRunReference
+
+
+class CorpusAnalysisBackend(ABC):
+    """
+    Abstract interface for analysis backends.
+
+    :ivar analysis_id: Identifier string for the analysis backend.
+    :vartype analysis_id: str
+    """
+
+    analysis_id: str
+
+    @abstractmethod
+    def run_analysis(
+        self,
+        corpus: Corpus,
+        *,
+        recipe_name: str,
+        config: Dict[str, object],
+        extraction_run: ExtractionRunReference,
+    ) -> BaseModel:
+        """
+        Run an analysis pipeline for a corpus.
+
+        :param corpus: Corpus to analyze.
+        :type corpus: Corpus
+        :param recipe_name: Human-readable recipe name.
+        :type recipe_name: str
+        :param config: Analysis configuration values.
+        :type config: dict[str, object]
+        :param extraction_run: Extraction run reference for text inputs.
+        :type extraction_run: biblicus.models.ExtractionRunReference
+        :return: Analysis output model.
+        :rtype: pydantic.BaseModel
+        """
+        raise NotImplementedError
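
CorpusAnalysisBackend is the extension point the registry dispatches on: a concrete backend sets analysis_id and implements run_analysis, which takes keyword-only arguments and returns a pydantic model. A minimal subclass sketch; WordCountBackend, WordCountResult, and the identifier string are hypothetical, and the body ignores the corpus contents because the Corpus API is not shown in this diff:

    # Hypothetical backend; only the abstract interface comes from the diff.
    from pydantic import BaseModel

    class WordCountResult(BaseModel):
        recipe_name: str
        total_words: int

    class WordCountBackend(CorpusAnalysisBackend):
        analysis_id = "word_count"  # assumed identifier string

        def run_analysis(self, corpus, *, recipe_name, config, extraction_run):
            # A real backend would read extracted text via extraction_run.
            return WordCountResult(recipe_name=recipe_name, total_words=0)
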

@@ -0,0 +1,106 @@
+"""
+Lightweight LLM client configuration for analysis pipelines.
+"""
+
+from __future__ import annotations
+
+from enum import Enum
+from typing import Optional
+
+from pydantic import Field, field_validator
+
+from ..user_config import resolve_openai_api_key
+from .schema import AnalysisSchemaModel
+
+
+class LlmProvider(str, Enum):
+    """
+    Supported LLM providers.
+    """
+
+    OPENAI = "openai"
+
+
+class LlmClientConfig(AnalysisSchemaModel):
+    """
+    Configuration for an LLM client invocation.
+
+    :ivar provider: LLM provider identifier.
+    :vartype provider: LlmProvider
+    :ivar model: Model identifier for the provider.
+    :vartype model: str
+    :ivar api_key: Optional API key override.
+    :vartype api_key: str or None
+    :ivar temperature: Optional generation temperature.
+    :vartype temperature: float or None
+    :ivar max_tokens: Optional maximum output tokens.
+    :vartype max_tokens: int or None
+    :ivar max_retries: Optional maximum retry count for transient failures.
+    :vartype max_retries: int
+    """
+
+    provider: LlmProvider
+    model: str = Field(min_length=1)
+    api_key: Optional[str] = None
+    temperature: Optional[float] = Field(default=None, ge=0.0)
+    max_tokens: Optional[int] = Field(default=None, ge=1)
+    max_retries: int = Field(default=0, ge=0)
+
+    @field_validator("provider", mode="before")
+    @classmethod
+    def _parse_provider(cls, value: object) -> LlmProvider:
+        if isinstance(value, LlmProvider):
+            return value
+        if isinstance(value, str):
+            return LlmProvider(value)
+        raise ValueError("llm client provider must be a string or LlmProvider")
+
+
+def generate_completion(
+    *,
+    client: LlmClientConfig,
+    system_prompt: Optional[str],
+    user_prompt: str,
+) -> str:
+    """
+    Generate a completion using the configured LLM provider.
+
+    :param client: LLM client configuration.
+    :type client: LlmClientConfig
+    :param system_prompt: Optional system prompt content.
+    :type system_prompt: str or None
+    :param user_prompt: User prompt content.
+    :type user_prompt: str
+    :return: Generated completion text.
+    :rtype: str
+    :raises ValueError: If required dependencies or credentials are missing.
+    """
+    try:
+        from openai import OpenAI
+    except ImportError as import_error:
+        raise ValueError(
+            "OpenAI LLM provider requires an optional dependency. "
+            'Install it with pip install "biblicus[openai]".'
+        ) from import_error
+    api_key = client.api_key or resolve_openai_api_key()
+    if api_key is None:
+        raise ValueError(
+            "OpenAI LLM provider requires an OpenAI API key. "
+            "Set OPENAI_API_KEY or configure it in ~/.biblicus/config.yml or ./.biblicus/config.yml under "
+            "openai.api_key."
+        )
+
+    messages = []
+    if system_prompt:
+        messages.append({"role": "system", "content": system_prompt})
+    messages.append({"role": "user", "content": user_prompt})
+
+    client_instance = OpenAI(api_key=api_key)
+    response = client_instance.chat.completions.create(
+        model=client.model,
+        messages=messages,
+        temperature=client.temperature,
+        max_tokens=client.max_tokens,
+    )
+    content = response.choices[0].message.content
+    return str(content or "")
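
generate_completion is keyword-only and resolves credentials from the config first, then from resolve_openai_api_key. The before-mode validator lets callers pass the provider as a plain string. A usage sketch; the model name is only an example, and OPENAI_API_KEY is assumed to be set in the environment:

    # Usage sketch; the model name is an example, not mandated by the package.
    config = LlmClientConfig(provider="openai", model="gpt-4o-mini")
    text = generate_completion(
        client=config,
        system_prompt="You are a concise summarizer.",
        user_prompt="Summarize the corpus findings in one sentence.",
    )

Note that max_retries is validated on the config (default 0) but is not consulted anywhere in generate_completion in this version.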