pop-python 1.0.3__py3-none-any.whl → 1.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (58)
  1. POP/Embedder.py +121 -119
  2. POP/__init__.py +34 -16
  3. POP/api_registry.py +148 -0
  4. POP/context.py +47 -0
  5. POP/env_api_keys.py +33 -0
  6. POP/models.py +20 -0
  7. POP/prompt_function.py +378 -0
  8. POP/prompts/__init__.py +8 -0
  9. POP/prompts/openai-json_schema_generator.md +12 -161
  10. POP/providers/__init__.py +33 -0
  11. POP/providers/deepseek_client.py +69 -0
  12. POP/providers/doubao_client.py +101 -0
  13. POP/providers/gemini_client.py +119 -0
  14. POP/providers/llm_client.py +60 -0
  15. POP/providers/local_client.py +45 -0
  16. POP/providers/ollama_client.py +129 -0
  17. POP/providers/openai_client.py +100 -0
  18. POP/stream.py +77 -0
  19. POP/utils/__init__.py +9 -0
  20. POP/utils/event_stream.py +43 -0
  21. POP/utils/http_proxy.py +16 -0
  22. POP/utils/json_parse.py +21 -0
  23. POP/utils/oauth/__init__.py +31 -0
  24. POP/utils/overflow.py +33 -0
  25. POP/utils/sanitize_unicode.py +18 -0
  26. POP/utils/validation.py +23 -0
  27. POP/utils/web_snapshot.py +108 -0
  28. {pop_python-1.0.3.dist-info → pop_python-1.1.0.dist-info}/METADATA +160 -57
  29. pop_python-1.1.0.dist-info/RECORD +42 -0
  30. {pop_python-1.0.3.dist-info → pop_python-1.1.0.dist-info}/WHEEL +1 -1
  31. pop_python-1.1.0.dist-info/top_level.txt +2 -0
  32. tests/__init__.py +0 -0
  33. tests/conftest.py +47 -0
  34. tests/test_api_registry.py +36 -0
  35. tests/test_context_utils.py +54 -0
  36. tests/test_embedder.py +64 -0
  37. tests/test_env_api_keys.py +15 -0
  38. tests/test_prompt_function.py +98 -0
  39. tests/test_web_snapshot.py +47 -0
  40. POP/LLMClient.py +0 -403
  41. POP/POP.py +0 -392
  42. POP/prompts/2024-11-19-content_finder.md +0 -46
  43. POP/prompts/2024-11-19-get_content.md +0 -71
  44. POP/prompts/2024-11-19-get_title_and_url.md +0 -62
  45. POP/prompts/CLI_AI_helper.md +0 -75
  46. POP/prompts/content_finder.md +0 -42
  47. POP/prompts/corpus_splitter.md +0 -28
  48. POP/prompts/function_code_generator.md +0 -51
  49. POP/prompts/function_description_generator.md +0 -45
  50. POP/prompts/get_content.md +0 -75
  51. POP/prompts/get_title_and_url.md +0 -62
  52. POP/prompts/openai-function_description_generator.md +0 -126
  53. POP/prompts/openai-prompt_generator.md +0 -49
  54. POP/schemas/biomedical_ner_extractor.json +0 -37
  55. POP/schemas/entity_extraction_per_sentence.json +0 -92
  56. pop_python-1.0.3.dist-info/RECORD +0 -26
  57. pop_python-1.0.3.dist-info/top_level.txt +0 -1
  58. {pop_python-1.0.3.dist-info → pop_python-1.1.0.dist-info}/licenses/LICENSE +0 -0
POP/Embedder.py CHANGED
@@ -1,44 +1,74 @@
-# Embedder.py
+"""
+Embedding utilities for POP.
+
+This module implements a unified embedding interface capable of
+fetching embeddings via third‑party APIs (JinaAI, OpenAI) or via
+a local PyTorch model. It is largely derived from the original
+POP project’s ``Embedder.py`` and can be used independently of
+``PromptFunction``.
+
+Example usage:
+
+>>> from pop.embedder import Embedder
+>>> embedder = Embedder(use_api='openai')
+>>> vectors = embedder.get_embedding(["Hello, world!"])
+
+The return value is a numpy array of shape (n_texts, embedding_dim).
+"""
+
 import numpy as np
 import openai
-import requests as HTTPRequests ## some packages already have "requests"
+import requests as HTTPRequests
 from os import getenv
 from backoff import on_exception, expo
-
+from typing import List
 
 from transformers import AutoTokenizer, AutoModel
 
+# Maximum number of tokens permitted by the Jina segmenter
 MAX_TOKENS = 8194
 
 class Embedder:
-    def __init__(self, model_name=None, use_api=None, to_cuda=False, attn_implementation=None):
-        """
-        Initializes the Embedder class, which supports multiple embedding methods, including Jina API,
-        OpenAI API, and local model embeddings.
-
-        Args:
-            model_name (str): Name of the model to use for embedding.
-            use_api (str): Flag to determine whether to use an API for embedding ('jina', 'openai') or a local model (None).
-            to_cuda (bool): If True, use GPU; otherwise use CPU. (Some model must run on GPU)
-            attn_implementation (str): Attention implementation method for the transformer model.
-        """
+    """
+    A class supporting multiple embedding methods, including Jina API,
+    OpenAI API, and local model embeddings via PyTorch.
+
+    Parameters
+    ----------
+    model_name:
+        Name of the model to use for embedding. If ``None`` the default
+        model for the selected API will be chosen.
+    use_api:
+        Which API to use for embedding. Supported values are
+        ``'jina'``, ``'openai'`` and ``None`` (for local embedding).
+    to_cuda:
+        If ``True``, use GPU; otherwise use CPU for local embeddings.
+    attn_implementation:
+        Optional attention implementation to pass to the transformer
+        when loading the local model.
+    """
+
+    def __init__(self, model_name: str = None, use_api: str = None,
+                 to_cuda: bool = False, attn_implementation: str = None):
         self.use_api = use_api
         self.model_name = model_name
         self.to_cuda = to_cuda
 
-        # API-based embedding initialization
-        if self.use_api or self.use_api == "":
-            supported_apis = ["", 'jina', 'openai',]
+        # API-based embedding initialisation
+        if self.use_api is not None:
+            supported_apis = ['', 'jina', 'openai']
             if self.use_api not in supported_apis:
                 raise ValueError(f"API type '{self.use_api}' not supported. Supported APIs: {supported_apis}")
-
-        elif self.use_api == "": # default
-            self.use_api == 'openai'
 
-        elif self.use_api == 'jina':
-            pass # maybe add something later
+            if self.use_api == '':
+                # empty string falls back to OpenAI
+                self.use_api = 'openai'
 
+        if self.use_api == 'jina':
+            # The Jina client requires an API key; nothing to initialise
+            self.client = None
         elif self.use_api == 'openai':
+            # Initialise OpenAI client
             self.client = openai.Client(api_key=getenv("OPENAI_API_KEY"))
         else:
             # Load PyTorch model for local embedding generation
@@ -47,92 +77,86 @@ class Embedder:
         self.attn_implementation = attn_implementation
         self._initialize_local_model()
 
-    def _initialize_local_model(self):
-        import torch # Importing PyTorch only when needed
+    def _initialize_local_model(self) -> None:
+        """Initialise the PyTorch model and tokenizer for local embedding generation."""
+        import torch
         import torch.nn.functional as F
 
-
-        """Initializes the PyTorch model and tokenizer for local embedding generation."""
         if self.attn_implementation:
-            self.model = AutoModel.from_pretrained(self.model_name,
-                                                   trust_remote_code=True,
-                                                   attn_implementation=self.attn_implementation,
-                                                   torch_dtype=torch.float16).to('cuda' if self.to_cuda else 'cpu')
+            self.model = AutoModel.from_pretrained(
+                self.model_name,
+                trust_remote_code=True,
+                attn_implementation=self.attn_implementation,
+                torch_dtype=torch.float16,
+            ).to('cuda' if self.to_cuda else 'cpu')
         else:
-            self.model = AutoModel.from_pretrained(self.model_name,
-                                                   trust_remote_code=True,
-                                                   torch_dtype=torch.float16).to('cuda' if self.to_cuda else 'cpu')
+            self.model = AutoModel.from_pretrained(
+                self.model_name,
+                trust_remote_code=True,
+                torch_dtype=torch.float16,
+            ).to('cuda' if self.to_cuda else 'cpu')
         self.tokenizer = AutoTokenizer.from_pretrained(self.model_name)
         self.model.eval()
 
-    def get_embedding(self, texts: list) -> np.ndarray:
-        """
-        Generates embeddings for a list of texts.
-
-        Args:
-            texts (list of str): A list of texts to be embedded.
-
-        Returns:
-            np.ndarray: The embeddings as a numpy array of shape (len(texts), embedding_dim).
+    def get_embedding(self, texts: List[str]) -> np.ndarray:
         """
+        Generate embeddings for a list of texts.
 
+        Parameters
+        ----------
+        texts:
+            A list of strings to embed.
+
+        Returns
+        -------
+        numpy.ndarray
+            Embeddings as a 2‑D array of shape (len(texts), embedding_dim).
+        """
         if not isinstance(texts, list):
             raise ValueError("Input must be a list of strings.")
-
 
         if self.use_api:
             if self.use_api == 'jina':
+                # set default model if not provided
                 if not self.model_name:
                     self.model_name = "jina-embeddings-v3"
-                    print(f"use default model: {self.model_name}")
                 return self._get_jina_embedding(texts)
             elif self.use_api == 'openai':
-                # set the default to be GPT embedding
                 if not self.model_name:
                     self.model_name = "text-embedding-3-small"
-                    print(f"use default model: {self.model_name}")
                 return self._get_openai_embedding(texts)
             else:
                 raise ValueError(f"API type '{self.use_api}' is not supported.")
         else:
            return self._get_torch_embedding(texts)
-
-    ## Below are model-specific functions
 
     @on_exception(expo, HTTPRequests.exceptions.RequestException, max_time=30)
-    def _get_jina_embedding(self, texts: list) -> np.ndarray:
-        """Fetches embeddings from the Jina API. Requires Jina API key in .env file."""
+    def _get_jina_embedding(self, texts: List[str]) -> np.ndarray:
+        """Fetch embeddings from the Jina API. Requires Jina API key in .env."""
         url = 'https://api.jina.ai/v1/embeddings'
-
         headers = {
             'Content-Type': 'application/json',
-            'Authorization': f'Bearer {getenv("JINAAI_API_KEY")}'
+            'Authorization': f"Bearer {getenv('JINAAI_API_KEY')}"
         }
-
-        input_texts = [text for text in texts]
         data = {
-            "model": "jina-embeddings-v3",
+            "model": self.model_name or "jina-embeddings-v3",
             "task": "text-matching",
             "dimensions": 1024,
             "late_chunking": False,
             "embedding_type": "float",
-            "input": input_texts
+            "input": [text for text in texts],
         }
-        response = HTTPRequests.post(url, headers=headers, json=data)
-
-        # Process the response
+        response = HTTPRequests.post(url, headers=headers, json=data)
         if response.status_code == 200:
-            # Extract embeddings from the response and convert them to a single NumPy array
             embeddings = response.json().get('data', [])
-            embeddings_np = np.array([embedding_data['embedding'] for embedding_data in embeddings], dtype="f")
+            embeddings_np = np.array([e['embedding'] for e in embeddings], dtype='f')
             return embeddings_np
         elif response.status_code == 429:
             raise HTTPRequests.exceptions.RequestException(
                 f"Rate limit exceeded: {response.status_code}, {response.text}"
             )
-
-        ## When the input is too long, we need to segment the text
         elif response.status_code == 400:
+            # input too long; segment and average
            ebd = []
            for text in texts:
                chunks = self._Jina_segmenter(text, max_token=MAX_TOKENS)
@@ -140,90 +164,68 @@ class Embedder:
                 chunk_embedding = self.get_embedding(chunks)
                 weighted_avg = np.average(chunk_embedding, weights=token_counts, axis=0)
                 ebd.append(weighted_avg)
-            return np.array(ebd, dtype="f")
-
+            return np.array(ebd, dtype='f')
         else:
-            print(f"Error: {response.status_code}, {response.text}")
             raise Exception(f"Failed to get embedding from Jina API: {response.status_code}, {response.text}")
-
+
     @on_exception(expo, HTTPRequests.exceptions.RequestException, max_time=30)
-    def _get_openai_embedding(self, texts: list) -> np.ndarray:
-        """Fetches embeddings from the OpenAI API and returns them as a NumPy array. Requires OpenAI API key in .env file."""
-        # openai embedding API has a limit on single batch size of 2048 texts, so we may need to batch here
+    def _get_openai_embedding(self, texts: List[str]) -> np.ndarray:
+        """Fetch embeddings from the OpenAI API and return them as a NumPy array."""
         batch_size = 2048
         if len(texts) > batch_size:
             all_embeddings = []
             for i in range(0, len(texts), batch_size):
-                batch_texts = texts[i:i+batch_size]
+                batch_texts = texts[i:i + batch_size]
                 batch_embeddings = self._get_openai_embedding(batch_texts)
                 all_embeddings.append(batch_embeddings)
             return np.vstack(all_embeddings)
-
-        texts = [text.replace("\n", " ") for text in texts] # Clean text input
+        texts = [text.replace("\n", " ") for text in texts]
         response = self.client.embeddings.create(input=texts, model=self.model_name)
-
-        # Extract embeddings from response
         embeddings = [item.embedding for item in response.data]
+        return np.array(embeddings, dtype='f')
 
-        # Convert the list of embeddings to a NumPy array with the desired data type
-        return np.array(embeddings, dtype="f")
+    def _get_torch_embedding(self, texts: List[str]) -> np.ndarray:
+        """Generate embeddings using a local PyTorch model."""
+        import torch
+        import torch.nn.functional as F
 
-    def _get_torch_embedding(self, texts: list) -> np.ndarray:
-        """Generates embeddings using a local PyTorch model."""
-        import torch # Importing PyTorch only when needed
         @torch.no_grad()
-        def _encode(self, input_texts):
-            """
-            Generates embeddings for a list of texts using a pytorch local model.
-
-            Args:
-                input_texts (list of str): A list of texts to encode.
-
-            Returns:
-                np.ndarray: An array of embeddings.
-            """
-            batch_dict = self.tokenizer(input_texts, max_length=512, padding=True, truncation=True, return_tensors='pt', return_attention_mask=True).to('cuda' if self.to_cuda else 'cpu')
-
-            outputs = self.model(**batch_dict)
-            attention_mask = batch_dict["attention_mask"]
+        def _encode(instance: 'Embedder', input_texts: List[str]) -> np.ndarray:
+            batch_dict = instance.tokenizer(
+                input_texts,
+                max_length=512,
+                padding=True,
+                truncation=True,
+                return_tensors='pt',
+                return_attention_mask=True,
+            ).to('cuda' if instance.to_cuda else 'cpu')
+            outputs = instance.model(**batch_dict)
+            attention_mask = batch_dict['attention_mask']
             hidden = outputs.last_hidden_state
-
-            reps = _weighted_mean_pooling(hidden, attention_mask)
+            def _weighted_mean_pooling(hidden_states, mask):
+                # compute weighted mean over tokens
+                mask_ = mask * mask.cumsum(dim=1)
+                s = (hidden_states * mask_.unsqueeze(-1).float()).sum(dim=1)
+                d = mask_.sum(dim=1, keepdim=True).float()
+                return s / d
+            reps = _weighted_mean_pooling(hidden, attention_mask)
             embeddings = F.normalize(reps, p=2, dim=1).detach().cpu().numpy()
             return embeddings
-
-        def _weighted_mean_pooling(hidden: torch.Tensor, attention_mask: torch.Tensor) -> torch.Tensor:
-            """
-            Computes weighted mean pooling over the hidden states.
-
-            Args:
-                hidden (torch.Tensor): The hidden states output from the transformer model.
-                attention_mask (torch.Tensor): The attention mask for the input sequences.
-
-            Returns:
-                torch.Tensor: The pooled representation of the input.
-            """
-            attention_mask_ = attention_mask * attention_mask.cumsum(dim=1)
-            s = torch.sum(hidden * attention_mask_.unsqueeze(-1).float(), dim=1)
-            d = attention_mask_.sum(dim=1, keepdim=True).float()
-            reps = s / d
-            return reps
-
         return _encode(self, texts)
-
+
     @on_exception(expo, HTTPRequests.exceptions.RequestException, max_time=30)
-    def _Jina_segmenter(self, text: str, max_token: int) -> list[str]:
-        """Segments text into chunks using Jina API. (free but need API key)"""
+    def _Jina_segmenter(self, text: str, max_token: int) -> List[str]:
+        """Segments text into chunks using Jina API. (free but needs API key)"""
         url = 'https://segment.jina.ai/'
         headers = {
             'Content-Type': 'application/json',
-            'Authorization': f'Bearer {getenv("JINAAI_API_KEY")}'
+            'Authorization': f"Bearer {getenv('JINAAI_API_KEY')}"
         }
         data = {
             "content": text,
             "return_tokens": True,
             "return_chunks": True,
-            "max_chunk_length": max_token
+            "max_chunk_length": max_token,
         }
         response = HTTPRequests.post(url, headers=headers, json=data)
         return response.json().get('chunks', [])
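
The refactored Embedder keeps a single public entry point, get_embedding, and dispatches to Jina, OpenAI, or a local transformer depending on use_api. A minimal usage sketch, assuming the top-level import name is POP (matching this wheel's file layout), that the relevant API keys are set in the environment, and that the local model name below is an illustrative placeholder rather than a package default:

    import numpy as np
    from POP import Embedder

    # API-backed embeddings: model_name falls back to "text-embedding-3-small"
    # for OpenAI (or "jina-embeddings-v3" for Jina) when omitted.
    api_embedder = Embedder(use_api='openai')
    vectors = api_embedder.get_embedding(["Hello, world!", "Second text"])
    print(vectors.shape)  # (2, embedding_dim)

    # Local embeddings: use_api=None loads a Hugging Face model via transformers;
    # rows come back L2-normalised by the weighted-mean-pooling path above.
    local = Embedder(model_name="jinaai/jina-embeddings-v2-small-en")  # hypothetical choice
    local_vectors = local.get_embedding(["Hello, world!"])
    print(np.linalg.norm(local_vectors, axis=1))  # ~1.0 per row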
POP/__init__.py CHANGED
@@ -1,22 +1,40 @@
-from .POP import PromptFunction, get_text_snapshot
-from .Embedder import Embedder
-from .LLMClient import (
-    LLMClient,
-    OpenAIClient,
-    GeminiClient,
-    DeepseekClient,
-    LocalPyTorchClient,
-    DoubaoClient,
+"""Top‑level package for the restructured POP library.
+
+This package exposes the main classes and helper functions for creating
+prompt functions, embeddings and conversation contexts. It also
+re‑exports provider registry functions for convenience.
+
+Example usage::
+
+    from pop import PromptFunction, Context, list_providers
+
+    ctx = Context(system="You are a helpful assistant")
+    pf = PromptFunction(sys_prompt="Translate", prompt="<<<text>>>", client="openai")
+    result = pf.execute(text="Hello")
+    print(result)
+"""
+
+from .prompt_function import PromptFunction
+from .embedder import Embedder
+from .context import Context, MessageBlock
+from .api_registry import (
+    list_providers,
+    list_default_model,
+    list_models,
+    get_default_model,
+    get_model,
+    get_client,
 )
 
 __all__ = [
     "PromptFunction",
-    "get_text_snapshot",
     "Embedder",
-    "LLMClient",
-    "OpenAIClient",
-    "GeminiClient",
-    "DeepseekClient",
-    "LocalPyTorchClient",
-    "DoubaoClient",
+    "Context",
+    "MessageBlock",
+    "list_providers",
+    "list_default_model",
+    "list_models",
+    "get_default_model",
+    "get_model",
+    "get_client",
 ]
POP/api_registry.py ADDED
@@ -0,0 +1,148 @@
+"""Provider and model registry.
+
+This module offers helper functions to inspect and instantiate LLM
+providers. It borrows the concept of a central registry from the
+``api‑registry.ts`` file in the pi‑ai project and exposes:
+
+* :func:`list_providers` – return a list of provider identifiers.
+* :func:`list_default_model` – return a mapping of provider identifiers
+  to their default model names.
+* :func:`get_default_model` – return the default model for a provider.
+* :func:`list_models` – return a mapping of provider identifiers to
+  all available model names from ``providers.json``.
+* :func:`get_model` – instantiate and return a client based on a model name.
+* :func:`get_client` – instantiate and return a client instance given
+  its provider identifier.
+"""
+
+from __future__ import annotations
+
+import json
+from os import path
+from typing import Dict, List, Optional
+
+from .providers import DEFAULT_CLIENTS
+from .models import DEFAULT_MODEL
+
+_PROVIDERS_JSON = path.join(path.dirname(__file__), "providers", "providers.json")
+
+
+def _load_provider_catalog() -> Dict[str, Dict[str, object]]:
+    if not path.exists(_PROVIDERS_JSON):
+        return {}
+    with open(_PROVIDERS_JSON, "r", encoding="utf-8") as handle:
+        data = json.load(handle)
+    if not isinstance(data, dict):
+        return {}
+    return data
+
+
+def _flatten_models(provider_data: Dict[str, object]) -> List[str]:
+    models: List[str] = []
+    seen: set[str] = set()
+    for value in provider_data.values():
+        if isinstance(value, list) and all(isinstance(item, str) for item in value):
+            for item in value:
+                if item not in seen:
+                    seen.add(item)
+                    models.append(item)
+    return models
+
+
+def list_providers() -> List[str]:
+    """Return the list of registered provider identifiers."""
+    return list(DEFAULT_CLIENTS.keys())
+
+
+def list_default_model() -> Dict[str, str]:
+    """Return a mapping of provider identifiers to default model names."""
+    models: Dict[str, str] = {}
+    for name, cls in DEFAULT_CLIENTS.items():
+        model_name = DEFAULT_MODEL.get(cls.__name__)
+        if model_name:
+            models[name] = model_name
+    return models
+
+
+def _get_default_model(provider_name: str) -> Optional[str]:
+    """Return the default model name for a provider."""
+    cls = DEFAULT_CLIENTS.get(provider_name)
+    if cls is None:
+        return None
+    return DEFAULT_MODEL.get(cls.__name__)
+
+
+def list_models() -> Dict[str, List[str]]:
+    """Return a mapping of provider identifiers to available model names."""
+    providers = _load_provider_catalog()
+    models: Dict[str, List[str]] = {}
+    for provider_name, provider_data in providers.items():
+        if isinstance(provider_data, dict):
+            flattened = _flatten_models(provider_data)
+            if flattened:
+                models[provider_name] = flattened
+    return models
+
+def get_default_model(provider_name: str) -> Optional[str]:
+    """Return the default model name for a provider."""
+    return _get_default_model(provider_name)
+
+def get_model(model_name: str) -> Optional[object]:
+    """Instantiate and return a client based on a model name.
+
+    The lookup searches the provider catalog first, then falls back to
+    default model names defined in ``DEFAULT_MODEL``.
+    """
+    # Search providers.json for a matching model name
+    providers = _load_provider_catalog()
+    for provider_name, provider_data in providers.items():
+        if isinstance(provider_data, dict):
+            flattened = _flatten_models(provider_data)
+            if model_name in flattened:
+                return get_client(provider_name, model_name)
+    # Fall back to default model mappings
+    for provider_name, cls in DEFAULT_CLIENTS.items():
+        default_model = DEFAULT_MODEL.get(cls.__name__)
+        if default_model == model_name:
+            return get_client(provider_name, model_name)
+    return None
+
+def get_client(provider_name: str, model_name: Optional[str] = None) -> Optional[object]:
+    """Instantiate and return an LLM client for the given provider.
+
+    Parameters
+    ----------
+    provider_name : str
+        The provider identifier (e.g. ``"openai"``).
+
+    model_name : str, optional
+        The model name (as listed in ``providers.json``); defaults to
+        the provider's default model when omitted.
+
+    Returns
+    -------
+    object | None
+        An instance of the provider's client class if recognised,
+        otherwise ``None``.
+    """
+    if model_name is None:
+        model_name = _get_default_model(provider_name)
+    cls = DEFAULT_CLIENTS.get(provider_name)
+
+    if cls is not None:
+        try:
+            return cls(model=model_name)
+        except TypeError:
+            # Some clients (e.g., LocalPyTorchClient) do not accept model args.
+            return cls()
+
+    return None
+
+
+__all__ = [
+    "list_providers",
+    "list_default_model",
+    "list_models",
+    "get_model",
+    "get_default_model",
+    "get_client",
+]
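
A short sketch of how these registry helpers compose, assuming the POP import name and that the relevant provider keys are configured; the model names shown are examples drawn from DEFAULT_MODEL:

    from POP import list_providers, list_default_model, get_client, get_model

    print(list_providers())      # provider ids registered in DEFAULT_CLIENTS
    print(list_default_model())  # e.g. {'openai': 'gpt-5-nano', ...}

    # By provider id; the default model is filled in when model_name is omitted.
    client = get_client("openai")

    # By model name; resolved via providers.json first, then DEFAULT_MODEL.
    client = get_model("deepseek-chat")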
POP/context.py ADDED
@@ -0,0 +1,47 @@
+"""Conversation context objects.
+
+In order to build and track a conversation with an LLM, this module
+defines simple data classes for system messages and user/assistant
+messages. A :class:`Context` holds a list of :class:`MessageBlock`
+objects and optionally a system prompt and a list of tool
+descriptions. It can be converted into the message list expected by
+``LLMClient.chat_completion``.
+"""
+
+from dataclasses import dataclass, field
+from typing import List, Optional, Dict, Any
+
+
+@dataclass
+class MessageBlock:
+    """Represents a single message from a user or assistant."""
+    role: str
+    content: str
+
+
+@dataclass
+class Context:
+    """Collects conversation messages and metadata."""
+    system: Optional[str] = None
+    messages: List[MessageBlock] = field(default_factory=list)
+    tools: Optional[List[Dict[str, Any]]] = None
+
+    def append(self, role: str, content: str) -> None:
+        """Append a message to the context."""
+        self.messages.append(MessageBlock(role=role, content=content))
+
+    def to_messages(self) -> List[Dict[str, Any]]:
+        """Convert the context into a list of message dictionaries.
+
+        The resulting list begins with a system message if one is set,
+        followed by all appended messages in order.
+        """
+        output: List[Dict[str, Any]] = []
+        if self.system:
+            output.append({"role": "system", "content": self.system})
+        for mb in self.messages:
+            output.append({"role": mb.role, "content": mb.content})
+        return output
+
+
+__all__ = ["MessageBlock", "Context"]
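
Context is a plain data holder, so the round trip is short; to_messages produces the list shape a client's chat_completion expects. A sketch, assuming the POP import name:

    from POP import Context

    ctx = Context(system="You are a helpful assistant")
    ctx.append("user", "What is 2 + 2?")
    ctx.append("assistant", "4")

    print(ctx.to_messages())
    # [{'role': 'system', 'content': 'You are a helpful assistant'},
    #  {'role': 'user', 'content': 'What is 2 + 2?'},
    #  {'role': 'assistant', 'content': '4'}]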
POP/env_api_keys.py ADDED
@@ -0,0 +1,33 @@
+"""Environment key helpers.
+
+Providers may require API keys to function. This module defines
+utility functions to inspect whether the necessary keys are present in
+the environment. Consumers can call :func:`has_api_key` to check
+whether a provider is ready for use.
+"""
+
+import os
+from typing import Optional
+from dotenv import load_dotenv
+
+if not load_dotenv():
+    print("No .env file found or could not be loaded.")
+
+# Mapping from provider identifier to the environment variable used
+REQUIRED_KEYS = {
+    "openai": "OPENAI_API_KEY",
+    "gemini": "GEMINI_API_KEY",
+    "deepseek": "DEEPSEEK_API_KEY",
+    "doubao": "DOUBAO_API_KEY",
+    # local and ollama do not require API keys by default
+}
+
+
+def has_api_key(provider: str) -> bool:
+    """Return True if the required API key for *provider* is set."""
+    env_var = REQUIRED_KEYS.get(provider)
+    return bool(os.getenv(env_var)) if env_var else True
+
+
+__all__ = ["has_api_key"]
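
Because providers absent from REQUIRED_KEYS report True, has_api_key answers "is this provider ready to use?" rather than "does a key exist?". A quick check, assuming the POP import name:

    from POP.env_api_keys import has_api_key

    for provider in ("openai", "gemini", "deepseek", "doubao", "ollama"):
        print(provider, has_api_key(provider))
    # 'ollama' (and any provider with no REQUIRED_KEYS entry) always
    # prints True, since it needs no key by default.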
POP/models.py ADDED
@@ -0,0 +1,20 @@
+"""Model names and default values.
+
+This module centralises the default model names used for each
+provider. It mirrors the ``default_model`` dictionary from the
+original POP implementation. Other modules import this to obtain a
+provider's default model.
+"""
+
+# Map provider class names to their default model identifiers
+DEFAULT_MODEL = {
+    "OpenAIClient": "gpt-5-nano",
+    "GeminiClient": "gemini-2.5-flash",
+    "DeepseekClient": "deepseek-chat",
+    "DoubaoClient": "doubao-seed-1-6-flash-250715",
+    "OllamaClient": "mistral:7b",
+    # LocalPyTorchClient is a stub; choose an arbitrary default
+    "LocalPyTorchClient": "local-llm",
+}
+
+__all__ = ["DEFAULT_MODEL"]
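
Note that DEFAULT_MODEL is keyed by client class names, not provider ids; the registry's get_default_model bridges the two. A two-line illustration, assuming the POP import name:

    from POP.models import DEFAULT_MODEL
    from POP import get_default_model

    print(DEFAULT_MODEL["OpenAIClient"])  # 'gpt-5-nano' (keyed by class name)
    print(get_default_model("openai"))    # same value, keyed by provider id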