pop-python 1.0.3.tar.gz → 1.1.0.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (71)
  1. {pop_python-1.0.3/pop_python.egg-info → pop_python-1.1.0}/PKG-INFO +160 -57
  2. pop_python-1.1.0/POP/Embedder.py +231 -0
  3. pop_python-1.1.0/POP/__init__.py +40 -0
  4. pop_python-1.1.0/POP/api_registry.py +148 -0
  5. pop_python-1.1.0/POP/context.py +47 -0
  6. pop_python-1.1.0/POP/env_api_keys.py +33 -0
  7. pop_python-1.1.0/POP/models.py +20 -0
  8. pop_python-1.1.0/POP/prompt_function.py +378 -0
  9. pop_python-1.1.0/POP/prompts/__init__.py +8 -0
  10. pop_python-1.1.0/POP/prompts/openai-json_schema_generator.md +16 -0
  11. pop_python-1.1.0/POP/providers/__init__.py +33 -0
  12. pop_python-1.1.0/POP/providers/deepseek_client.py +69 -0
  13. pop_python-1.1.0/POP/providers/doubao_client.py +101 -0
  14. pop_python-1.1.0/POP/providers/gemini_client.py +119 -0
  15. pop_python-1.1.0/POP/providers/llm_client.py +60 -0
  16. pop_python-1.1.0/POP/providers/local_client.py +45 -0
  17. pop_python-1.1.0/POP/providers/ollama_client.py +129 -0
  18. pop_python-1.1.0/POP/providers/openai_client.py +100 -0
  19. pop_python-1.1.0/POP/stream.py +77 -0
  20. pop_python-1.1.0/POP/utils/__init__.py +9 -0
  21. pop_python-1.1.0/POP/utils/event_stream.py +43 -0
  22. pop_python-1.1.0/POP/utils/http_proxy.py +16 -0
  23. pop_python-1.1.0/POP/utils/json_parse.py +21 -0
  24. pop_python-1.1.0/POP/utils/oauth/__init__.py +31 -0
  25. pop_python-1.1.0/POP/utils/overflow.py +33 -0
  26. pop_python-1.1.0/POP/utils/sanitize_unicode.py +18 -0
  27. pop_python-1.1.0/POP/utils/validation.py +23 -0
  28. pop_python-1.1.0/POP/utils/web_snapshot.py +108 -0
  29. pop_python-1.1.0/README.md +414 -0
  30. {pop_python-1.0.3 → pop_python-1.1.0/pop_python.egg-info}/PKG-INFO +160 -57
  31. pop_python-1.1.0/pop_python.egg-info/SOURCES.txt +47 -0
  32. pop_python-1.1.0/pop_python.egg-info/top_level.txt +2 -0
  33. {pop_python-1.0.3 → pop_python-1.1.0}/setup.py +1 -1
  34. pop_python-1.1.0/tests/__init__.py +0 -0
  35. pop_python-1.1.0/tests/conftest.py +47 -0
  36. pop_python-1.1.0/tests/test_api_registry.py +36 -0
  37. pop_python-1.1.0/tests/test_context_utils.py +54 -0
  38. pop_python-1.1.0/tests/test_embedder.py +64 -0
  39. pop_python-1.1.0/tests/test_env_api_keys.py +15 -0
  40. pop_python-1.1.0/tests/test_prompt_function.py +98 -0
  41. pop_python-1.1.0/tests/test_web_snapshot.py +47 -0
  42. pop_python-1.0.3/POP/Embedder.py +0 -229
  43. pop_python-1.0.3/POP/LLMClient.py +0 -403
  44. pop_python-1.0.3/POP/POP.py +0 -392
  45. pop_python-1.0.3/POP/__init__.py +0 -22
  46. pop_python-1.0.3/POP/prompts/2024-11-19-content_finder.md +0 -46
  47. pop_python-1.0.3/POP/prompts/2024-11-19-get_content.md +0 -71
  48. pop_python-1.0.3/POP/prompts/2024-11-19-get_title_and_url.md +0 -62
  49. pop_python-1.0.3/POP/prompts/CLI_AI_helper.md +0 -75
  50. pop_python-1.0.3/POP/prompts/content_finder.md +0 -42
  51. pop_python-1.0.3/POP/prompts/corpus_splitter.md +0 -28
  52. pop_python-1.0.3/POP/prompts/function_code_generator.md +0 -51
  53. pop_python-1.0.3/POP/prompts/function_description_generator.md +0 -45
  54. pop_python-1.0.3/POP/prompts/get_content.md +0 -75
  55. pop_python-1.0.3/POP/prompts/get_title_and_url.md +0 -62
  56. pop_python-1.0.3/POP/prompts/openai-function_description_generator.md +0 -126
  57. pop_python-1.0.3/POP/prompts/openai-json_schema_generator.md +0 -165
  58. pop_python-1.0.3/POP/prompts/openai-prompt_generator.md +0 -49
  59. pop_python-1.0.3/POP/schemas/biomedical_ner_extractor.json +0 -37
  60. pop_python-1.0.3/POP/schemas/entity_extraction_per_sentence.json +0 -92
  61. pop_python-1.0.3/README.md +0 -311
  62. pop_python-1.0.3/pop_python.egg-info/SOURCES.txt +0 -31
  63. pop_python-1.0.3/pop_python.egg-info/top_level.txt +0 -1
  64. {pop_python-1.0.3 → pop_python-1.1.0}/LICENSE +0 -0
  65. {pop_python-1.0.3 → pop_python-1.1.0}/MANIFEST.in +0 -0
  66. {pop_python-1.0.3 → pop_python-1.1.0}/POP/prompts/fabric-improve_prompt.md +0 -0
  67. {pop_python-1.0.3 → pop_python-1.1.0}/POP/prompts/json_formatter_prompt.md +0 -0
  68. {pop_python-1.0.3 → pop_python-1.1.0}/pop_python.egg-info/dependency_links.txt +0 -0
  69. {pop_python-1.0.3 → pop_python-1.1.0}/pop_python.egg-info/requires.txt +0 -0
  70. {pop_python-1.0.3 → pop_python-1.1.0}/pyproject.toml +0 -0
  71. {pop_python-1.0.3 → pop_python-1.1.0}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: pop-python
- Version: 1.0.3
+ Version: 1.1.0
  Summary: Prompt Oriented Programming (POP): reusable, composable prompt functions for LLMs.
  Home-page: https://github.com/sgt1796/POP
  Author: Guotai Shen
@@ -35,11 +35,11 @@ Dynamic: summary
  # Prompt Oriented Programming (POP)

  ```python
- from POP import PromptFunction
+ from pop import PromptFunction

  pf = PromptFunction(
      prompt="Draw a simple ASCII art of <<<object>>>.",
-     client = "openai"
+     client="openai",
  )

  print(pf.execute(object="a cat"))
@@ -61,7 +61,7 @@ print(pf.execute(object="a rocket"))
  ---
  Reusable, composable prompt functions for LLM workflows.

- This release cleans the architecture, moves all LLM client logic to a separate `LLMClient` module, and extends multi-LLM backend support.
+ This 1.1.0 dev update restructures POP into small, focused modules and adds a provider registry inspired by pi-mono's `ai` package.

  PyPI:
  [https://pypi.org/project/pop-python/](https://pypi.org/project/pop-python/)
@@ -74,21 +74,19 @@ GitHub:
  ## Table of Contents

  1. [Overview](#1-overview)
- 2. [Major Updates](#2-major-updates)
- 3. [Features](#3-features)
- 4. [Installation](#4-installation)
- 5. [Setup](#5-setup)
- 6. [PromptFunction](#6-promptfunction)
-
-    * Placeholders
-    * Reserved Keywords
-    * Executing prompts
-    * Improving prompts
- 7. [Function Schema Generation](#7-function-schema-generation)
- 8. [Embeddings](#8-embeddings)
- 9. [Web Snapshot Utility](#9-web-snapshot-utility)
- 10. [Examples](#10-examples)
- 11. [Contributing](#11-contributing)
+ 2. [Update Note](#2-update-note)
+ 3. [Major Updates](#3-major-updates)
+ 4. [Features](#4-features)
+ 5. [Installation](#5-installation)
+ 6. [Setup](#6-setup)
+ 7. [PromptFunction](#7-promptfunction)
+ 8. [Provider Registry](#8-provider-registry)
+ 9. [Tool Calling](#9-tool-calling)
+ 10. [Function Schema Generation](#10-function-schema-generation)
+ 11. [Embeddings](#11-embeddings)
+ 12. [Web Snapshot Utility](#12-web-snapshot-utility)
+ 13. [Examples](#13-examples)
+ 14. [Contributing](#14-contributing)
  ---

  # 1. Overview
@@ -102,43 +100,64 @@ Instead of scattering prompt strings across your codebase, POP lets you:
  * improve prompts using meta-prompting
  * generate OpenAI-compatible function schemas
  * use unified embedding tools
- * work with multiple LLM providers through `LLMClient` subclasses
+ * work with multiple LLM providers through a centralized registry

  POP is designed to be simple, extensible, and production-friendly.

  ---

- # 2. Major Updates
+ # 2. Update Note

- This version introduces structural and functional improvements:
+ **1.1.0-dev (February 5, 2026)**

- ### 2.1. LLMClient moved into its own module
+ * **Breaking import path**: use `pop` (lowercase) for imports. Example: `from pop import PromptFunction`.
+ * **Provider registry**: clients live under `pop/providers/` and are instantiated via `pop.api_registry`.
+ * **LLMClient base class**: now in `pop.providers.llm_client` (kept as an abstract base class).

- `LLMClient.py` now holds all LLM backends:
+ ---
+
+ # 3. Major Updates
+
+ ### 3.1. Modularized architecture
+
+ The project has been decomposed into small, focused modules:
+
+ * `pop/prompt_function.py`
+ * `pop/embedder.py`
+ * `pop/context.py`
+ * `pop/api_registry.py`
+ * `pop/providers/` (one provider per file)
+ * `pop/utils/`
+
+ This mirrors the structure in the pi-mono `ai` package for clarity and maintainability.

- * OpenAI
- * Gemini
- * Deepseek
- * Doubao
- * Local PyTorch stub
- * Extensible architecture for adding new backends
+ ### 3.2. Provider registry + per-provider clients

- ### 2.2. Expanded multi-LLM support
+ Each provider has its own adaptor (OpenAI, Gemini, DeepSeek, Doubao, Local, Ollama). The registry gives you:

- Each backend now has consistent interface behavior and multimodal (text + image) support where applicable.
+ * `list_providers()`
+ * `list_default_model()`
+ * `list_models()`
+ * `get_client()`

  ---

- # 3. Features
+ # 4. Features

  * **Reusable Prompt Functions**
    Use `<<<placeholder>>>` syntax to inject dynamic content.

  * **Multi-LLM Backend**
-   Choose between OpenAI, Gemini, Deepseek, Doubao, or local models.
+   Choose between OpenAI, Gemini, DeepSeek, Doubao, Local, or Ollama.
+
+ * **Tool Calling**
+   Pass a tool schema list to `execute()` and receive tool-call arguments.
+
+ * **Multimodal (Text + Image)**
+   Pass `images=[...]` (URLs or base64) when the provider supports it.

  * **Prompt Improvement**
-   Improve or rewrite prompts using Fabric-style metaprompts.
+   Improve or rewrite prompts using Fabric-style meta-prompts.

  * **Function Schema Generation**
    Convert natural language descriptions into OpenAI-function schemas.
@@ -151,7 +170,7 @@ Each backend now has consistent interface behavior and multimodal (text + image)

  ---

- # 4. Installation
+ # 5. Installation

  Install from PyPI:

@@ -169,7 +188,7 @@ pip install -e .

  ---

- # 5. Setup
+ # 6. Setup

  Create a `.env` file in your project root:

@@ -185,16 +204,16 @@ All clients automatically read keys from environment variables.

  ---

- # 6. PromptFunction
+ # 7. PromptFunction

  The core abstraction of POP is the `PromptFunction` class.

  ```python
- from POP import PromptFunction
+ from pop import PromptFunction

  pf = PromptFunction(
      sys_prompt="You are a helpful AI.",
-     prompt="Give me a summary about <<<topic>>>."
+     prompt="Give me a summary about <<<topic>>>.",
  )

  print(pf.execute(topic="quantum biology"))
@@ -202,7 +221,7 @@ print(pf.execute(topic="quantum biology"))

  ---

- ## 6.1. Placeholder Syntax
+ ## 7.1. Placeholder Syntax

  Use angle-triple-brackets inside your prompt:

@@ -220,7 +239,7 @@ prompt = "Translate <<<sentence>>> to French."

  ---

- ## 6.2. Reserved Keywords
+ ## 7.2. Reserved Keywords

  Within `.execute()`, the following keyword arguments are **reserved** and should not be used as placeholder names:

@@ -228,6 +247,7 @@ Within `.execute()`, the following keyword arguments are **reserved** and should
  * `sys`
  * `fmt`
  * `tools`
+ * `tool_choice`
  * `temp`
  * `images`
  * `ADD_BEFORE`
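The hunk below carries the rest of the explanation: `ADD_BEFORE` and `ADD_AFTER` attach extra text around the rendered prompt. A minimal sketch of that usage, inferred from the description rather than shown anywhere in this diff:

```python
from pop import PromptFunction

pf = PromptFunction(prompt="Summarize: <<<text>>>", client="openai")

# ADD_BEFORE / ADD_AFTER are reserved kwargs that prepend/append text to the
# prompt; the exact rendering is internal to the library.
result = pf.execute(
    text="POP turns prompts into reusable, composable functions.",
    ADD_BEFORE="Answer in one sentence.",
    ADD_AFTER="Use plain English.",
)
print(result)
```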
@@ -237,32 +257,104 @@ Most keywords are used for parameters. `ADD_BEFORE` and `ADD_AFTER` will attach

  ---

- ## 6.3. Executing prompts
+ ## 7.3. Executing prompts

  ```python
  result = pf.execute(
      topic="photosynthesis",
-     model="gpt-4o-mini",
-     temp=0.3
+     model="gpt-5-mini",
+     temp=0.3,
  )
  ```

  ---

- ## 6.4. Improving Prompts
+ ## 7.4. Improving Prompts

  You can ask POP to rewrite or enhance your system prompt:

  ```python
- better = pf._improve_prompt()
+ better = pf.improve_prompt()
  print(better)
  ```

- This uses a Fabric-inspired meta-prompt bundled in the `prompts/` directory.
+ This uses a Fabric-inspired meta-prompt bundled in the `pop/prompts/` directory.
+
+ ---
+
+ # 8. Provider Registry
+
+ Use the registry to list providers/models or instantiate clients.
+
+ ```python
+ from pop import list_providers, list_models, list_default_model, get_client
+
+ print(list_providers())
+ print(list_default_model())
+ print(list_models())
+
+ client = get_client("openai")
+ ```
+
+ Non-default model example:
+
+ ```python
+ from pop import PromptFunction, get_client
+
+ client = get_client("gemini", "gemini-2.5-pro")
+
+ pf = PromptFunction(prompt="Draw a rocket.", client=client)
+ print(pf.execute())
+ ```
+
+ Direct provider class example:
+
+ ```python
+ from pop import PromptFunction
+ from pop.providers.gemini_client import GeminiClient
+
+ pf = PromptFunction(prompt="Draw a rocket.", client=GeminiClient(model="gemini-2.5-pro"))
+ print(pf.execute())
+ ```
+
+ ---
+
+ # 9. Tool Calling
+
+ ```python
+ from pop import PromptFunction
+
+ tools = [
+     {
+         "type": "function",
+         "function": {
+             "name": "create_reminder",
+             "description": "Create a reminder.",
+             "parameters": {
+                 "type": "object",
+                 "properties": {
+                     "description": {"type": "string"},
+                     "when": {"type": "string"},
+                 },
+                 "required": ["description"],
+             },
+         },
+     }
+ ]
+
+ pf = PromptFunction(
+     sys_prompt="You are a helpful assistant.",
+     prompt="<<<input>>>",
+     client="openai",
+ )
+
+ result = pf.execute(input="Remind me to walk at 9am.", tools=tools)
+ print(result)
+ ```
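The diff does not show the shape of `result` in the tool-calling example above. If the adapter hands back the tool call's arguments as JSON text (an assumption, not confirmed here, though the package does ship a `pop/utils/json_parse.py` helper), consuming it might look like:

```python
import json

# Assumption: `result` carries the tool call's arguments as JSON text,
# e.g. '{"description": "walk", "when": "9am"}'. Not confirmed by this diff.
args = json.loads(result)
print(args["description"], args.get("when"))
```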

  ---

- # 7. Function Schema Generation
+ # 10. Function Schema Generation

  POP supports generating **OpenAI function-calling schemas** from natural language descriptions.

@@ -279,16 +371,16 @@ What this does:
  * Applies a standard meta-prompt
  * Uses the selected LLM backend
  * Produces a valid JSON Schema for OpenAI function calling
- * Optionally saves it under `functions/`
+ * Optionally saves it under `schemas/`

  ---

- # 8. Embeddings
+ # 11. Embeddings

  POP includes a unified embedding interface:

  ```python
- from POP.Embedder import Embedder
+ from pop import Embedder

  embedder = Embedder(use_api="openai")
  vecs = embedder.get_embedding(["hello world"])
@@ -304,10 +396,10 @@ Large inputs are chunked automatically when needed.

  ---

- # 9. Web Snapshot Utility
+ # 12. Web Snapshot Utility

  ```python
- from POP import get_text_snapshot
+ from pop.utils.web_snapshot import get_text_snapshot

  text = get_text_snapshot("https://example.com", image_caption=True)
  print(text[:500])
@@ -322,10 +414,10 @@ Supports:

  ---

- # 10. Examples
+ # 13. Examples

  ```python
- from POP import PromptFunction
+ from pop import PromptFunction

  pf = PromptFunction(prompt="Give me 3 creative names for a <<<thing>>>.")

@@ -333,9 +425,20 @@ print(pf.execute(thing="robot"))
  print(pf.execute(thing="new language"))
  ```

+ Multimodal example (provider must support images):
+
+ ```python
+ from pop import PromptFunction
+
+ image_b64 = "..."  # base64-encoded image
+
+ pf = PromptFunction(prompt="Describe the image.", client="openai")
+ print(pf.execute(images=[image_b64]))
+ ```
+

  ---

- # 11. Contributing
+ # 14. Contributing

  Steps:

@@ -0,0 +1,231 @@
+ """
+ Embedding utilities for POP.
+
+ This module implements a unified embedding interface capable of
+ fetching embeddings via third‑party APIs (JinaAI, OpenAI) or via
+ a local PyTorch model. It is largely derived from the original
+ POP project’s ``Embedder.py`` and can be used independently of
+ ``PromptFunction``.
+
+ Example usage:
+
+ >>> from pop.embedder import Embedder
+ >>> embedder = Embedder(use_api='openai')
+ >>> vectors = embedder.get_embedding(["Hello, world!"])
+
+ The return value is a numpy array of shape (n_texts, embedding_dim).
+ """
+
+ import numpy as np
+ import openai
+ import requests as HTTPRequests
+ from os import getenv
+ from backoff import on_exception, expo
+ from typing import List
+
+ from transformers import AutoTokenizer, AutoModel
+
+ # Maximum number of tokens permitted by the Jina segmenter
+ MAX_TOKENS = 8194
+
+ class Embedder:
+     """
+     A class supporting multiple embedding methods, including Jina API,
+     OpenAI API, and local model embeddings via PyTorch.
+
+     Parameters
+     ----------
+     model_name:
+         Name of the model to use for embedding. If ``None`` the default
+         model for the selected API will be chosen.
+     use_api:
+         Which API to use for embedding. Supported values are
+         ``'jina'``, ``'openai'`` and ``None`` (for local embedding).
+     to_cuda:
+         If ``True``, use GPU; otherwise use CPU for local embeddings.
+     attn_implementation:
+         Optional attention implementation to pass to the transformer
+         when loading the local model.
+     """
+
+     def __init__(self, model_name: str = None, use_api: str = None,
+                  to_cuda: bool = False, attn_implementation: str = None):
+         self.use_api = use_api
+         self.model_name = model_name
+         self.to_cuda = to_cuda
+
+         # API‑based embedding initialisation
+         if self.use_api is not None:
+             supported_apis = ['', 'jina', 'openai']
+             if self.use_api not in supported_apis:
+                 raise ValueError(f"API type '{self.use_api}' not supported. Supported APIs: {supported_apis}")
+
+             if self.use_api == '':
+                 # empty string falls back to OpenAI
+                 self.use_api = 'openai'
+
+             if self.use_api == 'jina':
+                 # The Jina client requires an API key; nothing to initialise
+                 self.client = None
+             elif self.use_api == 'openai':
+                 # Initialise OpenAI client
+                 self.client = openai.Client(api_key=getenv("OPENAI_API_KEY"))
+         else:
+             # Load PyTorch model for local embedding generation
+             if not model_name:
+                 raise ValueError("Model name must be provided when using a local model.")
+             self.attn_implementation = attn_implementation
+             self._initialize_local_model()
+
+     def _initialize_local_model(self) -> None:
+         """Initialise the PyTorch model and tokenizer for local embedding generation."""
+         import torch
+         import torch.nn.functional as F
+
+         if self.attn_implementation:
+             self.model = AutoModel.from_pretrained(
+                 self.model_name,
+                 trust_remote_code=True,
+                 attn_implementation=self.attn_implementation,
+                 torch_dtype=torch.float16,
+             ).to('cuda' if self.to_cuda else 'cpu')
+         else:
+             self.model = AutoModel.from_pretrained(
+                 self.model_name,
+                 trust_remote_code=True,
+                 torch_dtype=torch.float16,
+             ).to('cuda' if self.to_cuda else 'cpu')
+         self.tokenizer = AutoTokenizer.from_pretrained(self.model_name)
+         self.model.eval()
+
+     def get_embedding(self, texts: List[str]) -> np.ndarray:
+         """
+         Generate embeddings for a list of texts.
+
+         Parameters
+         ----------
+         texts:
+             A list of strings to embed.
+
+         Returns
+         -------
+         numpy.ndarray
+             Embeddings as a 2‑D array of shape (len(texts), embedding_dim).
+         """
+         if not isinstance(texts, list):
+             raise ValueError("Input must be a list of strings.")
+
+         if self.use_api:
+             if self.use_api == 'jina':
+                 # set default model if not provided
+                 if not self.model_name:
+                     self.model_name = "jina-embeddings-v3"
+                 return self._get_jina_embedding(texts)
+             elif self.use_api == 'openai':
+                 if not self.model_name:
+                     self.model_name = "text-embedding-3-small"
+                 return self._get_openai_embedding(texts)
+             else:
+                 raise ValueError(f"API type '{self.use_api}' is not supported.")
+         else:
+             return self._get_torch_embedding(texts)
+
+     @on_exception(expo, HTTPRequests.exceptions.RequestException, max_time=30)
+     def _get_jina_embedding(self, texts: List[str]) -> np.ndarray:
+         """Fetch embeddings from the Jina API. Requires Jina API key in .env."""
+         url = 'https://api.jina.ai/v1/embeddings'
+         headers = {
+             'Content-Type': 'application/json',
+             'Authorization': f"Bearer {getenv('JINAAI_API_KEY')}"
+         }
+         data = {
+             "model": self.model_name or "jina-embeddings-v3",
+             "task": "text-matching",
+             "dimensions": 1024,
+             "late_chunking": False,
+             "embedding_type": "float",
+             "input": [text for text in texts],
+         }
+         response = HTTPRequests.post(url, headers=headers, json=data)
+         if response.status_code == 200:
+             embeddings = response.json().get('data', [])
+             embeddings_np = np.array([e['embedding'] for e in embeddings], dtype='f')
+             return embeddings_np
+         elif response.status_code == 429:
+             raise HTTPRequests.exceptions.RequestException(
+                 f"Rate limit exceeded: {response.status_code}, {response.text}"
+             )
+         elif response.status_code == 400:
+             # input too long; segment and average
+             ebd = []
+             for text in texts:
+                 chunks = self._Jina_segmenter(text, max_token=MAX_TOKENS)
+                 token_counts = [len(chunk) for chunk in chunks]
+                 chunk_embedding = self.get_embedding(chunks)
+                 weighted_avg = np.average(chunk_embedding, weights=token_counts, axis=0)
+                 ebd.append(weighted_avg)
+             return np.array(ebd, dtype='f')
+         else:
+             raise Exception(f"Failed to get embedding from Jina API: {response.status_code}, {response.text}")
+
+     @on_exception(expo, HTTPRequests.exceptions.RequestException, max_time=30)
+     def _get_openai_embedding(self, texts: List[str]) -> np.ndarray:
+         """Fetch embeddings from the OpenAI API and return them as a NumPy array."""
+         batch_size = 2048
+         if len(texts) > batch_size:
+             all_embeddings = []
+             for i in range(0, len(texts), batch_size):
+                 batch_texts = texts[i:i + batch_size]
+                 batch_embeddings = self._get_openai_embedding(batch_texts)
+                 all_embeddings.append(batch_embeddings)
+             return np.vstack(all_embeddings)
+         texts = [text.replace("\n", " ") for text in texts]
+         response = self.client.embeddings.create(input=texts, model=self.model_name)
+         embeddings = [item.embedding for item in response.data]
+         return np.array(embeddings, dtype='f')
+
+     def _get_torch_embedding(self, texts: List[str]) -> np.ndarray:
+         """Generate embeddings using a local PyTorch model."""
+         import torch
+         import torch.nn.functional as F
+
+         @torch.no_grad()
+         def _encode(instance: 'Embedder', input_texts: List[str]) -> np.ndarray:
+             batch_dict = instance.tokenizer(
+                 input_texts,
+                 max_length=512,
+                 padding=True,
+                 truncation=True,
+                 return_tensors='pt',
+                 return_attention_mask=True,
+             ).to('cuda' if instance.to_cuda else 'cpu')
+             outputs = instance.model(**batch_dict)
+             attention_mask = batch_dict['attention_mask']
+             hidden = outputs.last_hidden_state
+             def _weighted_mean_pooling(hidden_states, mask):
+                 # compute weighted mean over tokens
+                 mask_ = mask * mask.cumsum(dim=1)
+                 s = (hidden_states * mask_.unsqueeze(-1).float()).sum(dim=1)
+                 d = mask_.sum(dim=1, keepdim=True).float()
+                 return s / d
+             reps = _weighted_mean_pooling(hidden, attention_mask)
+             embeddings = F.normalize(reps, p=2, dim=1).detach().cpu().numpy()
+             return embeddings
+         return _encode(self, texts)
+
+     @on_exception(expo, HTTPRequests.exceptions.RequestException, max_time=30)
+     def _Jina_segmenter(self, text: str, max_token: int) -> List[str]:
+         """Segments text into chunks using Jina API. (free but needs API key)"""
+         url = 'https://segment.jina.ai/'
+         headers = {
+             'Content-Type': 'application/json',
+             'Authorization': f"Bearer {getenv('JINAAI_API_KEY')}"
+         }
+         data = {
+             "content": text,
+             "return_tokens": True,
+             "return_chunks": True,
+             "max_chunk_length": max_token,
+         }
+         response = HTTPRequests.post(url, headers=headers, json=data)
+         return response.json().get('chunks', [])
@@ -0,0 +1,40 @@
+ """Top‑level package for the restructured POP library.
+
+ This package exposes the main classes and helper functions for creating
+ prompt functions, embeddings and conversation contexts. It also
+ re‑exports provider registry functions for convenience.
+
+ Example usage::
+
+     from pop import PromptFunction, Context, list_providers
+
+     ctx = Context(system="You are a helpful assistant")
+     pf = PromptFunction(sys_prompt="Translate", prompt="<<<text>>>", client="openai")
+     result = pf.execute(text="Hello")
+     print(result)
+ """
+
+ from .prompt_function import PromptFunction
+ from .embedder import Embedder
+ from .context import Context, MessageBlock
+ from .api_registry import (
+     list_providers,
+     list_default_model,
+     list_models,
+     get_default_model,
+     get_model,
+     get_client,
+ )
+
+ __all__ = [
+     "PromptFunction",
+     "Embedder",
+     "Context",
+     "MessageBlock",
+     "list_providers",
+     "list_default_model",
+     "list_models",
+     "get_default_model",
+     "get_model",
+     "get_client",
+ ]
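Putting the re-exports above together, a minimal end-to-end sketch (uses only names visible in this diff; actual output depends on your configured providers and keys):

```python
from pop import PromptFunction, get_client, list_providers

print(list_providers())  # registry re-export from pop.api_registry

# get_client(provider) per the README's Provider Registry section above
client = get_client("openai")
pf = PromptFunction(prompt="Name three uses for <<<thing>>>.", client=client)
print(pf.execute(thing="a provider registry"))
```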