pop-python 1.0.3__py3-none-any.whl → 1.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- POP/Embedder.py +121 -119
- POP/__init__.py +34 -16
- POP/api_registry.py +148 -0
- POP/context.py +47 -0
- POP/env_api_keys.py +33 -0
- POP/models.py +20 -0
- POP/prompt_function.py +378 -0
- POP/prompts/__init__.py +8 -0
- POP/prompts/openai-json_schema_generator.md +12 -161
- POP/providers/__init__.py +33 -0
- POP/providers/deepseek_client.py +69 -0
- POP/providers/doubao_client.py +101 -0
- POP/providers/gemini_client.py +119 -0
- POP/providers/llm_client.py +60 -0
- POP/providers/local_client.py +45 -0
- POP/providers/ollama_client.py +129 -0
- POP/providers/openai_client.py +100 -0
- POP/stream.py +77 -0
- POP/utils/__init__.py +9 -0
- POP/utils/event_stream.py +43 -0
- POP/utils/http_proxy.py +16 -0
- POP/utils/json_parse.py +21 -0
- POP/utils/oauth/__init__.py +31 -0
- POP/utils/overflow.py +33 -0
- POP/utils/sanitize_unicode.py +18 -0
- POP/utils/validation.py +23 -0
- POP/utils/web_snapshot.py +108 -0
- {pop_python-1.0.3.dist-info → pop_python-1.1.0.dist-info}/METADATA +160 -57
- pop_python-1.1.0.dist-info/RECORD +42 -0
- {pop_python-1.0.3.dist-info → pop_python-1.1.0.dist-info}/WHEEL +1 -1
- pop_python-1.1.0.dist-info/top_level.txt +2 -0
- tests/__init__.py +0 -0
- tests/conftest.py +47 -0
- tests/test_api_registry.py +36 -0
- tests/test_context_utils.py +54 -0
- tests/test_embedder.py +64 -0
- tests/test_env_api_keys.py +15 -0
- tests/test_prompt_function.py +98 -0
- tests/test_web_snapshot.py +47 -0
- POP/LLMClient.py +0 -403
- POP/POP.py +0 -392
- POP/prompts/2024-11-19-content_finder.md +0 -46
- POP/prompts/2024-11-19-get_content.md +0 -71
- POP/prompts/2024-11-19-get_title_and_url.md +0 -62
- POP/prompts/CLI_AI_helper.md +0 -75
- POP/prompts/content_finder.md +0 -42
- POP/prompts/corpus_splitter.md +0 -28
- POP/prompts/function_code_generator.md +0 -51
- POP/prompts/function_description_generator.md +0 -45
- POP/prompts/get_content.md +0 -75
- POP/prompts/get_title_and_url.md +0 -62
- POP/prompts/openai-function_description_generator.md +0 -126
- POP/prompts/openai-prompt_generator.md +0 -49
- POP/schemas/biomedical_ner_extractor.json +0 -37
- POP/schemas/entity_extraction_per_sentence.json +0 -92
- pop_python-1.0.3.dist-info/RECORD +0 -26
- pop_python-1.0.3.dist-info/top_level.txt +0 -1
- {pop_python-1.0.3.dist-info → pop_python-1.1.0.dist-info}/licenses/LICENSE +0 -0
tests/__init__.py
ADDED
|
File without changes
|
tests/conftest.py
ADDED
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
import importlib.util
|
|
2
|
+
import os
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
from types import SimpleNamespace
|
|
5
|
+
|
|
6
|
+
import pytest
|
|
7
|
+
|
|
8
|
+
from pop.env_api_keys import has_api_key
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def _build_fake_response(content: str | None = None, tool_arguments: str | None = None) -> SimpleNamespace:
|
|
12
|
+
tool_calls = None
|
|
13
|
+
if tool_arguments is not None:
|
|
14
|
+
function = SimpleNamespace(arguments=tool_arguments)
|
|
15
|
+
tool_calls = [SimpleNamespace(function=function)]
|
|
16
|
+
message = SimpleNamespace(content=content, tool_calls=tool_calls)
|
|
17
|
+
choice = SimpleNamespace(message=message)
|
|
18
|
+
return SimpleNamespace(choices=[choice])
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
@pytest.fixture
def fake_response():
    """Provide the ``_build_fake_response`` factory to tests as a fixture."""
    factory = _build_fake_response
    return factory
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
@pytest.fixture(scope="session")
def tools():
    """Load ``other_doc/function_calls.py`` by path and return its ``tools`` attribute.

    The file is looked up one directory above the directory containing this
    conftest and is executed as a module named ``function_calls``.

    Raises:
        RuntimeError: If no import spec (or no loader) can be created for
            the file path.
    """
    base_dir = Path(__file__).resolve().parents[1]
    tools_path = base_dir / "other_doc" / "function_calls.py"
    spec = importlib.util.spec_from_file_location("function_calls", tools_path)
    loader = None if spec is None else spec.loader
    if loader is None:
        raise RuntimeError(f"Failed to load tools module from {tools_path}")
    loaded = importlib.util.module_from_spec(spec)
    loader.exec_module(loaded)
    return loaded.tools
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def _is_live_enabled(provider: str | None = None) -> bool:
|
|
38
|
+
if os.getenv("POP_LIVE_TESTS") != "1":
|
|
39
|
+
return False
|
|
40
|
+
if provider:
|
|
41
|
+
return has_api_key(provider)
|
|
42
|
+
return True
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
@pytest.fixture(scope="session")
def live_enabled():
    """Provide the ``_is_live_enabled`` predicate to tests as a session fixture."""
    predicate = _is_live_enabled
    return predicate
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
from pop.api_registry import (
|
|
2
|
+
list_providers,
|
|
3
|
+
list_default_model,
|
|
4
|
+
list_models,
|
|
5
|
+
get_client,
|
|
6
|
+
get_model,
|
|
7
|
+
)
|
|
8
|
+
from pop.providers.local_client import LocalPyTorchClient
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def test_list_providers_contains_defaults():
    """``list_providers()`` must contain both "openai" and "local"."""
    available = list_providers()
    for expected in ("openai", "local"):
        assert expected in available
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def test_list_default_model_contains_mapping():
    """The default-model mapping has an "openai" entry and maps "local" to "local-llm"."""
    mapping = list_default_model()
    openai_default = mapping.get("openai")
    assert openai_default is not None
    local_default = mapping.get("local")
    assert local_default == "local-llm"
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def test_list_models_includes_known_openai_model():
    """``list_models()`` has an "openai" key whose value contains "gpt-4o"."""
    catalog = list_models()
    assert "openai" in catalog
    openai_models = catalog["openai"]
    assert "gpt-4o" in openai_models
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
def test_get_client_local():
    """``get_client("local")`` returns a ``LocalPyTorchClient`` instance."""
    provider_name = "local"
    local_client = get_client(provider_name)
    assert isinstance(local_client, LocalPyTorchClient)
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
def test_get_model_uses_default_mapping():
    """``get_model("local-llm")`` returns a ``LocalPyTorchClient`` instance."""
    model_name = "local-llm"
    resolved = get_model(model_name)
    assert isinstance(resolved, LocalPyTorchClient)
|
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
import pytest
|
|
2
|
+
|
|
3
|
+
from pop.context import Context
|
|
4
|
+
from pop.utils.event_stream import to_event_stream
|
|
5
|
+
from pop.utils.json_parse import parse_json, get_value
|
|
6
|
+
from pop.utils.validation import validate_not_empty, validate_json
|
|
7
|
+
from pop.utils.sanitize_unicode import sanitize
|
|
8
|
+
from pop.utils.overflow import truncate_messages
|
|
9
|
+
from pop.utils.http_proxy import get_session_with_proxy
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def test_context_append_and_to_messages():
    """Appended turns follow the system message in ``Context.to_messages()``."""
    conversation = Context(system="System")
    for role, text in (("user", "Hi"), ("assistant", "Hello")):
        conversation.append(role, text)

    expected = [
        {"role": "system", "content": "System"},
        {"role": "user", "content": "Hi"},
        {"role": "assistant", "content": "Hello"},
    ]
    assert conversation.to_messages() == expected
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def test_event_stream_iteration():
    """Iterating ``to_event_stream(gen)`` yields the generator's events in order."""

    def produce():
        # Fresh dicts are yielded each time so the comparison below does not
        # share objects with the expected list.
        for name in ("start", "done"):
            yield {"event": name}

    collected = list(to_event_stream(produce()))
    assert collected == [{"event": "start"}, {"event": "done"}]
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def test_json_parse_and_get_value():
    """``parse_json`` decodes an object; ``get_value`` returns the default for a missing key."""
    parsed = parse_json('{"a": 1}')
    assert parsed["a"] == 1
    fallback = get_value(parsed, "missing", default="x")
    assert fallback == "x"
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
def test_validation_helpers():
    """Validators raise ``ValueError`` on bad input and ``validate_json`` returns parsed data."""
    # An empty string must be rejected.
    with pytest.raises(ValueError):
        validate_not_empty("")

    # Well-formed JSON is returned as the decoded object.
    decoded = validate_json('{"ok": true}')
    assert decoded == {"ok": True}

    # Malformed JSON must be rejected.
    with pytest.raises(ValueError):
        validate_json("{bad json}")
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
def test_sanitize_and_truncate():
    """``sanitize`` maps "caf\\u00e9" to "cafe"; ``truncate_messages`` keeps the first two of three 3-char messages at ``max_length=6``."""
    cleaned = sanitize("caf\u00e9")
    assert cleaned == "cafe"

    messages = [letter * 3 for letter in "abc"]
    kept = truncate_messages(messages, max_length=6)
    assert kept == ["aaa", "bbb"]
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
def test_get_session_with_proxy():
    """The session from ``get_session_with_proxy()`` has ``trust_env`` set to ``True``."""
    proxied = get_session_with_proxy()
    trust_flag = proxied.trust_env
    assert trust_flag is True
|
tests/test_embedder.py
ADDED
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
from types import SimpleNamespace
|
|
2
|
+
|
|
3
|
+
import numpy as np
|
|
4
|
+
import pytest
|
|
5
|
+
|
|
6
|
+
import pop.embedder as emb_mod
|
|
7
|
+
from pop.embedder import Embedder
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def test_invalid_use_api_raises():
    """Constructing ``Embedder`` with ``use_api="bad-api"`` raises ``ValueError``."""
    unsupported = "bad-api"
    with pytest.raises(ValueError):
        Embedder(use_api=unsupported)
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def test_openai_embedding_stub(monkeypatch):
    """With a stubbed ``openai`` module, two inputs produce a (2, 2) ndarray."""

    class StubEmbeddings:
        def create(self, input, model):
            # One fixed 2-dimensional vector per input item.
            rows = [SimpleNamespace(embedding=[0.1, 0.2]) for _ in input]
            return SimpleNamespace(data=rows)

    class StubClient:
        def __init__(self, api_key=None):
            self.embeddings = StubEmbeddings()

    # Replace the module-level ``openai`` name in the embedder module.
    fake_openai = SimpleNamespace(Client=StubClient)
    monkeypatch.setattr(emb_mod, "openai", fake_openai)

    embedder = Embedder(use_api="openai", model_name="text-embedding-3-small")
    result = embedder.get_embedding(["hello", "world"])

    assert isinstance(result, np.ndarray)
    assert result.shape == (2, 2)
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def test_jina_embedding_stub(monkeypatch):
    """With ``HTTPRequests.post`` stubbed, two inputs produce an array of shape (2, 3)."""

    class StubResponse:
        status_code = 200

        def json(self):
            # Build a fresh payload on every call: two 3-dimensional vectors.
            vectors = ([0.1, 0.2, 0.3], [0.4, 0.5, 0.6])
            return {"data": [{"embedding": list(vec)} for vec in vectors]}

    def fake_post(url, headers=None, json=None):
        return StubResponse()

    monkeypatch.setattr(emb_mod.HTTPRequests, "post", fake_post)

    embedder = Embedder(use_api="jina", model_name="jina-embeddings-v3")
    result = embedder.get_embedding(["a", "b"])

    assert result.shape == (2, 3)
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
def test_get_embedding_requires_list(monkeypatch):
    """``get_embedding`` raises ``ValueError`` when given a plain string."""

    class StubEmbeddings:
        def create(self, input, model):
            rows = [SimpleNamespace(embedding=[0.1, 0.2]) for _ in input]
            return SimpleNamespace(data=rows)

    class StubClient:
        def __init__(self, api_key=None):
            self.embeddings = StubEmbeddings()

    # Replace the module-level ``openai`` name in the embedder module.
    fake_openai = SimpleNamespace(Client=StubClient)
    monkeypatch.setattr(emb_mod, "openai", fake_openai)

    embedder = Embedder(use_api="openai", model_name="text-embedding-3-small")
    with pytest.raises(ValueError):
        embedder.get_embedding("not-a-list")
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
from pop.env_api_keys import has_api_key
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
def test_has_api_key_true_when_set(monkeypatch):
    """``has_api_key("openai")`` is ``True`` once ``OPENAI_API_KEY`` is set."""
    monkeypatch.setenv("OPENAI_API_KEY", "test-key")
    found = has_api_key("openai")
    assert found is True
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def test_has_api_key_false_when_unset(monkeypatch):
    """``has_api_key("openai")`` is ``False`` when ``OPENAI_API_KEY`` is absent."""
    # raising=False: do not fail if the variable was never set.
    monkeypatch.delenv("OPENAI_API_KEY", raising=False)
    found = has_api_key("openai")
    assert found is False
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def test_has_api_key_for_keyless_provider():
    """``has_api_key("local")`` is ``True``."""
    found = has_api_key("local")
    assert found is True
|
|
@@ -0,0 +1,98 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
from types import SimpleNamespace
|
|
5
|
+
|
|
6
|
+
from pop.prompt_function import PromptFunction
|
|
7
|
+
from pop.providers.llm_client import LLMClient
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class DummyClient(LLMClient):
    """``LLMClient`` test double that returns a canned response and records its last call."""

    def __init__(self, response):
        # ``last_call`` stays ``None`` until ``chat_completion`` is invoked.
        self.last_call = None
        self.model_name = "dummy-model"
        self.response = response

    def chat_completion(self, messages, model, temperature=0.0, **kwargs):
        """Store every argument received in ``last_call`` and return the canned response."""
        recorded = dict(messages=messages, model=model, temperature=temperature)
        recorded.update(kwargs)
        self.last_call = recorded
        return self.response
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def test_prepare_prompt_replacements(fake_response):
    """``_prepare_prompt`` fills the placeholder and applies the ADD_BEFORE / ADD_AFTER and extra text."""
    prompt_fn = PromptFunction(
        prompt="Hello <<<name>>>",
        client=DummyClient(fake_response("ok")),
    )
    substitutions = {"name": "World", "ADD_BEFORE": "Before", "ADD_AFTER": "After"}
    rendered = prompt_fn._prepare_prompt("Extra", **substitutions)
    assert rendered == "Before\nHello World\nExtra\nAfter"
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
def test_prepare_prompt_sys_fallback(fake_response):
    """With an empty ``prompt``, ``_prepare_prompt(foo="bar")`` yields the "User instruction:" form."""
    prompt_fn = PromptFunction(
        sys_prompt="System instructions",
        prompt="",
        client=DummyClient(fake_response("ok")),
    )
    rendered = prompt_fn._prepare_prompt(foo="bar")
    assert rendered == "User instruction:\nfoo: bar"
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
def test_execute_passes_options_and_defaults(fake_response):
    """``execute`` forwards tools, response format and images, and sets ``tool_choice`` to "auto"."""
    stub = DummyClient(fake_response("ok"))
    prompt_fn = PromptFunction(prompt="Hello <<<name>>>", client=stub)

    fmt = {"type": "json_schema", "json_schema": {"name": "x", "schema": {"type": "object"}}}
    tools = [{"type": "function", "function": {"name": "ping", "parameters": {"type": "object"}}}]
    images = ["http://example.com/img.png"]

    prompt_fn.execute(name="World", tools=tools, fmt=fmt, images=images)

    recorded = stub.last_call
    assert recorded is not None
    expected = {
        "tools": tools,
        "tool_choice": "auto",
        "response_format": fmt,
        "images": images,
    }
    for key, value in expected.items():
        assert recorded[key] == value
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
def test_execute_returns_tool_call_arguments(fake_response):
    """When the response carries a tool call and no content, ``execute`` returns the call's arguments string."""
    tool_args = json.dumps({"description": "walk", "when": "9am"})
    stub = DummyClient(fake_response(None, tool_arguments=tool_args))
    prompt_fn = PromptFunction(prompt="<<<input>>>", client=stub)

    tool_specs = [{"type": "function", "function": {"name": "x"}}]
    returned = prompt_fn.execute(input="Remind me to walk at 9am.", tools=tool_specs)

    assert returned == tool_args
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
def test_improve_prompt_replaces_and_strips_output(monkeypatch, fake_response):
    """``improve_prompt(replace=True)`` drops the "# OUTPUT" header and stores the result in ``sys_prompt``."""
    stub = DummyClient(fake_response("unused"))
    prompt_fn = PromptFunction(sys_prompt="Base prompt", prompt="", client=stub)

    def canned_execute(*args, **kwargs):
        return "# OUTPUT\nImproved prompt"

    # Bypass the client entirely: ``execute`` on this instance returns fixed text.
    monkeypatch.setattr(prompt_fn, "execute", canned_execute)

    improved = prompt_fn.improve_prompt(replace=True)

    assert improved == "Improved prompt"
    assert prompt_fn.sys_prompt == "Improved prompt"
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
def test_load_prompt_resolves_relative_path():
    """``PromptFunction.load_prompt`` accepts a relative path and returns the prompt text."""
    relative_path = "prompts/json_formatter_prompt.md"
    text = PromptFunction.load_prompt(relative_path)
    assert "Generate a JSON Schema" in text
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
def test_generate_schema_default_prompt_saves(tmp_path, monkeypatch):
    """``generate_schema(save=True)`` returns the schema and writes ``schemas/test.json`` under the cwd."""
    payload = json.dumps({"name": "test", "schema": {"type": "object"}})
    message = SimpleNamespace(content=payload)
    response = SimpleNamespace(choices=[SimpleNamespace(message=message)])

    stub = DummyClient(response)
    prompt_fn = PromptFunction(prompt="Return the square of an integer.", client=stub)

    # Run from the temp directory so the file lookup below is isolated.
    monkeypatch.chdir(tmp_path)

    schema = prompt_fn.generate_schema(save=True)

    assert schema["name"] == "test"
    expected_file = tmp_path / "schemas" / "test.json"
    assert expected_file.exists()
|
|
91
|
+
|
|
92
|
+
|
|
93
|
+
def test_save_writes_prompt(tmp_path, fake_response):
    """``save(path)`` writes the prompt text to the given file."""
    prompt_fn = PromptFunction(prompt="Hello", client=DummyClient(fake_response("ok")))
    destination = tmp_path / "prompt.txt"

    prompt_fn.save(str(destination))

    written = destination.read_text(encoding="utf-8")
    assert written == "Hello"
|
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
import pop.utils.web_snapshot as ws
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
def test_web_snapshot_headers_and_url(monkeypatch):
    """``get_text_snapshot`` requests the ``r.jina.ai`` URL and sends the expected ``X-*`` headers."""
    captured = {}

    class StubResponse:
        text = "ok"

        def raise_for_status(self):
            return None

    def fake_get(url, headers=None):
        # Record what the code under test sent so it can be asserted below.
        captured.update(url=url, headers=headers or {})
        return StubResponse()

    monkeypatch.setattr(ws.requests, "get", fake_get)

    ws.get_text_snapshot(
        "https://example.com",
        use_api_key=False,
        target_selector=["main", ".content"],
        wait_for_selector=["#ready"],
        exclude_selector=[".ads"],
        links_at_end=True,
        images_at_end=True,
        image_caption=True,
    )

    assert captured["url"] == "https://r.jina.ai/https://example.com"

    sent = captured["headers"]
    expected_headers = {
        "X-Target-Selector": "main,.content",
        "X-Wait-For-Selector": "#ready",
        "X-Remove-Selector": ".ads",
        "X-With-Links-Summary": "true",
        "X-With-Images-Summary": "true",
        "X-With-Generated-Alt": "true",
    }
    for header_name, header_value in expected_headers.items():
        assert sent[header_name] == header_value
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def test_web_snapshot_request_exception(monkeypatch):
    """A ``RequestException`` from ``requests.get`` is reported in the returned text, not raised."""

    def failing_get(url, headers=None):
        raise ws.requests.exceptions.RequestException("boom")

    monkeypatch.setattr(ws.requests, "get", failing_get)

    outcome = ws.get_text_snapshot("https://example.com", use_api_key=False)

    assert "Error fetching text snapshot" in outcome
|