brokit 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- brokit-0.1.0/LICENSE +21 -0
- brokit-0.1.0/PKG-INFO +54 -0
- brokit-0.1.0/README.md +45 -0
- brokit-0.1.0/brokit/__init__.py +0 -0
- brokit-0.1.0/brokit/primitives/__init__.py +0 -0
- brokit-0.1.0/brokit/primitives/formatter.py +166 -0
- brokit-0.1.0/brokit/primitives/lm.py +115 -0
- brokit-0.1.0/brokit/primitives/predictor.py +109 -0
- brokit-0.1.0/brokit/primitives/prompt.py +158 -0
- brokit-0.1.0/brokit/primitives/shot.py +48 -0
- brokit-0.1.0/brokit.egg-info/PKG-INFO +54 -0
- brokit-0.1.0/brokit.egg-info/SOURCES.txt +14 -0
- brokit-0.1.0/brokit.egg-info/dependency_links.txt +1 -0
- brokit-0.1.0/brokit.egg-info/top_level.txt +1 -0
- brokit-0.1.0/pyproject.toml +14 -0
- brokit-0.1.0/setup.cfg +4 -0
brokit-0.1.0/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 datanooblol
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
brokit-0.1.0/PKG-INFO
ADDED
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: brokit
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: brokit, think it like a playing lego
|
|
5
|
+
Requires-Python: >=3.10
|
|
6
|
+
Description-Content-Type: text/markdown
|
|
7
|
+
License-File: LICENSE
|
|
8
|
+
Dynamic: license-file
|
|
9
|
+
|
|
10
|
+
# brokit
|
|
11
|
+
|
|
12
|
+
Inspired by big bro DSPy, brokit is a minimal Python toolkit of composable, LEGO-like primitives for working with language models. Build what you need, skip the bloat.
|
|
13
|
+
|
|
14
|
+
## What's This About?
|
|
15
|
+
|
|
16
|
+
A lightweight library for working with LMs across any use case. Just the essential building blocks, nothing more.
|
|
17
|
+
|
|
18
|
+
## Core Concepts
|
|
19
|
+
|
|
20
|
+
Coming from DSPy? You already know what's up:
|
|
21
|
+
|
|
22
|
+
- **Prompt** = `dspy.Signature` — Define your input/output structure
|
|
23
|
+
- **Predictor** = `dspy.Predict` — Execute prompts with your LM
|
|
24
|
+
- **LM** = `dspy.LM` — Language model interface
|
|
25
|
+
- **Shot** — Few-shot examples made simple
|
|
26
|
+
|
|
27
|
+
## Design Philosophy
|
|
28
|
+
|
|
29
|
+
### Plug and Play
|
|
30
|
+
|
|
31
|
+
Everything's a base class. Compose, extend, swap out whatever you want. The LM module? Bring your own.
|
|
32
|
+
|
|
33
|
+
### Pure Python
|
|
34
|
+
|
|
35
|
+
Zero required dependencies. Want to use `requests`, `httpx`, or `boto3`? Go for it. Check the notebooks for integration examples.
|
|
36
|
+
|
|
37
|
+
## Features
|
|
38
|
+
|
|
39
|
+
- Text and image support (more formats coming)
|
|
40
|
+
- Few-shot learning with Shot
|
|
41
|
+
- Build custom LM implementations
|
|
42
|
+
- Structured prompts with type hints
|
|
43
|
+
|
|
44
|
+
## Getting Started
|
|
45
|
+
|
|
46
|
+
Peep the notebooks:
|
|
47
|
+
- Custom Prompt signatures
|
|
48
|
+
- Your own LM implementations
|
|
49
|
+
- Few-shot examples
|
|
50
|
+
- External library integrations
|
|
51
|
+
|
|
52
|
+
## What's Next?
|
|
53
|
+
|
|
54
|
+
Check out [ROADMAP.md](ROADMAP.md) for what's coming and [VERSIONS.md](VERSIONS.md) for release notes.
|
brokit-0.1.0/README.md
ADDED
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
# brokit
|
|
2
|
+
|
|
3
|
+
Inspired by big bro DSPy, brokit is a minimal Python toolkit of composable, LEGO-like primitives for working with language models. Build what you need, skip the bloat.
|
|
4
|
+
|
|
5
|
+
## What's This About?
|
|
6
|
+
|
|
7
|
+
A lightweight library for working with LMs across any use case. Just the essential building blocks, nothing more.
|
|
8
|
+
|
|
9
|
+
## Core Concepts
|
|
10
|
+
|
|
11
|
+
Coming from DSPy? You already know what's up:
|
|
12
|
+
|
|
13
|
+
- **Prompt** = `dspy.Signature` — Define your input/output structure
|
|
14
|
+
- **Predictor** = `dspy.Predict` — Execute prompts with your LM
|
|
15
|
+
- **LM** = `dspy.LM` — Language model interface
|
|
16
|
+
- **Shot** — Few-shot examples made simple
|
|
17
|
+
|
|
18
|
+
## Design Philosophy
|
|
19
|
+
|
|
20
|
+
### Plug and Play
|
|
21
|
+
|
|
22
|
+
Everything's a base class. Compose, extend, swap out whatever you want. The LM module? Bring your own.
|
|
23
|
+
|
|
24
|
+
### Pure Python
|
|
25
|
+
|
|
26
|
+
Zero required dependencies. Want to use `requests`, `httpx`, or `boto3`? Go for it. Check the notebooks for integration examples.
|
|
27
|
+
|
|
28
|
+
## Features
|
|
29
|
+
|
|
30
|
+
- Text and image support (more formats coming)
|
|
31
|
+
- Few-shot learning with Shot
|
|
32
|
+
- Build custom LM implementations
|
|
33
|
+
- Structured prompts with type hints
|
|
34
|
+
|
|
35
|
+
## Getting Started
|
|
36
|
+
|
|
37
|
+
Peep the notebooks:
|
|
38
|
+
- Custom Prompt signatures
|
|
39
|
+
- Your own LM implementations
|
|
40
|
+
- Few-shot examples
|
|
41
|
+
- External library integrations
|
|
42
|
+
|
|
43
|
+
## What's Next?
|
|
44
|
+
|
|
45
|
+
Check out [ROADMAP.md](ROADMAP.md) for what's coming and [VERSIONS.md](VERSIONS.md) for release notes.
|
|
File without changes
|
|
File without changes
|
|
@@ -0,0 +1,166 @@
|
|
|
1
|
+
from brokit.primitives.prompt import Prompt
|
|
2
|
+
from brokit.primitives.shot import Shot
|
|
3
|
+
from brokit.primitives.lm import Message
|
|
4
|
+
from typing import List, Optional
|
|
5
|
+
|
|
6
|
+
class PromptFormatter:
    """Render a prompt contract into chat messages and parse marker-delimited replies.

    Every field is introduced by a marker built from ``special_token`` (by
    default ``<||name||>``); the pseudo-field ``completed`` marks the end of a
    reply. The ``format_*`` methods append to the list they are given and
    return that same list.
    """

    def __call__(
        self,
        input_fields: dict,
        output_fields: dict,
        instructions: str,
        inputs: dict,
        shots: Optional[List["Shot"]] = None,
        special_token: str = "<||{field}||>",
    ):
        """Build the three prompt parts for one request.

        Args:
            input_fields: Mapping of input field name to its field definition
                (objects exposing ``type`` and ``description``).
            output_fields: Same mapping for the output fields.
            instructions: Task description appended to the system prompt.
            inputs: Actual input values keyed by field name.
            shots: Optional few-shot examples.
            special_token: Marker template containing ``{field}``.

        Returns:
            ``(system_prompt, shot_prompt, input_prompt)`` in exactly that
            order. ``system_prompt`` and ``input_prompt`` are lists of
            strings; ``shot_prompt`` is a list of ``Message`` objects.
        """
        system_prompt = []
        input_prompt = []
        shot_prompt = []
        self.format_system_in_out(system_prompt, input_fields, output_fields)
        self.format_system_structure(system_prompt, input_fields, output_fields, special_token)
        self.format_system_instruction(system_prompt, instructions)
        self.format_shot_prompt(shot_prompt, input_fields, output_fields, shots, special_token)
        self.format_input_prompt(input_prompt, input_fields, output_fields, inputs, special_token)
        return system_prompt, shot_prompt, input_prompt

    def format_shot_prompt(self, shot_prompt: list, input_fields: dict, output_fields: dict, shots: Optional[List], special_token: str) -> list:
        """Append one user/assistant ``Message`` pair per shot to ``shot_prompt``.

        Missing shot inputs render as an empty string and missing outputs as
        ``"Intentionally left blank."``. Returns ``shot_prompt`` unchanged when
        ``shots`` is empty or ``None``.
        """
        if not shots:
            return shot_prompt

        for shot in shots:
            # User turn: every declared input field, in declaration order.
            user_content = [
                f"{special_token.format(field=field_name)}\n{shot.inputs.get(field_name, '')}"
                for field_name in input_fields
            ]
            shot_prompt.append(Message(role="user", content="\n".join(user_content)))

            # Assistant turn: every declared output field, then the end marker.
            assistant_content = []
            for field_name in output_fields:
                value = shot.outputs.get(field_name, "Intentionally left blank.")
                assistant_content.append(f"{special_token.format(field=field_name)}\n{value}\n")
            assistant_content.append(special_token.format(field="completed"))
            shot_prompt.append(Message(role="assistant", content="\n".join(assistant_content)))

        return shot_prompt

    def _format_in_out(self, system_prompt: list, input_dict: dict) -> list:
        """Append a numbered ``"i. name (type): description"`` line per field."""
        for idx, (field_name, field_value) in enumerate(input_dict.items(), start=1):
            system_prompt.append(f"{idx}. {field_name} ({field_value.type}): {field_value.description}")
        return system_prompt

    def format_system_in_out(self, system_prompt, input_fields, output_fields) -> list:
        """Append the input and output field listings; empty groups are skipped."""
        if input_fields:
            system_prompt.append("Your input fields are:")
            self._format_in_out(system_prompt, input_fields)
        if output_fields:
            system_prompt.append("Your output fields are:")
            self._format_in_out(system_prompt, output_fields)
        return system_prompt

    def _format_structure(self, system_prompt: list, input_dict: dict, special_token: str) -> list:
        """Append a ``marker`` + ``{name}`` placeholder entry for each field."""
        for field_name in input_dict:
            system_prompt.append(f"{special_token.format(field=field_name)}\n{{{field_name}}}\n")
        return system_prompt

    def format_system_structure(self, system_prompt: list, input_fields: dict, output_fields: dict, special_token: str) -> list:
        """Append the description of the interaction layout, ending with the ``completed`` marker."""
        system_prompt.append("\nAll interactions will be structured in the following way, with the appropriate values filled in.\n")
        self._format_structure(system_prompt, input_fields, special_token)
        self._format_structure(system_prompt, output_fields, special_token)
        system_prompt.append(special_token.format(field="completed"))
        return system_prompt

    def format_system_instruction(self, system_prompt: list, instructions: str) -> list:
        """Append the objective header followed by the instructions text."""
        system_prompt.append("In adhering to this structure, your objective is: ")
        system_prompt.append(instructions)
        return system_prompt

    def format_input_prompt(self, input_prompt: list, input_fields: dict, output_fields: dict, inputs: dict, special_token: str) -> list:
        """Append the supplied input values and the closing response directive.

        Values are emitted in the order of ``inputs``; keys that are not
        declared in ``input_fields`` are ignored.
        """
        for input_name, input_value in inputs.items():
            if input_name in input_fields:
                input_prompt.append(f"{special_token.format(field=input_name)}\n{input_value}\n")

        input_prompt.append(
            "Respond with the corresponding output fields, starting with the field: " +
            ', '.join([f"`{special_token.format(field=field_name)}`" for field_name in output_fields.keys()]) +
            f" and then ending with the marker for `{special_token.format(field='completed')}`."
        )

        return input_prompt

    @staticmethod
    def format_chat(system_prompt: list, shot_prompt: list, input_prompt: list, images: Optional[list] = None) -> List["Message"]:
        """
        Format messages in order: system → shots → user input.

        Args:
            system_prompt: System instructions (list of strings, joined with newlines)
            shot_prompt: List of Message objects (user/assistant pairs)
            input_prompt: User input (list of strings, joined with newlines)
            images: Optional images attached to the final user message

        Returns:
            The list of Message objects; empty parts are omitted.
        """
        messages = []

        # 1. System message
        if system_prompt:
            messages.append(Message(role="system", content="\n".join(system_prompt)))

        # 2. Shot messages (already Message objects)
        if shot_prompt:
            messages.extend(shot_prompt)

        # 3. User input message
        if input_prompt:
            messages.append(Message(
                role="user",
                content="\n".join(input_prompt),
                images=images,
            ))

        return messages

    @staticmethod
    def parse_prediction(response: str, output_fields: dict, special_token: str = "<||{field}||>") -> dict:
        """Extract each output field's text from a marker-delimited reply.

        A field's value runs from just after its own marker up to the next
        marker of any declared output field or the ``completed`` marker,
        whichever comes first (or to the end of the reply when none follows).
        Fields whose marker is absent are left out of the result.

        Args:
            response: Raw model reply.
            output_fields: Mapping whose keys are the output field names.
            special_token: Marker template containing ``{field}``.

        Returns:
            Dict of field name to stripped text.
        """
        end_token = special_token.format(field="completed")
        field_tokens = {name: special_token.format(field=name) for name in output_fields}

        outputs = {}
        for field_name, start_token in field_tokens.items():
            start_idx = response.find(start_token)
            if start_idx == -1:
                continue
            start_idx += len(start_token)
            # Stop at the nearest following marker; stopping only at the
            # `completed` marker would fold later fields into this value.
            boundaries = [
                pos
                for token in (end_token, *field_tokens.values())
                if (pos := response.find(token, start_idx)) != -1
            ]
            end_idx = min(boundaries, default=len(response))
            outputs[field_name] = response[start_idx:end_idx].strip()
        return outputs
|
|
@@ -0,0 +1,115 @@
|
|
|
1
|
+
from abc import ABC, abstractmethod
|
|
2
|
+
from typing import List, Dict, Optional, Any, Literal
|
|
3
|
+
from dataclasses import dataclass
|
|
4
|
+
from enum import Enum
|
|
5
|
+
import time
|
|
6
|
+
from collections import OrderedDict
|
|
7
|
+
import json
|
|
8
|
+
import hashlib
|
|
9
|
+
|
|
10
|
+
class ModelType(str, Enum):
    """Kind of model interface an ``LM`` exposes.

    Members are also plain strings, so ``ModelType.CHAT == "chat"`` holds.
    """

    CHAT = "chat"
    # A COMPLETION = "completion" member is not implemented yet.
|
|
13
|
+
|
|
14
|
+
@dataclass
class Message:
    """One chat turn: a role, its text, and optional attached images."""

    role: Literal["system", "user", "assistant"]
    content: str
    images: Optional[List[Any]] = None

    def to_dict(self) -> Dict[str, Any]:
        """Return the message as a plain dict for serialization.

        The ``"images"`` key is present only when ``images`` is non-empty.
        """
        payload: Dict[str, Any] = {"role": self.role, "content": self.content}
        if not self.images:
            return payload
        payload["images"] = self.images
        return payload
|
|
26
|
+
|
|
27
|
+
@dataclass
class Usage:
    """Token counts reported for a single model call."""

    input_tokens: int   # tokens in the request
    output_tokens: int  # tokens in the reply
|
|
31
|
+
|
|
32
|
+
@dataclass
class ModelResponse:
    """Normalized result of one model call, as produced by ``LM.parse_response``."""

    # Required fields, filled by the LM implementation.
    model_name: str
    model_type: ModelType
    response: str
    usage: Usage

    # Optional bookkeeping; these default to "not set".
    response_ms: Optional[float] = None
    cached: bool = False
    metadata: Optional[Dict[str, Any]] = None
    request: Optional[Dict[str, Any]] = None
    parsed_response: Optional[Dict[str, Any]] = None
|
|
43
|
+
|
|
44
|
+
class LM(ABC):
    """Abstract language-model client with timing and a small LRU response cache.

    Subclasses implement ``request`` (perform the call) and
    ``parse_response`` (turn the raw result into a ``ModelResponse``).
    Calling the instance validates the arguments, consults the cache, times
    the request and stores the parsed response.
    """

    def __init__(self, model_name: str, model_type: "ModelType", cache_size: int = 10):
        """
        Args:
            model_name: Identifier of the model; also part of the cache key.
            model_type: Interface kind of the model.
            cache_size: Maximum number of responses kept in the cache.
        """
        self.model_name = model_name
        self.model_type = model_type
        self._cache = OrderedDict()
        self._cache_size = cache_size
        self.history = []

    @abstractmethod
    def request(self, prompt: Optional[str] = None, messages: Optional[List["Message"]] = None, **kwargs) -> Any:
        """Perform the model call and return the raw provider response."""
        raise NotImplementedError

    @abstractmethod
    def parse_response(self, original_response: dict) -> "ModelResponse":
        """Convert the raw provider response into a ``ModelResponse``."""
        raise NotImplementedError

    def _validate_input(self, prompt: Optional[str], messages: Optional[List["Message"]]):
        """Require exactly one of ``prompt`` / ``messages``.

        Raises:
            ValueError: If both or neither are provided.
        """
        if prompt is None and messages is None:
            raise ValueError("Either prompt or messages must be provided")
        if prompt is not None and messages is not None:
            raise ValueError("Cannot provide both prompt and messages")

    def _cache_key(self, prompt: Optional[str], messages: Optional[List["Message"]], kwargs: dict) -> str:
        """Return a SHA-256 hex digest identifying the request parameters.

        ``Message`` objects are flattened to dicts; any other message value
        is passed through as given. Everything must be JSON-serializable,
        otherwise ``json.dumps`` raises ``TypeError``.
        """
        serializable_messages = None
        if messages:
            serializable_messages = [
                {"role": msg.role, "content": msg.content, "images": msg.images}
                if isinstance(msg, Message)
                else msg
                for msg in messages
            ]

        cache_data = {
            "model": self.model_name,
            "prompt": prompt,
            "messages": serializable_messages,
            **kwargs,
        }
        # Sorted keys keep the digest independent of kwargs ordering.
        json_str = json.dumps(cache_data, sort_keys=True)
        return hashlib.sha256(json_str.encode()).hexdigest()

    def __call__(self, prompt: Optional[str] = None, messages: Optional[List["Message"]] = None, **kwargs) -> "ModelResponse":
        """Run the model, or serve an identical earlier request from the cache.

        Args:
            prompt: Plain prompt text (mutually exclusive with ``messages``).
            messages: Chat messages (mutually exclusive with ``prompt``).
            **kwargs: Forwarded to ``request`` and included in the cache key.

        Returns:
            The parsed response. A fresh call has ``response_ms`` set to the
            measured request time. A cache hit returns a shallow copy of the
            stored response with ``cached`` set to True.

        Raises:
            ValueError: If both or neither of ``prompt``/``messages`` are given.
        """
        import copy

        self._validate_input(prompt, messages)
        key = self._cache_key(prompt, messages, kwargs)

        if key in self._cache:
            self._cache.move_to_end(key)  # mark as most recently used
            # Copy before flagging: the stored object is the one the first
            # caller received (and may have put in `history`), so flipping
            # `cached` in place would rewrite that earlier result.
            cached_response = copy.copy(self._cache[key])
            cached_response.cached = True
            return cached_response

        # Time only the request itself, not the parsing.
        start = time.perf_counter()
        original_response = self.request(prompt=prompt, messages=messages, **kwargs)
        elapsed_ms = (time.perf_counter() - start) * 1000

        schema_response = self.parse_response(original_response)
        schema_response.response_ms = elapsed_ms

        # Store, then evict the least recently used entry when over capacity.
        self._cache[key] = schema_response
        if len(self._cache) > self._cache_size:
            self._cache.popitem(last=False)
        return schema_response
|
|
@@ -0,0 +1,109 @@
|
|
|
1
|
+
from brokit.primitives.prompt import Prompt
|
|
2
|
+
from brokit.primitives.lm import LM, ModelType, ModelResponse
|
|
3
|
+
from typing import Type, List, Optional, Any
|
|
4
|
+
import re
|
|
5
|
+
from brokit.primitives.formatter import PromptFormatter
|
|
6
|
+
from brokit.primitives.prompt import Image
|
|
7
|
+
from brokit.primitives.shot import Shot
|
|
8
|
+
|
|
9
|
+
def parse_outputs(response: str, output_fields: dict, special_token: str = "<||{field}||>") -> dict:
    """Parse an LM response into field values using the marker template.

    For every name in ``output_fields`` the text after ``<marker>`` and a line
    break, up to the next occurrence of the marker prefix (or the end of the
    response), is captured and stripped. Fields that do not appear are left
    out of the result.

    Args:
        response: Raw model reply.
        output_fields: Mapping whose keys are the output field names.
        special_token: Marker template with exactly one ``{field}``
            placeholder, e.g. ``"<||{field}||>"``.

    Returns:
        Dict of field name to stripped text.

    Raises:
        ValueError: If ``special_token`` does not contain exactly one
            ``{field}`` placeholder.
    """
    # "<||{field}||>" -> prefix "<||", suffix "||>"
    prefix, placeholder, suffix = special_token.partition("{field}")
    if not placeholder or "{field}" in suffix:
        raise ValueError(
            f"special_token must contain exactly one '{{field}}' placeholder: {special_token!r}"
        )

    # These do not depend on the field, so escape them once.
    escaped_prefix = re.escape(prefix)
    escaped_suffix = re.escape(suffix)

    outputs = {}
    for field_name in output_fields:
        # Escape the name as well: a name like "a.b" must match literally.
        pattern = (
            rf"{escaped_prefix}{re.escape(field_name)}{escaped_suffix}"
            rf"\s*\n(.*?)(?={escaped_prefix}|$)"
        )
        match = re.search(pattern, response, re.DOTALL)
        if match:
            outputs[field_name] = match.group(1).strip()

    return outputs
|
|
28
|
+
|
|
29
|
+
class Prediction:
    """Result container exposing each keyword argument as an attribute."""

    def __init__(self, **kwargs: Any) -> None:
        # Keep the mapping for to_dict()/repr and mirror it onto the instance.
        self._data = kwargs
        self.__dict__.update(kwargs)

    def __repr__(self) -> str:
        rendered = [f"{key}={value!r}" for key, value in self._data.items()]
        items = ",\n ".join(rendered)
        return f"Prediction(\n {items}\n)"

    def __getattr__(self, name: str) -> Any:
        # Only reached when normal lookup fails, i.e. the field was never set.
        raise AttributeError(f"'{type(self).__name__}' has no attribute '{name}'")

    def to_dict(self) -> dict:
        """Return the underlying field mapping (the same dict object, not a copy)."""
        return self._data
|
|
44
|
+
|
|
45
|
+
class Predictor:
    """Run a ``Prompt`` contract against an ``LM`` and return a typed ``Prediction``."""

    def __init__(self, prompt: Type["Prompt"], lm: Optional["LM"] = None, shots: Optional[List["Shot"]] = None):
        """
        Args:
            prompt: Prompt class supplying ``input_fields``, ``output_fields``
                and ``instructions``.
            lm: Model client used for calls; must be set before calling.
            shots: Optional few-shot examples included in every request.
        """
        self.prompt = prompt
        self.lm = lm
        self.shots = shots
        self.prompt_formatter = PromptFormatter()

    def structure_output(self, response: "ModelResponse", output_fields, special_token: str = "<||{field}||>") -> "Prediction":
        """Parse ``response.response`` and convert each value to its declared type.

        Only fields found in the reply are included in the prediction.
        """
        output = parse_outputs(response.response, output_fields, special_token)
        # parse_outputs only yields keys taken from output_fields, so every
        # parsed value has a field definition to convert against.
        converted = {
            field_name: self._convert_type(value, output_fields[field_name].type)
            for field_name, value in output.items()
        }
        return Prediction(**converted)

    def _convert_type(self, value: str, target_type: type):
        """Convert the string ``value`` to ``target_type``.

        ``str`` is returned as-is; ``int``/``float`` are parsed after
        stripping; ``bool`` is True for ``true``/``1``/``yes`` in any letter
        case; ``list`` splits the bracket-stripped text on commas and gives
        an empty list for empty content. Any other type returns ``value``
        unchanged.

        Raises:
            ValueError: If an ``int``/``float`` value cannot be parsed.
        """
        if target_type == str:
            return value
        if target_type == int:
            return int(value.strip())
        if target_type == float:
            return float(value.strip())
        if target_type == bool:
            return value.strip().lower() in ('true', '1', 'yes')
        if target_type == list:
            # Simple list parsing - can be enhanced
            inner = value.strip().strip('[]')
            if not inner.strip():
                # "[]" or an empty value is an empty list, not [''].
                return []
            return [item.strip() for item in inner.split(',')]
        return value

    def _call_chat(self, lm, system_prompt, shot_prompt, input_prompt, images):
        """Send the formatted chat to ``lm``, record the exchange, return the prediction.

        The response object is annotated with ``parsed_response`` and
        ``request`` and appended to ``lm.history``.
        """
        messages = self.prompt_formatter.format_chat(system_prompt, shot_prompt, input_prompt, images)
        response = lm(messages=messages)
        output = self.structure_output(response, self.prompt.output_fields)
        response.parsed_response = output.to_dict()
        response.request = messages
        lm.history.append(response)
        return output

    def __call__(self, images: Optional[List["Image"]] = None, **kwargs):
        """Format the prompt with ``kwargs`` as inputs and call the model.

        Args:
            images: Optional images; ``Image`` objects are converted with
                ``to_base64()`` and any other value is passed through as is.
            **kwargs: Input field values keyed by field name.

        Returns:
            The ``Prediction`` parsed from the model's reply.

        Raises:
            ValueError: If no LM was supplied to the predictor.
            NotImplementedError: If the LM is not a chat model.
        """
        lm = self.lm
        if lm is None:
            raise ValueError("Predictor has no LM; pass lm=... when constructing it.")

        # The formatter returns (system, shots, input) - keep the names in
        # that order so they line up with _call_chat's parameters.
        system_prompt, shot_prompt, input_prompt = self.prompt_formatter(
            self.prompt.input_fields,
            self.prompt.output_fields,
            self.prompt.instructions,
            kwargs,
            self.shots,
        )

        base64_images = None
        if images:
            base64_images = [
                img.to_base64() if isinstance(img, Image) else img
                for img in images
            ]

        if lm.model_type == ModelType.CHAT:
            return self._call_chat(lm, system_prompt, shot_prompt, input_prompt, base64_images)
        raise NotImplementedError("Only CHAT model type is implemented.")
|
|
@@ -0,0 +1,158 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Prompt is where we keep the prompt contract for the program
|
|
3
|
+
|
|
4
|
+
What should Prompt do:
|
|
5
|
+
- construct new class from MetaClass concept
|
|
6
|
+
- in new class, have input_fields, output_fields, instructions as properties
|
|
7
|
+
- the class itself should be composable with/without output_fields
|
|
8
|
+
- Prompt is simply the way to construct the prompt which will be formatted later with Predictor
|
|
9
|
+
- Since prompt here has both input/output, I think we can make use of it as example or demo?
|
|
10
|
+
"""
|
|
11
|
+
|
|
12
|
+
from dataclasses import dataclass
|
|
13
|
+
from typing import Type, Any, Dict, Union
|
|
14
|
+
import base64
|
|
15
|
+
from pathlib import Path
|
|
16
|
+
import httpx
|
|
17
|
+
import mimetypes
|
|
18
|
+
|
|
19
|
+
class Image:
    """Image payload held as a base64 string, loaded from bytes, a file path or a URL."""

    def __init__(self, source: Union[str, bytes, Path]):
        """
        Initialize Image from path, URL, or bytes.

        Args:
            source: File path, URL (http/https), or raw bytes

        Raises:
            ValueError: If ``source`` is not ``str``, ``bytes`` or ``Path``.

        The MIME type is guessed from the file name or the URL path and
        falls back to ``image/jpeg``; raw bytes are always labelled
        ``image/jpeg``. URL sources are downloaded with ``httpx``.
        """
        self.source = source
        if isinstance(source, bytes):
            self._base64 = base64.b64encode(source).decode('utf-8')
            self._mime_type = "image/jpeg"  # Default; the bytes are not inspected
        elif isinstance(source, (str, Path)):
            source_str = str(source)
            if source_str.startswith(('http://', 'https://')):
                from urllib.parse import urlparse

                self._base64 = self._from_url(source_str)
                # Guess from the path component only, so a query string
                # after the file name does not hide the extension.
                name_for_guess = urlparse(source_str).path
            else:
                self._base64 = self._from_path(source_str)
                name_for_guess = source_str
            self._mime_type = mimetypes.guess_type(name_for_guess)[0] or "image/jpeg"
        else:
            raise ValueError(f"Unsupported source type: {type(source)}")

    def _from_path(self, path: str) -> str:
        """Load image from file path and return it base64 encoded."""
        with open(path, 'rb') as f:
            return base64.b64encode(f.read()).decode('utf-8')

    def _from_url(self, url: str) -> str:
        """Download image from URL and return it base64 encoded.

        Raises whatever ``response.raise_for_status()`` raises on an error
        status.
        """
        response = httpx.get(url)
        response.raise_for_status()
        return base64.b64encode(response.content).decode('utf-8')

    def to_base64(self) -> str:
        """Get base64 encoded string."""
        return self._base64

    def __repr__(self) -> str:
        """DSPy-style representation: MIME type and encoded length, not the data."""
        data_url = f"data:{self._mime_type};base64,<IMAGE_BASE64_ENCODED({len(self._base64)})>"
        return f"Image(url={data_url})"
|
|
61
|
+
|
|
62
|
+
@dataclass
class FieldInfo:
    """Definition of a single prompt field."""

    name: str         # attribute name; assigned by PromptMeta when the class is built
    description: str  # text shown to the model for this field
    type: Type        # declared Python type of the value
    is_input: bool    # True for an input field, False for an output field
|
|
68
|
+
|
|
69
|
+
def InputField(description: str = "") -> Any:
    """Declare an input field on a Prompt class.

    Returns a ``FieldInfo`` with an empty name and type ``str``; both are
    placeholders that ``PromptMeta`` fills in from the class attribute name
    and its annotation.
    """
    return FieldInfo("", description, str, True)
|
|
71
|
+
|
|
72
|
+
def OutputField(description: str = "") -> Any:
    """Declare an output field on a Prompt class.

    Returns a ``FieldInfo`` with an empty name and type ``str``; both are
    placeholders that ``PromptMeta`` fills in from the class attribute name
    and its annotation.
    """
    return FieldInfo("", description, str, False)
|
|
74
|
+
|
|
75
|
+
class PromptMeta(type):
    """Metaclass that gathers ``FieldInfo`` class attributes into field maps.

    For each class it creates, every ``FieldInfo`` found in the class body is
    named after its attribute, typed from the matching annotation (when there
    is one), sorted into ``_input_fields`` or ``_output_fields`` and removed
    from the class namespace. The stripped class docstring becomes
    ``_instructions``. Only the class's own body is inspected.
    """

    # Add type hints for metaclass attributes
    _input_fields: Dict[str, FieldInfo]
    _output_fields: Dict[str, FieldInfo]
    _instructions: str

    def __new__(cls, name, bases, namespace):
        # A namespace that already carries all three attributes was prepared
        # by the caller; create the class without re-scanning it.
        prepared = ('_input_fields', '_output_fields', '_instructions')
        if all(key in namespace for key in prepared):
            return super().__new__(cls, name, bases, namespace)

        # Get type annotations. With lazily evaluated annotations
        # (Python 3.14) the class body holds an annotate function instead of
        # an '__annotations__' dict; call it so declared types are not lost.
        annotations = namespace.get('__annotations__')
        if annotations is None:
            annotate = namespace.get('__annotate__') or namespace.get('__annotate_func__')
            if callable(annotate):
                try:
                    annotations = annotate(1)  # 1 requests evaluated values
                except (NameError, NotImplementedError):
                    # Unresolvable names: fall back to the default types.
                    annotations = None
        annotations = annotations or {}

        input_fields = {}
        output_fields = {}
        # Iterate over a snapshot because matching entries are deleted.
        for field_name, field_value in list(namespace.items()):
            if not isinstance(field_value, FieldInfo):
                continue
            field_value.name = field_name

            # Extract type from annotation
            if field_name in annotations:
                field_value.type = annotations[field_name]

            bucket = input_fields if field_value.is_input else output_fields
            bucket[field_name] = field_value

            del namespace[field_name]

        namespace['_input_fields'] = input_fields
        namespace['_output_fields'] = output_fields
        namespace['_instructions'] = (namespace.get('__doc__', '') or '').strip()
        return super().__new__(cls, name, bases, namespace)

    @property
    def input_fields(cls):
        """Mapping of input field name to ``FieldInfo`` for this class."""
        return cls._input_fields

    @property
    def output_fields(cls):
        """Mapping of output field name to ``FieldInfo`` for this class."""
        return cls._output_fields

    @property
    def instructions(cls):
        """The class docstring, stripped; empty string when there is none."""
        return cls._instructions
|
|
120
|
+
|
|
121
|
+
class Prompt(metaclass=PromptMeta):

    """Base class for defining prompts with input and output fields."""
    # NOTE: the docstring above is read by PromptMeta as this class's
    # `instructions`, so its text is runtime data, not just documentation.

    # Type hints for class attributes set by metaclass
    _input_fields: Dict[str, FieldInfo]
    _output_fields: Dict[str, FieldInfo]
    _instructions: str

    def __init__(self, **kwargs):
        # Store the given field values on the instance.
        #
        # Raises ValueError for a keyword that is not a declared field, or
        # when a declared input field is missing. Output fields are optional.
        for name, value in kwargs.items():
            if name in self._input_fields or name in self._output_fields:
                setattr(self, name, value)
            else:
                raise ValueError(f"Unknown field: {name}")

        for name in self._input_fields:
            if name not in kwargs:
                raise ValueError(f"Missing required input: {name}")

    def __getattr__(self, name):
        # Only called when normal lookup fails: an output field that was not
        # provided reads as the placeholder text.
        if name in self._output_fields:
            return "Intentionally left blank."
        raise AttributeError(f"'{type(self).__name__}' has no attribute '{name}'")

    @property
    def inputs(self):
        """Get all input values (X)."""
        return {name: getattr(self, name) for name in self._input_fields}

    @property
    def outputs(self):
        """Get all output values (y); fields not provided hold the placeholder text."""
        return {name: getattr(self, name) for name in self._output_fields}

    def is_complete(self):
        """Check if all outputs are provided."""
        # Look in the instance dict: hasattr()/getattr() would go through
        # __getattr__, which answers for every output field and would make
        # this check always succeed.
        provided = vars(self)
        return all(name in provided for name in self._output_fields)
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
from brokit.primitives.prompt import Prompt
|
|
2
|
+
from typing import Optional, Type
|
|
3
|
+
|
|
4
|
+
class Shot:
    """One few-shot example: field values, optionally tied to a Prompt class."""

    def __init__(self, prompt_class: Optional[Type["Prompt"]] = None, **kwargs):
        """
        Create a shot with automatic input/output separation.

        Args:
            prompt_class: Optional Prompt class used to tell input fields
                from output fields. Without it, ``inputs`` and ``outputs``
                are both empty.
            **kwargs: Field values; each is also set as an attribute.
        """
        self._prompt_class = prompt_class
        self._data = kwargs

        for key, value in kwargs.items():
            setattr(self, key, value)

    @property
    def inputs(self):
        """Get input fields based on prompt class."""
        if not self._prompt_class:
            return {}
        declared = self._prompt_class.input_fields
        return {key: value for key, value in self._data.items() if key in declared}

    @property
    def outputs(self):
        """Get output fields with defaults for missing ones."""
        if not self._prompt_class:
            return {}
        return {
            field_name: self._data.get(field_name, "Intentionally left blank.")
            for field_name in self._prompt_class.output_fields
        }

    def __getattr__(self, name):
        # Return default for missing output fields.
        # Read _prompt_class from the instance dict: going through normal
        # attribute access would re-enter __getattr__ and recurse without
        # end when the attribute is not set yet (e.g. during copy/unpickle).
        prompt_class = self.__dict__.get("_prompt_class")
        if prompt_class and name in prompt_class.output_fields:
            return "Intentionally left blank."
        raise AttributeError(f"'{type(self).__name__}' has no attribute '{name}'")

    def __repr__(self):
        items = ", ".join(f"{k}={v!r}" for k, v in self._data.items())
        return f"Shot({items})"
|
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: brokit
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: brokit, think it like a playing lego
|
|
5
|
+
Requires-Python: >=3.10
|
|
6
|
+
Description-Content-Type: text/markdown
|
|
7
|
+
License-File: LICENSE
|
|
8
|
+
Dynamic: license-file
|
|
9
|
+
|
|
10
|
+
# brokit
|
|
11
|
+
|
|
12
|
+
Inspired by big bro DSPy, brokit is a minimal Python toolkit of composable, LEGO-like primitives for working with language models. Build what you need, skip the bloat.
|
|
13
|
+
|
|
14
|
+
## What's This About?
|
|
15
|
+
|
|
16
|
+
A lightweight library for working with LMs across any use case. Just the essential building blocks, nothing more.
|
|
17
|
+
|
|
18
|
+
## Core Concepts
|
|
19
|
+
|
|
20
|
+
Coming from DSPy? You already know what's up:
|
|
21
|
+
|
|
22
|
+
- **Prompt** = `dspy.Signature` — Define your input/output structure
|
|
23
|
+
- **Predictor** = `dspy.Predict` — Execute prompts with your LM
|
|
24
|
+
- **LM** = `dspy.LM` — Language model interface
|
|
25
|
+
- **Shot** — Few-shot examples made simple
|
|
26
|
+
|
|
27
|
+
## Design Philosophy
|
|
28
|
+
|
|
29
|
+
### Plug and Play
|
|
30
|
+
|
|
31
|
+
Everything's a base class. Compose, extend, swap out whatever you want. The LM module? Bring your own.
|
|
32
|
+
|
|
33
|
+
### Pure Python
|
|
34
|
+
|
|
35
|
+
Zero required dependencies. Want to use `requests`, `httpx`, or `boto3`? Go for it. Check the notebooks for integration examples.
|
|
36
|
+
|
|
37
|
+
## Features
|
|
38
|
+
|
|
39
|
+
- Text and image support (more formats coming)
|
|
40
|
+
- Few-shot learning with Shot
|
|
41
|
+
- Build custom LM implementations
|
|
42
|
+
- Structured prompts with type hints
|
|
43
|
+
|
|
44
|
+
## Getting Started
|
|
45
|
+
|
|
46
|
+
Peep the notebooks:
|
|
47
|
+
- Custom Prompt signatures
|
|
48
|
+
- Your own LM implementations
|
|
49
|
+
- Few-shot examples
|
|
50
|
+
- External library integrations
|
|
51
|
+
|
|
52
|
+
## What's Next?
|
|
53
|
+
|
|
54
|
+
Check out [ROADMAP.md](ROADMAP.md) for what's coming and [VERSIONS.md](VERSIONS.md) for release notes.
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
LICENSE
|
|
2
|
+
README.md
|
|
3
|
+
pyproject.toml
|
|
4
|
+
brokit/__init__.py
|
|
5
|
+
brokit.egg-info/PKG-INFO
|
|
6
|
+
brokit.egg-info/SOURCES.txt
|
|
7
|
+
brokit.egg-info/dependency_links.txt
|
|
8
|
+
brokit.egg-info/top_level.txt
|
|
9
|
+
brokit/primitives/__init__.py
|
|
10
|
+
brokit/primitives/formatter.py
|
|
11
|
+
brokit/primitives/lm.py
|
|
12
|
+
brokit/primitives/predictor.py
|
|
13
|
+
brokit/primitives/prompt.py
|
|
14
|
+
brokit/primitives/shot.py
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
brokit
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
[project]
|
|
2
|
+
name = "brokit"
|
|
3
|
+
version = "0.1.0"
|
|
4
|
+
description = "brokit, think it like a playing lego"
|
|
5
|
+
readme = "README.md"
|
|
6
|
+
requires-python = ">=3.10"
|
|
7
|
+
dependencies = []
|
|
8
|
+
|
|
9
|
+
[dependency-groups]
|
|
10
|
+
dev = [
|
|
11
|
+
"boto3>=1.42.39",
|
|
12
|
+
"httpx>=0.28.1",
|
|
13
|
+
"ipykernel>=7.1.0",
|
|
14
|
+
]
|
brokit-0.1.0/setup.cfg
ADDED