pixie-examples 0.2.6.dev5__tar.gz → 0.2.10.dev1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (36) hide show
  1. {pixie_examples-0.2.6.dev5 → pixie_examples-0.2.10.dev1}/PKG-INFO +2 -2
  2. pixie_examples-0.2.10.dev1/examples/openai/__init__.py +5 -0
  3. pixie_examples-0.2.10.dev1/examples/openai/openai_rag.py +398 -0
  4. {pixie_examples-0.2.6.dev5 → pixie_examples-0.2.10.dev1}/pyproject.toml +2 -2
  5. {pixie_examples-0.2.6.dev5 → pixie_examples-0.2.10.dev1}/LICENSE +0 -0
  6. {pixie_examples-0.2.6.dev5 → pixie_examples-0.2.10.dev1}/README.md +0 -0
  7. {pixie_examples-0.2.6.dev5 → pixie_examples-0.2.10.dev1}/examples/__init__.py +0 -0
  8. {pixie_examples-0.2.6.dev5 → pixie_examples-0.2.10.dev1}/examples/langchain/README.md +0 -0
  9. {pixie_examples-0.2.6.dev5 → pixie_examples-0.2.10.dev1}/examples/langchain/__init__.py +0 -0
  10. {pixie_examples-0.2.6.dev5 → pixie_examples-0.2.10.dev1}/examples/langchain/basic_agent.py +0 -0
  11. {pixie_examples-0.2.6.dev5 → pixie_examples-0.2.10.dev1}/examples/langchain/customer_support.py +0 -0
  12. {pixie_examples-0.2.6.dev5 → pixie_examples-0.2.10.dev1}/examples/langchain/personal_assistant.py +0 -0
  13. {pixie_examples-0.2.6.dev5 → pixie_examples-0.2.10.dev1}/examples/langchain/sql_agent.py +0 -0
  14. {pixie_examples-0.2.6.dev5 → pixie_examples-0.2.10.dev1}/examples/langgraph/__init__.py +0 -0
  15. {pixie_examples-0.2.6.dev5 → pixie_examples-0.2.10.dev1}/examples/langgraph/langgraph_rag.py +0 -0
  16. {pixie_examples-0.2.6.dev5 → pixie_examples-0.2.10.dev1}/examples/langgraph/langgraph_sql_agent.py +0 -0
  17. {pixie_examples-0.2.6.dev5 → pixie_examples-0.2.10.dev1}/examples/openai_agents_sdk/README.md +0 -0
  18. {pixie_examples-0.2.6.dev5 → pixie_examples-0.2.10.dev1}/examples/openai_agents_sdk/__init__.py +0 -0
  19. {pixie_examples-0.2.6.dev5 → pixie_examples-0.2.10.dev1}/examples/openai_agents_sdk/customer_service.py +0 -0
  20. {pixie_examples-0.2.6.dev5 → pixie_examples-0.2.10.dev1}/examples/openai_agents_sdk/financial_research_agent.py +0 -0
  21. {pixie_examples-0.2.6.dev5 → pixie_examples-0.2.10.dev1}/examples/openai_agents_sdk/llm_as_a_judge.py +0 -0
  22. {pixie_examples-0.2.6.dev5 → pixie_examples-0.2.10.dev1}/examples/openai_agents_sdk/routing.py +0 -0
  23. {pixie_examples-0.2.6.dev5 → pixie_examples-0.2.10.dev1}/examples/pydantic_ai/.env.example +0 -0
  24. {pixie_examples-0.2.6.dev5 → pixie_examples-0.2.10.dev1}/examples/pydantic_ai/README.md +0 -0
  25. {pixie_examples-0.2.6.dev5 → pixie_examples-0.2.10.dev1}/examples/pydantic_ai/__init__.py +0 -0
  26. {pixie_examples-0.2.6.dev5 → pixie_examples-0.2.10.dev1}/examples/pydantic_ai/bank_support.py +0 -0
  27. {pixie_examples-0.2.6.dev5 → pixie_examples-0.2.10.dev1}/examples/pydantic_ai/flight_booking.py +0 -0
  28. {pixie_examples-0.2.6.dev5 → pixie_examples-0.2.10.dev1}/examples/pydantic_ai/question_graph.py +0 -0
  29. {pixie_examples-0.2.6.dev5 → pixie_examples-0.2.10.dev1}/examples/pydantic_ai/sql_gen.py +0 -0
  30. {pixie_examples-0.2.6.dev5 → pixie_examples-0.2.10.dev1}/examples/pydantic_ai/structured_output.py +0 -0
  31. {pixie_examples-0.2.6.dev5 → pixie_examples-0.2.10.dev1}/examples/quickstart/__init__.py +0 -0
  32. {pixie_examples-0.2.6.dev5 → pixie_examples-0.2.10.dev1}/examples/quickstart/chatbot.py +0 -0
  33. {pixie_examples-0.2.6.dev5 → pixie_examples-0.2.10.dev1}/examples/quickstart/problem_solver.py +0 -0
  34. {pixie_examples-0.2.6.dev5 → pixie_examples-0.2.10.dev1}/examples/quickstart/sleepy_poet.py +0 -0
  35. {pixie_examples-0.2.6.dev5 → pixie_examples-0.2.10.dev1}/examples/quickstart/weather_agent.py +0 -0
  36. {pixie_examples-0.2.6.dev5 → pixie_examples-0.2.10.dev1}/examples/sql_utils.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: pixie-examples
3
- Version: 0.2.6.dev5
3
+ Version: 0.2.10.dev1
4
4
  Summary: examples for using Pixie
5
5
  License: MIT
6
6
  License-File: LICENSE
@@ -24,7 +24,7 @@ Requires-Dist: openinference-instrumentation-crewai (>=0.1.17,<0.2.0)
24
24
  Requires-Dist: openinference-instrumentation-dspy (>=0.1.33,<0.2.0)
25
25
  Requires-Dist: openinference-instrumentation-google-adk (>=0.1.8,<0.2.0)
26
26
  Requires-Dist: openinference-instrumentation-openai-agents (>=1.4.0,<2.0.0)
27
- Requires-Dist: pixie-sdk (>=0.2.6,<0.3.0)
27
+ Requires-Dist: pixie-sdk (>=0.2.9,<0.3.0)
28
28
  Requires-Dist: pydantic (>=2.7.4,<3.0.0)
29
29
  Requires-Dist: pydantic-ai-slim (>=1.39.0,<2.0.0)
30
30
  Requires-Dist: pymarkdownlnt (>=0.9.34,<0.10.0)
@@ -0,0 +1,5 @@
1
+ """OpenAI examples using the OpenAI API directly (without agents SDK)."""
2
+
3
+ from examples.openai.openai_rag import openai_rag_agent
4
+
5
+ __all__ = ["openai_rag_agent"]
@@ -0,0 +1,398 @@
1
+ """
2
+ OpenAI RAG Agent (Retrieval Augmented Generation)
3
+
4
+ This example demonstrates building an agentic RAG system using the OpenAI Responses API
5
+ that can:
6
+ 1. Decide when to use retrieval vs. respond directly
7
+ 2. Grade retrieved documents for relevance
8
+ 3. Rewrite questions if documents aren't relevant
9
+ 4. Generate answers based on retrieved context
10
+
11
+ This is a direct port of the LangGraph RAG example, using native OpenAI API
12
+ instead of LangGraph/LangChain framework (except for vectorstore utilities).
13
+ """
14
+
15
+ import json
16
+ from openai.types.responses import ResponseInputItemParam
17
+ from openai.types.responses.tool_param import ToolParam
18
+ from pydantic import BaseModel, Field
19
+ import langfuse.openai
20
+ from langchain_text_splitters import RecursiveCharacterTextSplitter
21
+ from langchain_core.vectorstores import InMemoryVectorStore
22
+ from langchain_core.documents import Document
23
+ from langchain_openai import OpenAIEmbeddings
24
+
25
+ import pixie.sdk as pixie
26
+ import requests
27
+ from bs4 import BeautifulSoup
28
+
29
+
30
+ # ============================================================================
31
+ # PROMPT DEFINITIONS
32
+ # ============================================================================
33
+
34
+
35
class GradeVariables(pixie.Variables):
    """Template variables for the document-grading prompt."""

    # Retrieved document text whose relevance is being judged.
    context: str
    # The user question the context is graded against.
    question: str
38
+
39
+
40
class RewriteVariables(pixie.Variables):
    """Template variables for the question-rewriting prompt."""

    # The original user question to be rephrased for better retrieval.
    question: str
42
+
43
+
44
class GenerateVariables(pixie.Variables):
    """Template variables for the answer-generation prompt."""

    # The user question to answer.
    question: str
    # Retrieved document text to ground the answer in.
    context: str
47
+
48
+
49
# Pixie-managed prompts for each stage of the RAG workflow. Each prompt is
# compiled with its Variables class before being sent to the model.

# Stage: relevance grading — decides whether retrieved context answers the question.
rag_grade_prompt = pixie.create_prompt(
    "rag_grade_documents",
    GradeVariables,
    description="Grades relevance of retrieved documents to user questions",
)
# Stage: question rewriting — used when grading says the context is irrelevant.
rag_rewrite_prompt = pixie.create_prompt(
    "rag_rewrite_question",
    RewriteVariables,
    description="Rewrites questions to improve semantic understanding",
)
# Stage: final answer generation from the retrieved context.
rag_generate_prompt = pixie.create_prompt(
    "rag_generate_answer",
    GenerateVariables,
    description="Generates concise answers from retrieved context",
)
64
+
65
+
66
+ # ============================================================================
67
+ # DOCUMENT LOADING & VECTORSTORE
68
+ # ============================================================================
69
+
70
+
71
def load_web_page(url: str) -> list[Document]:
    """Fetch *url* and return its visible text as a single Document.

    Lightweight replacement for
    ``langchain_community.document_loaders.WebBaseLoader`` to avoid the
    langchain-community dependency.

    Args:
        url: Absolute HTTP(S) URL of the page to load.

    Returns:
        A one-element list holding a Document whose ``page_content`` is the
        page's extracted text and whose metadata records the source URL.

    Raises:
        requests.HTTPError: If the server responds with a 4xx/5xx status.
        requests.RequestException: On connection failure or timeout.
    """
    # requests has NO default timeout; without one a stalled host would hang
    # the whole agent forever during setup_vectorstore().
    response = requests.get(url, timeout=30)
    response.raise_for_status()
    soup = BeautifulSoup(response.content, "html.parser")

    # Extract visible text; the newline separator keeps text from adjacent
    # block elements from running together.
    text = soup.get_text(separator="\n", strip=True)

    return [Document(page_content=text, metadata={"source": url})]
85
+
86
+
87
def setup_vectorstore():
    """Build an in-memory vectorstore over Lilian Weng's blog posts.

    Downloads the posts, splits them into small overlapping chunks, embeds
    them with OpenAI embeddings, and returns a retriever over the result.
    """
    print("Loading documents from web...")

    urls = [
        "https://lilianweng.github.io/posts/2024-11-28-reward-hacking/",
        "https://lilianweng.github.io/posts/2024-07-07-hallucination/",
        "https://lilianweng.github.io/posts/2024-04-12-diffusion-video/",
    ]

    # Each loader call returns a list; flatten them into one document list.
    all_docs: list[Document] = []
    for source_url in urls:
        all_docs.extend(load_web_page(source_url))

    print("Splitting documents...")
    splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
        chunk_size=100, chunk_overlap=50
    )
    chunks = splitter.split_documents(all_docs)

    print("Creating vectorstore...")
    store = InMemoryVectorStore.from_documents(
        documents=chunks, embedding=OpenAIEmbeddings()
    )
    return store.as_retriever()
112
+
113
+
114
+ # ============================================================================
115
+ # SCHEMAS
116
+ # ============================================================================
117
+
118
+
119
class GradeDocuments(BaseModel):
    """Grade documents using a binary score for relevance check.

    Structured-output schema used by ``OpenAIRAGAgent.grade_documents`` when
    parsing the grader model's JSON response.
    """

    # Expected to hold the literal string "yes" or "no"; the caller compares
    # it case-insensitively against "yes".
    binary_score: str = Field(
        description="Relevance score: 'yes' if relevant, or 'no' if not relevant"
    )
125
+
126
+
127
# Tool definition for the retrieval.
# Function-tool schema handed to the Responses API so the model can opt in to
# retrieval instead of answering directly. "strict": True makes the model emit
# arguments that conform exactly to this JSON schema, which is why
# "additionalProperties": False and the "required" list are specified.
RETRIEVE_TOOL: ToolParam = {
    "type": "function",
    "name": "retrieve_blog_posts",
    "description": (
        "Search and return information about Lilian Weng blog posts on AI topics like reward hacking, "
        "hallucination, and diffusion video."
    ),
    "parameters": {
        "type": "object",
        "properties": {
            "query": {
                "type": "string",
                "description": "The search query to find relevant blog post content",
            }
        },
        "required": ["query"],
        "additionalProperties": False,
    },
    "strict": True,
}
148
+
149
+
150
+ # ============================================================================
151
+ # RAG AGENT CLASS
152
+ # ============================================================================
153
+
154
+
155
class OpenAIRAGAgent:
    """Agentic RAG system using OpenAI Responses API.

    Orchestrates the retrieve → grade → (rewrite) → answer loop around a
    LangChain retriever, keeping a running ``message_history`` so follow-up
    questions have conversational context.
    """

    def __init__(self, retriever, model: str = "gpt-4o-mini"):
        """Store the retriever, model name, traced client, and empty history.

        Args:
            retriever: A LangChain-style retriever exposing ``invoke(query)``.
            model: Model name used for routing, rewriting, and answering.
        """
        self.retriever = retriever
        self.model = model
        # langfuse.openai wraps the OpenAI client to add tracing; API surface
        # matches openai.AsyncOpenAI.
        self.client = langfuse.openai.AsyncOpenAI()  # type: ignore
        # Grows across run() calls: user turns plus final assistant answers.
        self.message_history: list[ResponseInputItemParam] = []

    def retrieve(self, query: str) -> str:
        """Search and return information about Lilian Weng blog posts.

        Returns the page_content of all retrieved documents joined with
        blank lines, as a single context string.
        """
        docs = self.retriever.invoke(query)
        return "\n\n".join([doc.page_content for doc in docs])

    async def generate_query_or_respond(
        self, messages: list[ResponseInputItemParam]
    ) -> tuple[str | None, str | None]:
        """
        Call the model to decide whether to retrieve or respond directly.

        The model is offered RETRIEVE_TOOL; a function_call in the output
        means "retrieve", otherwise the first output_text is the direct reply.

        Returns:
            Tuple of (response_text, tool_query):
            - If responding directly: (response_text, None)
            - If tool call: (None, tool_query)
            - (None, None) if the output contains neither (unexpected).
        """
        response = await self.client.responses.create(
            model=self.model,
            input=messages,
            tools=[RETRIEVE_TOOL],
        )

        # Check if the model wants to use a tool
        for item in response.output:
            if item.type == "function_call" and item.name == "retrieve_blog_posts":
                args = json.loads(item.arguments)
                return None, args["query"]

        # Otherwise, return the text response
        for item in response.output:
            if item.type == "message":
                for content in item.content:
                    if content.type == "output_text":
                        return content.text, None

        return None, None

    async def grade_documents(self, question: str, context: str) -> bool:
        """
        Determine whether the retrieved documents are relevant to the question.

        Asks the grader model for JSON with a 'binary_score' field (the shape
        described by GradeDocuments) and treats "yes" as relevant.

        Returns:
            True if documents are relevant, False otherwise (including when
            the response contains no output_text at all).
        """
        prompt = rag_grade_prompt.compile(
            GradeVariables(question=question, context=context)
        )

        # Add instruction for JSON format (required when using json_object format)
        prompt_with_json = f"{prompt}\n\nProvide your response in JSON format with a 'binary_score' field."

        # NOTE(review): model is hardcoded to "gpt-4o" rather than self.model —
        # presumably a deliberately stronger grader, but confirm it's intended.
        response = await self.client.responses.create(
            model="gpt-4o",
            input=[{"role": "user", "content": prompt_with_json}],
            text={"format": {"type": "json_object"}},
        )

        # Parse the response
        for item in response.output:
            if item.type == "message":
                for content in item.content:
                    if content.type == "output_text":
                        try:
                            result = json.loads(content.text)
                            return result.get("binary_score", "").lower() == "yes"
                        except json.JSONDecodeError:
                            # If parsing fails, check for "yes" in text
                            return "yes" in content.text.lower()

        return False

    async def rewrite_question(self, question: str) -> str:
        """Rewrite the original user question for better retrieval.

        Returns the model's rewritten question, or the input unchanged if the
        response contains no output_text.
        """
        prompt = rag_rewrite_prompt.compile(RewriteVariables(question=question))

        response = await self.client.responses.create(
            model=self.model,
            input=[{"role": "user", "content": prompt}],
        )

        for item in response.output:
            if item.type == "message":
                for content in item.content:
                    if content.type == "output_text":
                        return content.text

        return question  # Return original if rewrite fails

    async def generate_answer(self, question: str, context: str) -> str:
        """Generate an answer based on the retrieved context.

        Returns the model's answer text, or a fixed fallback string if the
        response contains no output_text.
        """
        prompt = rag_generate_prompt.compile(
            GenerateVariables(question=question, context=context)
        )

        response = await self.client.responses.create(
            model=self.model,
            input=[{"role": "user", "content": prompt}],
        )

        for item in response.output:
            if item.type == "message":
                for content in item.content:
                    if content.type == "output_text":
                        return content.text

        return "Unable to generate answer."

    async def run(
        self,
        question: str,
        max_rewrites: int = 2,
    ) -> str:
        """
        Run the RAG agent workflow.

        The workflow:
        1. Decide whether to retrieve or respond directly
        2. If retrieving, grade documents for relevance
        3. If documents aren't relevant, rewrite question and retry
        4. Generate answer based on relevant context

        Only the final user turn and assistant answer are appended to
        message_history; intermediate tool calls and rewrites are not.

        Args:
            question: The user's question
            max_rewrites: Maximum number of question rewrites to attempt

        Returns:
            The generated answer
        """
        # current_question is what gets rewritten/retried; `question` keeps
        # the user's original wording for grading and answer generation.
        current_question = question
        rewrites = 0

        while True:
            print(f"Processing question: {current_question}")

            # Step 1: Decide whether to retrieve or respond directly
            user_msg: ResponseInputItemParam = {
                "role": "user",
                "content": current_question,
            }
            messages: list[ResponseInputItemParam] = [*self.message_history, user_msg]
            direct_response, tool_query = await self.generate_query_or_respond(messages)

            if direct_response is not None:
                # Model decided to respond directly without retrieval
                self.message_history.append(user_msg)
                self.message_history.append(
                    {"role": "assistant", "content": direct_response}
                )
                print("Model responded directly without retrieval")
                return direct_response

            if tool_query is None:
                # Unexpected state - no response or tool call
                return "Unable to process the question."

            # Step 2: Retrieve documents
            print(f"Retrieving documents for query: {tool_query}")
            context = self.retrieve(tool_query)

            # Step 3: Grade documents for relevance
            # NOTE(review): grades against the ORIGINAL question even after a
            # rewrite (retrieval used the rewritten query) — presumably
            # intentional, mirroring the LangGraph example; confirm.
            print("Grading documents for relevance...")
            is_relevant = await self.grade_documents(question, context)

            if is_relevant:
                # Step 4: Generate answer from relevant context
                print("Documents are relevant. Generating answer...")
                answer = await self.generate_answer(question, context)
                self.message_history.append(user_msg)
                self.message_history.append({"role": "assistant", "content": answer})
                return answer
            else:
                # Documents not relevant - try rewriting
                if rewrites >= max_rewrites:
                    # Best-effort: answer from whatever context we have rather
                    # than looping forever.
                    print(
                        f"Max rewrites ({max_rewrites}) reached. "
                        "Generating answer with available context..."
                    )
                    answer = await self.generate_answer(question, context)
                    self.message_history.append(user_msg)
                    self.message_history.append(
                        {"role": "assistant", "content": answer}
                    )
                    return answer

                print("Documents not relevant. Rewriting question...")
                current_question = await self.rewrite_question(current_question)
                rewrites += 1
                print(f"Rewritten question: {current_question}")
352
+
353
+
354
+ # ============================================================================
355
+ # PIXIE APP
356
+ # ============================================================================
357
+
358
+
359
@pixie.app
async def openai_rag_agent() -> pixie.PixieGenerator[str, str]:
    """Interactive agentic RAG chat over Lilian Weng's blog posts.

    Talks to the OpenAI Responses API directly (without LangGraph/LangChain
    agent framework). For each incoming question the agent decides whether to
    retrieve, grades what it retrieved, rewrites the question when needed,
    and generates an answer from the gathered context.

    Yields:
        AI-generated responses

    Receives:
        User questions about the blog content via InputRequired
    """
    # Building the vectorstore downloads and embeds the blog posts, so the
    # first run takes a moment.
    rag = OpenAIRAGAgent(setup_vectorstore(), model="gpt-4o-mini")

    greetings = (
        "Hello! I can answer questions about Lilian Weng's blog posts on AI topics.",
        "Ask me anything about reward hacking, hallucination, or diffusion video!",
        "(Type 'exit' to quit)",
    )
    for line in greetings:
        yield line

    exit_words = {"exit", "quit", "bye"}
    while True:
        # Suspend until the user sends a question.
        question = yield pixie.InputRequired(str)

        if question.lower() in exit_words:
            yield "Goodbye! Thanks for chatting!"
            break

        # Run the full RAG workflow and emit the answer.
        yield await rag.run(question)
@@ -4,7 +4,7 @@ packages = [
4
4
  { include = "examples" }
5
5
  ]
6
6
 
7
- version = "0.2.6.dev5"
7
+ version = "0.2.10.dev1"
8
8
  description = "examples for using Pixie"
9
9
  authors = ["Yiou Li <yol@gopixie.ai>"]
10
10
  license = "MIT"
@@ -21,7 +21,7 @@ Changelog = "https://github.com/yiouli/pixie-examples/commits/main/"
21
21
  python = ">=3.11,<3.14"
22
22
  pydantic-ai-slim = "^1.39.0"
23
23
  pydantic = {version = "^2.7.4", extras = []}
24
- pixie-sdk = "^0.2.6"
24
+ pixie-sdk = "^0.2.9"
25
25
  openai-agents = "^0.6.5"
26
26
  pymarkdownlnt = "^0.9.34"
27
27
  langchain = "^1.2.3"