@musashishao/agent-kit 1.8.1 → 1.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (92) hide show
  1. package/.agent/agents/ai-architect.md +39 -0
  2. package/.agent/agents/cloud-engineer.md +39 -0
  3. package/.agent/agents/game-asset-curator.md +317 -0
  4. package/.agent/agents/game-developer.md +190 -89
  5. package/.agent/agents/game-narrative-designer.md +310 -0
  6. package/.agent/agents/game-qa-agent.md +441 -0
  7. package/.agent/agents/marketing-specialist.md +41 -0
  8. package/.agent/agents/penetration-tester.md +15 -1
  9. package/.agent/rules/CODEX.md +26 -2
  10. package/.agent/rules/GEMINI.md +7 -5
  11. package/.agent/rules/REFERENCE.md +92 -2
  12. package/.agent/scripts/ak_cli.py +1 -1
  13. package/.agent/scripts/localize_workflows.py +54 -0
  14. package/.agent/scripts/memory_manager.py +24 -1
  15. package/.agent/skills/3d-web-experience/SKILL.md +386 -0
  16. package/.agent/skills/DEPENDENCIES.md +54 -0
  17. package/.agent/skills/ab-test-setup/SKILL.md +77 -0
  18. package/.agent/skills/active-directory-attacks/SKILL.md +59 -0
  19. package/.agent/skills/agent-evaluation/SKILL.md +430 -0
  20. package/.agent/skills/agent-memory-systems/SKILL.md +426 -0
  21. package/.agent/skills/agent-tool-builder/SKILL.md +139 -0
  22. package/.agent/skills/ai-agents-architect/SKILL.md +115 -0
  23. package/.agent/skills/ai-product/SKILL.md +86 -0
  24. package/.agent/skills/ai-wrapper-product/SKILL.md +90 -0
  25. package/.agent/skills/analytics-tracking/SKILL.md +88 -0
  26. package/.agent/skills/api-fuzzing-bug-bounty/SKILL.md +66 -0
  27. package/.agent/skills/app-store-optimization/SKILL.md +66 -0
  28. package/.agent/skills/autonomous-agent-patterns/SKILL.md +414 -0
  29. package/.agent/skills/aws-penetration-testing/SKILL.md +50 -0
  30. package/.agent/skills/aws-serverless/SKILL.md +327 -0
  31. package/.agent/skills/azure-functions/SKILL.md +340 -0
  32. package/.agent/skills/broken-authentication/SKILL.md +53 -0
  33. package/.agent/skills/browser-automation/SKILL.md +408 -0
  34. package/.agent/skills/browser-extension-builder/SKILL.md +422 -0
  35. package/.agent/skills/bullmq-specialist/SKILL.md +424 -0
  36. package/.agent/skills/bun-development/SKILL.md +386 -0
  37. package/.agent/skills/burp-suite-testing/SKILL.md +60 -0
  38. package/.agent/skills/clerk-auth/SKILL.md +432 -0
  39. package/.agent/skills/cloud-penetration-testing/SKILL.md +51 -0
  40. package/.agent/skills/copywriting/SKILL.md +66 -0
  41. package/.agent/skills/crewai/SKILL.md +470 -0
  42. package/.agent/skills/discord-bot-architect/SKILL.md +447 -0
  43. package/.agent/skills/email-sequence/SKILL.md +73 -0
  44. package/.agent/skills/ethical-hacking-methodology/SKILL.md +67 -0
  45. package/.agent/skills/firebase/SKILL.md +377 -0
  46. package/.agent/skills/game-development/godot-expert/SKILL.md +462 -0
  47. package/.agent/skills/game-development/npc-ai-integration/SKILL.md +110 -0
  48. package/.agent/skills/game-development/procedural-generation/SKILL.md +168 -0
  49. package/.agent/skills/game-development/unity-integration/SKILL.md +358 -0
  50. package/.agent/skills/game-development/webgpu-shading/SKILL.md +209 -0
  51. package/.agent/skills/gcp-cloud-run/SKILL.md +358 -0
  52. package/.agent/skills/graphql/SKILL.md +492 -0
  53. package/.agent/skills/idor-testing/SKILL.md +64 -0
  54. package/.agent/skills/inngest/SKILL.md +128 -0
  55. package/.agent/skills/langfuse/SKILL.md +415 -0
  56. package/.agent/skills/langgraph/SKILL.md +360 -0
  57. package/.agent/skills/launch-strategy/SKILL.md +68 -0
  58. package/.agent/skills/linux-privilege-escalation/SKILL.md +62 -0
  59. package/.agent/skills/llm-app-patterns/SKILL.md +367 -0
  60. package/.agent/skills/marketing-ideas/SKILL.md +66 -0
  61. package/.agent/skills/metasploit-framework/SKILL.md +60 -0
  62. package/.agent/skills/micro-saas-launcher/SKILL.md +93 -0
  63. package/.agent/skills/neon-postgres/SKILL.md +339 -0
  64. package/.agent/skills/paid-ads/SKILL.md +64 -0
  65. package/.agent/skills/supabase-integration/SKILL.md +411 -0
  66. package/.agent/workflows/ai-agent.md +36 -0
  67. package/.agent/workflows/autofix.md +1 -0
  68. package/.agent/workflows/brainstorm.md +1 -0
  69. package/.agent/workflows/context.md +1 -0
  70. package/.agent/workflows/create.md +1 -0
  71. package/.agent/workflows/dashboard.md +1 -0
  72. package/.agent/workflows/debug.md +1 -0
  73. package/.agent/workflows/deploy.md +1 -0
  74. package/.agent/workflows/enhance.md +1 -0
  75. package/.agent/workflows/game-prototype.md +154 -0
  76. package/.agent/workflows/marketing.md +37 -0
  77. package/.agent/workflows/next.md +1 -0
  78. package/.agent/workflows/orchestrate.md +1 -0
  79. package/.agent/workflows/pentest.md +37 -0
  80. package/.agent/workflows/plan.md +1 -0
  81. package/.agent/workflows/preview.md +2 -1
  82. package/.agent/workflows/quality.md +1 -0
  83. package/.agent/workflows/saas.md +36 -0
  84. package/.agent/workflows/spec.md +1 -0
  85. package/.agent/workflows/status.md +1 -0
  86. package/.agent/workflows/test.md +1 -0
  87. package/.agent/workflows/ui-ux-pro-max.md +1 -0
  88. package/README.md +52 -24
  89. package/bin/cli.js +68 -3
  90. package/docs/CHANGELOG_AI_INFRA.md +30 -0
  91. package/docs/MIGRATION_GUIDE_V1.9.md +55 -0
  92. package/package.json +1 -1
@@ -0,0 +1,415 @@
1
+ ---
2
+ name: langfuse
3
+ description: "Langfuse LLM observability and tracing. Track, debug, and analyze LLM applications with detailed traces, user feedback, and cost monitoring. Integrates with OpenAI, LangChain, and custom LLM calls."
4
+ version: "1.0.0"
5
+ source: "antigravity-awesome-skills (adapted)"
6
+ ---
7
+
8
+ # 📊 Langfuse
9
+
10
+ **Role**: LLM Observability Expert
11
+
12
+ You are an expert in LLM observability using Langfuse. You understand that production LLM applications need visibility into performance, costs, and quality. You instrument applications properly, track user feedback, and use data to improve prompts.
13
+
14
+ ---
15
+
16
+ ## When to Use This Skill
17
+
18
+ - Setting up LLM observability for production
19
+ - Debugging LLM application issues
20
+ - Tracking costs and performance metrics
21
+ - Collecting user feedback for improvement
22
+ - A/B testing prompts in production
23
+
24
+ ---
25
+
26
+ ## Capabilities
27
+
28
+ - `langfuse`
29
+ - `llm-tracing`
30
+ - `llm-observability`
31
+ - `prompt-management`
32
+ - `user-feedback`
33
+ - `cost-tracking`
34
+ - `evaluation`
35
+
36
+ ---
37
+
38
+ ## Requirements
39
+
40
+ ```bash
41
+ pip install langfuse
42
+ ```
43
+
44
+ ```bash
45
+ # Environment variables
46
+ LANGFUSE_PUBLIC_KEY="pk-..."
47
+ LANGFUSE_SECRET_KEY="sk-..."
48
+ LANGFUSE_HOST="https://cloud.langfuse.com" # or self-hosted
49
+ ```
50
+
51
+ ---
52
+
53
+ ## 1. Core Concepts
54
+
55
+ ### Tracing Hierarchy
56
+
57
+ ```
58
+ ┌─────────────────────────────────────────────────────────────┐
59
+ │ TRACE │
60
+ │ (One user request / conversation turn) │
61
+ │ │
62
+ │ ┌─────────────────────────────────────────────────────┐ │
63
+ │ │ SPAN │ │
64
+ │ │ (A logical operation within the trace) │ │
65
+ │ │ │ │
66
+ │ │ ┌──────────────┐ ┌──────────────┐ │ │
67
+ │ │ │ GENERATION │ │ GENERATION │ │ │
68
+ │ │ │ (LLM call) │ │ (LLM call) │ │ │
69
+ │ │ └──────────────┘ └──────────────┘ │ │
70
+ │ └─────────────────────────────────────────────────────┘ │
71
+ │ │
72
+ │ ┌─────────────────────────────────────────────────────┐ │
73
+ │ │ SCORE │ │
74
+ │ │ (User feedback, evaluation result) │ │
75
+ │ └─────────────────────────────────────────────────────┘ │
76
+ └─────────────────────────────────────────────────────────────┘
77
+ ```
78
+
79
+ | Component | Description |
80
+ |-----------|-------------|
81
+ | **Trace** | Top-level container for a user request |
82
+ | **Span** | Logical operation within a trace |
83
+ | **Generation** | A single LLM call with input/output |
84
+ | **Score** | Feedback or evaluation attached to trace |
85
+
86
+ ---
87
+
88
+ ## 2. Patterns
89
+
90
+ ### 2.1 Basic Tracing Setup
91
+
92
+ Manual instrumentation for any LLM.
93
+
94
+ ```python
95
+ from langfuse import Langfuse
96
+ import openai
97
+
98
+ # Initialize client
99
+ langfuse = Langfuse(
100
+ public_key="pk-...",
101
+ secret_key="sk-...",
102
+ host="https://cloud.langfuse.com"
103
+ )
104
+
105
+ def chat_with_tracing(user_message: str, user_id: str, session_id: str):
106
+ # Create a trace for this request
107
+ trace = langfuse.trace(
108
+ name="chat-completion",
109
+ user_id=user_id,
110
+ session_id=session_id,
111
+ metadata={"feature": "customer-support"},
112
+ tags=["production", "v2"]
113
+ )
114
+
115
+ # Log the generation (LLM call)
116
+ generation = trace.generation(
117
+ name="gpt-4o-response",
118
+ model="gpt-4o",
119
+ model_parameters={"temperature": 0.7},
120
+ input={"messages": [{"role": "user", "content": user_message}]},
121
+ metadata={"attempt": 1}
122
+ )
123
+
124
+ # Make actual LLM call
125
+ response = openai.chat.completions.create(
126
+ model="gpt-4o",
127
+ messages=[{"role": "user", "content": user_message}]
128
+ )
129
+
130
+ # Complete the generation with output
131
+ generation.end(
132
+ output=response.choices[0].message.content,
133
+ usage={
134
+ "input": response.usage.prompt_tokens,
135
+ "output": response.usage.completion_tokens
136
+ }
137
+ )
138
+
139
+ return response.choices[0].message.content, trace.id
140
+
141
+ # Score the trace based on user feedback
142
+ def record_feedback(trace_id: str, is_helpful: bool):
143
+ langfuse.score(
144
+ trace_id=trace_id,
145
+ name="user-feedback",
146
+ value=1 if is_helpful else 0,
147
+ comment="User clicked helpful" if is_helpful else "User clicked not helpful"
148
+ )
149
+
150
+ # IMPORTANT: Flush before exit (especially in serverless)
151
+ langfuse.flush()
152
+ ```
153
+
154
+ ### 2.2 OpenAI Integration (Drop-in)
155
+
156
+ Automatic tracing with OpenAI SDK.
157
+
158
+ ```python
159
+ from langfuse.openai import openai # Drop-in replacement
160
+
161
+ # All calls automatically traced!
162
+ response = openai.chat.completions.create(
163
+ model="gpt-4o",
164
+ messages=[{"role": "user", "content": "Hello"}],
165
+
166
+ # Langfuse-specific parameters
167
+ name="greeting",
168
+ session_id="session-123",
169
+ user_id="user-456",
170
+ tags=["test"],
171
+ metadata={"feature": "chat"}
172
+ )
173
+
174
+ # Works with streaming
175
+ stream = openai.chat.completions.create(
176
+ model="gpt-4o",
177
+ messages=[{"role": "user", "content": "Tell me a story"}],
178
+ stream=True,
179
+ name="story-generation"
180
+ )
181
+
182
+ for chunk in stream:
183
+ print(chunk.choices[0].delta.content, end="")
184
+
185
+ # Works with async
186
+ from langfuse.openai import AsyncOpenAI
187
+
188
+ async_client = AsyncOpenAI()
189
+
190
+ async def main():
191
+ response = await async_client.chat.completions.create(
192
+ model="gpt-4o",
193
+ messages=[{"role": "user", "content": "Hello"}],
194
+ name="async-greeting"
195
+ )
196
+ ```
197
+
198
+ ### 2.3 LangChain Integration
199
+
200
+ ```python
201
+ from langchain_openai import ChatOpenAI
202
+ from langchain_core.prompts import ChatPromptTemplate
203
+ from langfuse.callback import CallbackHandler
204
+
205
+ # Create Langfuse callback handler
206
+ langfuse_handler = CallbackHandler(
207
+ public_key="pk-...",
208
+ secret_key="sk-...",
209
+ host="https://cloud.langfuse.com",
210
+ session_id="session-123",
211
+ user_id="user-456"
212
+ )
213
+
214
+ # Use with any LangChain component
215
+ llm = ChatOpenAI(model="gpt-4o")
216
+
217
+ prompt = ChatPromptTemplate.from_messages([
218
+ ("system", "You are a helpful assistant."),
219
+ ("user", "{input}")
220
+ ])
221
+
222
+ chain = prompt | llm
223
+
224
+ # Pass handler to invoke
225
+ response = chain.invoke(
226
+ {"input": "Hello"},
227
+ config={"callbacks": [langfuse_handler]}
228
+ )
229
+
230
+ # Works with agents, retrievers, etc.
231
+ from langchain.agents import create_openai_tools_agent, AgentExecutor
232
+
233
+ agent = create_openai_tools_agent(llm, tools, prompt)
234
+ agent_executor = AgentExecutor(agent=agent, tools=tools)
235
+
236
+ result = agent_executor.invoke(
237
+ {"input": "What's the weather?"},
238
+ config={"callbacks": [langfuse_handler]}
239
+ )
240
+ ```
241
+
242
+ ### 2.4 Decorator Pattern
243
+
244
+ Clean tracing with decorators.
245
+
246
+ ```python
247
+ from langfuse.decorators import observe, langfuse_context
248
+
249
+ @observe() # Automatically creates trace
250
+ def process_request(user_input: str):
251
+ # Add metadata to current trace
252
+ langfuse_context.update_current_trace(
253
+ user_id="user-123",
254
+ tags=["production"]
255
+ )
256
+
257
+ # Nested spans created automatically
258
+ result = analyze(user_input)
259
+ response = generate_response(result)
260
+
261
+ return response
262
+
263
+ @observe() # Creates child span
264
+ def analyze(text: str):
265
+ # Analysis logic
266
+ return {"sentiment": "positive"}
267
+
268
+ @observe() # Creates child span
269
+ def generate_response(analysis: dict):
270
+ # Use LLM
271
+ response = openai.chat.completions.create(...)
272
+ return response.choices[0].message.content
273
+
274
+ # Evaluate and score
275
+ @observe()
276
+ def evaluate_response(response: str, expected: str):
277
+ score = calculate_similarity(response, expected)
278
+
279
+ langfuse_context.score_current_trace(
280
+ name="accuracy",
281
+ value=score
282
+ )
283
+
284
+ return score
285
+ ```
286
+
287
+ ### 2.5 Prompt Management
288
+
289
+ Version and A/B test prompts in production.
290
+
291
+ ```python
292
+ from langfuse import Langfuse
293
+
294
+ langfuse = Langfuse()
295
+
296
+ # Fetch prompt from Langfuse (versioned)
297
+ prompt = langfuse.get_prompt("customer-support-v2")
298
+
299
+ # Use the prompt
300
+ messages = prompt.compile(
301
+ customer_name="John",
302
+ issue="billing"
303
+ )
304
+
305
+ # Prompt is automatically linked to trace
306
+ response = openai.chat.completions.create(
307
+ model="gpt-4o",
308
+ messages=messages,
309
+ langfuse_prompt=prompt # Links prompt to generation
310
+ )
311
+
312
+ # In Langfuse UI:
313
+ # - See which prompt version was used
314
+ # - Compare performance across versions
315
+ # - A/B test prompts
316
+ ```
317
+
318
+ ---
319
+
320
+ ## 3. Metrics to Track
321
+
322
+ ### Dashboard Metrics
323
+
324
+ | Metric | Description | Target |
325
+ |--------|-------------|--------|
326
+ | **Latency P50** | Median response time | < 2s |
327
+ | **Latency P99** | 99th percentile | < 10s |
328
+ | **Token Usage** | Avg tokens per request | Monitor |
329
+ | **Cost per Request** | $ per API call | Optimize |
330
+ | **Error Rate** | % failed requests | < 1% |
331
+ | **User Satisfaction** | Feedback score | > 80% |
332
+
333
+ ### Custom Scores
334
+
335
+ ```python
336
+ # Numeric scores
337
+ langfuse.score(trace_id=trace_id, name="accuracy", value=0.95)
338
+ langfuse.score(trace_id=trace_id, name="relevance", value=0.88)
339
+
340
+ # Categorical scores
341
+ langfuse.score(trace_id=trace_id, name="quality", value="good")
342
+
343
+ # Boolean scores
344
+ langfuse.score(trace_id=trace_id, name="hallucination", value=0)
345
+ ```
346
+
347
+ ---
348
+
349
+ ## 4. Anti-Patterns
350
+
351
+ ### ❌ Not Flushing in Serverless
352
+
353
+ ```python
354
+ # WRONG: Traces lost in serverless
355
+ def handler(event, context):
356
+ response = call_llm()
357
+ return response # Function ends, traces not sent!
358
+
359
+ # CORRECT: Always flush
360
+ def handler(event, context):
361
+ response = call_llm()
362
+ langfuse.flush() # Send traces before exit
363
+ return response
364
+ ```
365
+
366
+ ### ❌ Tracing Everything
367
+
368
+ ```python
369
+ # WRONG: Trace every internal function
370
+ @observe()
371
+ def helper_function(): # Noise!
372
+ pass
373
+
374
+ # CORRECT: Trace meaningful operations
375
+ @observe()
376
+ def process_user_request(): # Meaningful
377
+ pass
378
+ ```
379
+
380
+ ### ❌ No User/Session IDs
381
+
382
+ ```python
383
+ # WRONG: Anonymous traces
384
+ trace = langfuse.trace(name="chat") # Can't group!
385
+
386
+ # CORRECT: Include identifiers
387
+ trace = langfuse.trace(
388
+ name="chat",
389
+ user_id="user-123", # Group by user
390
+ session_id="session-456" # Group by session
391
+ )
392
+ ```
393
+
394
+ ---
395
+
396
+ ## 5. Production Checklist
397
+
398
+ | Check | Status |
399
+ |-------|--------|
400
+ | ✅ Traces have user_id and session_id | |
401
+ | ✅ All LLM calls are generations | |
402
+ | ✅ Flush called in serverless | |
403
+ | ✅ User feedback collected | |
404
+ | ✅ Costs monitored | |
405
+ | ✅ Error handling doesn't break traces | |
406
+ | ✅ PII redacted from traces | |
407
+
408
+ ---
409
+
410
+ ## Related Skills
411
+
412
+ - `llm-app-patterns` - LLM architecture patterns
413
+ - `observability-patterns` - General observability
414
+ - `opentelemetry-expert` - OpenTelemetry tracing
415
+ - `langgraph` - Agent workflow framework