omgkit 2.5.1 → 2.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +2 -2
- package/plugin/skills/ai-engineering/SKILL.md +65 -0
- package/plugin/skills/ai-engineering/ai-agents/SKILL.md +157 -0
- package/plugin/skills/ai-engineering/ai-architecture/SKILL.md +133 -0
- package/plugin/skills/ai-engineering/ai-system-evaluation/SKILL.md +95 -0
- package/plugin/skills/ai-engineering/dataset-engineering/SKILL.md +135 -0
- package/plugin/skills/ai-engineering/evaluation-methodology/SKILL.md +93 -0
- package/plugin/skills/ai-engineering/finetuning/SKILL.md +133 -0
- package/plugin/skills/ai-engineering/foundation-models/SKILL.md +90 -0
- package/plugin/skills/ai-engineering/guardrails-safety/SKILL.md +153 -0
- package/plugin/skills/ai-engineering/inference-optimization/SKILL.md +150 -0
- package/plugin/skills/ai-engineering/prompt-engineering/SKILL.md +133 -0
- package/plugin/skills/ai-engineering/rag-systems/SKILL.md +137 -0
- package/plugin/skills/ai-engineering/user-feedback/SKILL.md +162 -0
package/plugin/skills/ai-engineering/rag-systems/SKILL.md
@@ -0,0 +1,137 @@
---
name: rag-systems
description: Retrieval-Augmented Generation - chunking strategies, embedding, vector search, hybrid retrieval, reranking, query transformation. Use when building RAG pipelines, knowledge bases, or context-augmented applications.
---

# RAG Systems

Building Retrieval-Augmented Generation systems.

## RAG Architecture

```
INDEXING (Offline)
Documents → Chunking → Embedding → Vector DB

QUERYING (Online)
Query → Embed → Search → Retrieved Docs
                              ↓
Response ← LLM ← Context + Query
```
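The generation step at the bottom of the diagram is the only stage not shown in code below. A minimal sketch of it, assuming a `retrieve(query, k)` function like the retrievers in the next section and a hypothetical `llm.generate` client:

```python
# Minimal generation step: stuff retrieved chunks into the prompt.
# `retrieve` and `llm.generate` are assumed interfaces, not a specific API.
def answer(query, retrieve, llm, k=5):
    docs = retrieve(query, k)
    context = "\n\n".join(docs)
    prompt = (
        "Answer the question using only the context below.\n\n"
        f"Context:\n{context}\n\nQuestion: {query}\nAnswer:"
    )
    return llm.generate(prompt)
```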
## Retrieval Algorithms

### Term-Based (BM25)
```python
from rank_bm25 import BM25Okapi

tokenized_docs = [doc.split() for doc in documents]
bm25 = BM25Okapi(tokenized_docs)
scores = bm25.get_scores(query.split())
```

### Embedding-Based
```python
from sentence_transformers import SentenceTransformer
import faiss

model = SentenceTransformer('all-MiniLM-L6-v2')
embeddings = model.encode(documents)

index = faiss.IndexFlatIP(embeddings.shape[1])
faiss.normalize_L2(embeddings)  # with normalized vectors, inner product = cosine similarity
index.add(embeddings)

# Query
query_emb = model.encode([query])
faiss.normalize_L2(query_emb)
distances, indices = index.search(query_emb, k=5)
```

### Hybrid Retrieval
```python
import numpy as np

def hybrid_retrieve(query, k=5, alpha=0.5):
    # normalize() is an assumed min-max scaler so the two score ranges are
    # comparable; embed() wraps model.encode([query]) as above.
    bm25_scores = normalize(bm25.get_scores(query.split()))
    distances, indices = index.search(embed(query), len(documents))
    dense_scores = np.zeros(len(documents))
    dense_scores[indices[0]] = distances[0]  # FAISS returns rank order; map back to doc ids
    dense_scores = normalize(dense_scores)

    hybrid = alpha * bm25_scores + (1 - alpha) * dense_scores  # alpha weights the BM25 side
    return [documents[i] for i in np.argsort(hybrid)[::-1][:k]]
```

## Chunking Strategies

### Fixed Size
```python
def fixed_chunk(text, size=500, overlap=50):
    chunks = []
    for i in range(0, len(text), size - overlap):
        chunks.append(text[i:i+size])
    return chunks
```
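`fixed_chunk` counts characters, while the chunk-size guidance under Best Practices is in tokens. A token-based variant, assuming the `tiktoken` tokenizer (any tokenizer with encode/decode works):

```python
import tiktoken

def token_chunk(text, size=500, overlap=50, encoding="cl100k_base"):
    # Same sliding window as fixed_chunk, but over tokens instead of characters.
    enc = tiktoken.get_encoding(encoding)
    tokens = enc.encode(text)
    chunks = []
    for i in range(0, len(tokens), size - overlap):
        chunks.append(enc.decode(tokens[i:i + size]))
    return chunks
```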
### Semantic Chunking
```python
from nltk.tokenize import sent_tokenize  # requires nltk punkt data

def semantic_chunk(text, model, threshold=0.5):
    # similarity() is an assumed helper: cosine similarity of the two
    # sentences' embeddings under `model`.
    sentences = sent_tokenize(text)
    chunks, current = [], []

    for sent in sentences:
        current.append(sent)
        if len(current) > 1:
            sim = similarity(current[-2], current[-1], model)
            if sim < threshold:
                chunks.append(" ".join(current[:-1]))
                current = [sent]

    if current:
        chunks.append(" ".join(current))
    return chunks
```

## Retrieval Optimization

### Query Expansion
```python
def expand_query(query, model):
    prompt = f"Generate 3 alternative phrasings:\n{query}"
    return [query] + model.generate(prompt).split("\n")
```
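Expanded queries are only useful with a way to merge their result lists; reciprocal rank fusion is one common choice. A sketch, assuming a `retrieve(query, k)` function that returns ranked doc ids:

```python
def rrf_merge(queries, retrieve, k=5, c=60):
    # Reciprocal rank fusion: score each doc by 1/(c + rank) across queries.
    scores = {}
    for q in queries:
        for rank, doc_id in enumerate(retrieve(q, k * 2)):
            scores[doc_id] = scores.get(doc_id, 0.0) + 1.0 / (c + rank + 1)
    return sorted(scores, key=scores.get, reverse=True)[:k]

# top_docs = rrf_merge(expand_query(query, model), retrieve)
```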
### HyDE (Hypothetical Document)
```python
def hyde(query, model):
    prompt = f"Write a paragraph answering:\n{query}"
    return model.generate(prompt)  # Use this for retrieval
```
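The hypothetical answer is embedded in place of the query, since a fabricated answer tends to sit closer to real answer passages in embedding space than the question does. Usage against the FAISS index above (a sketch; `model` is the LLM, `embedder` the sentence-transformer):

```python
# Embed the hypothetical document instead of the raw query.
hypothetical = hyde(query, model)
hyde_emb = embedder.encode([hypothetical])
faiss.normalize_L2(hyde_emb)
distances, indices = index.search(hyde_emb, 5)
```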
### Reranking
```python
from sentence_transformers import CrossEncoder

reranker = CrossEncoder('cross-encoder/ms-marco-MiniLM-L-6-v2')

def rerank(query, docs, k=5):
    pairs = [(query, doc) for doc in docs]
    scores = reranker.predict(pairs)
    return sorted(zip(docs, scores), key=lambda x: -x[1])[:k]
```

## RAG Evaluation

```python
def rag_metrics(query, response, context, retrieved, relevant, ground_truth):
    return {
        "retrieval_precision": precision(retrieved, relevant),
        "retrieval_recall": recall(retrieved, relevant),
        "answer_relevance": similarity(response, ground_truth),
        "faithfulness": check_hallucination(response, context),
    }
```
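`precision` and `recall` are left undefined above; minimal set-based versions, assuming `retrieved` and `relevant` are collections of doc ids (`similarity` and `check_hallucination` would typically be embedding similarity and an LLM judge):

```python
def precision(retrieved, relevant):
    # Fraction of retrieved docs that are actually relevant.
    retrieved, relevant = set(retrieved), set(relevant)
    return len(retrieved & relevant) / max(len(retrieved), 1)

def recall(retrieved, relevant):
    # Fraction of relevant docs that were retrieved.
    retrieved, relevant = set(retrieved), set(relevant)
    return len(retrieved & relevant) / max(len(relevant), 1)
```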
## Best Practices

1. Use hybrid retrieval (BM25 + dense)
2. Add reranking for quality
3. Chunk with overlap (10-20%)
4. Experiment with chunk sizes (200-1000 tokens)
5. Evaluate retrieval separately from generation (see the sketch below)
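For point 5, a sketch of a retrieval-only harness, assuming a small labeled set of (query, relevant doc ids) pairs; generation quality is then measured separately with `rag_metrics`:

```python
def eval_retrieval(labeled_queries, retrieve, k=5):
    # labeled_queries: list of (query, set_of_relevant_doc_ids)
    recalls = []
    for query, relevant in labeled_queries:
        retrieved = retrieve(query, k)
        recalls.append(recall(retrieved, relevant))
    return sum(recalls) / len(recalls)  # mean recall@k
```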
package/plugin/skills/ai-engineering/user-feedback/SKILL.md
@@ -0,0 +1,162 @@
---
name: user-feedback
description: Collecting and using user feedback - explicit/implicit signals, feedback analysis, improvement loops, A/B testing. Use when improving AI systems, understanding user satisfaction, or iterating on quality.
---

# User Feedback Skill

Leveraging feedback to improve AI systems.

## Feedback Collection

### Explicit Feedback
```python
from datetime import datetime

class FeedbackCollector:
    def collect_explicit(self, response_id, feedback):
        self.db.save({
            "type": "explicit",
            "response_id": response_id,
            "rating": feedback.get("rating"),    # 1-5
            "thumbs": feedback.get("thumbs"),    # up/down
            "comment": feedback.get("comment"),
            "timestamp": datetime.now()
        })
```

### Implicit Feedback
```python
def extract_implicit(conversation):
    signals = []

    for i, turn in enumerate(conversation[1:], 1):
        prev = conversation[i-1]

        # Negative signals
        if is_correction(turn, prev):
            signals.append(("correction", i))
        if is_repetition(turn, prev):
            signals.append(("repetition", i))
        if is_abandonment(turn):
            signals.append(("abandonment", i))

        # Positive signals
        if is_acceptance(turn, prev):
            signals.append(("acceptance", i))
        if is_follow_up(turn, prev):
            signals.append(("engagement", i))

    return signals
```
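The `is_*` predicates are left abstract; in practice they range from keyword heuristics to LLM classifiers. Two illustrative heuristics, assuming each turn is a dict with `role` and `text` keys (the phrase lists are placeholders, not a tested lexicon):

```python
CORRECTION_MARKERS = ("no,", "that's wrong", "actually", "not what i asked")

def is_correction(turn, prev):
    # Crude lexical check: user pushes back on the previous assistant turn.
    return turn["role"] == "user" and any(
        m in turn["text"].lower() for m in CORRECTION_MARKERS
    )

def is_abandonment(turn):
    # User gives up: terse terminal message like "nevermind" / "forget it".
    return turn["role"] == "user" and turn["text"].lower().strip() in (
        "nevermind", "never mind", "forget it",
    )
```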
### Natural Language Feedback
```python
def extract_from_text(turn, model):
    prompt = f"""Extract feedback signal from user message.

Message: {turn}

Sentiment (positive/negative/neutral):
Specific issue (if any):
Suggestion (if any):"""

    return model.generate(prompt)
```

## Feedback Analysis

```python
import json

class FeedbackAnalyzer:
    def categorize(self, feedbacks):
        prompt = f"""Categorize these feedback items:

{json.dumps(feedbacks)}

Categories:
1. Accuracy issues
2. Format issues
3. Relevance issues
4. Safety issues
5. Missing features

Summary:"""
        return self.llm.generate(prompt)

    def find_patterns(self, feedbacks):
        # Cluster similar complaints
        embeddings = [self.embed(f["text"]) for f in feedbacks]
        clusters = self.cluster(embeddings)

        patterns = {}
        for cluster_id, indices in clusters.items():
            cluster_feedback = [feedbacks[i] for i in indices]
            patterns[cluster_id] = {
                "count": len(cluster_feedback),
                "summary": self.summarize(cluster_feedback),
                "examples": cluster_feedback[:3]
            }

        return patterns
```
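`self.cluster` is an assumed helper; one minimal implementation using scikit-learn's KMeans, returning the `{cluster_id: [indices]}` shape `find_patterns` expects:

```python
from collections import defaultdict
from sklearn.cluster import KMeans

def cluster(embeddings, n_clusters=8):
    # Group feedback embeddings, returning {cluster_id: [indices]}.
    labels = KMeans(n_clusters=n_clusters, n_init=10).fit_predict(embeddings)
    groups = defaultdict(list)
    for i, label in enumerate(labels):
        groups[int(label)].append(i)
    return dict(groups)
```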
## Improvement Loop

```python
class FeedbackLoop:
    def run_cycle(self):
        # 1. Collect
        recent = self.db.get_recent(days=7)
        analysis = self.analyze(recent)

        # 2. Identify improvements
        training_data = []
        if analysis["accuracy_issues"] > self.threshold:
            training_data = self.create_training_data(
                analysis["corrections"]
            )

        # 3. Improve
        if len(training_data) > 1000:
            self.finetune(training_data)
        else:
            self.update_prompts(analysis)

        # 4. Evaluate
        metrics = self.evaluate(self.test_set)

        # 5. Deploy if improved
        if metrics["quality"] > self.baseline:
            self.deploy()

        return metrics
```

## A/B Testing

```python
import hashlib

class ABTest:
    def __init__(self, variants):
        self.variants = variants
        self.results = {v: {"count": 0, "positive": 0} for v in variants}

    def assign(self, user_id):
        # Consistent assignment: hashlib rather than hash(), since hash()
        # is salted per process and would reshuffle users on every restart.
        digest = hashlib.md5(str(user_id).encode()).hexdigest()
        return self.variants[int(digest, 16) % len(self.variants)]

    def record(self, user_id, positive):
        variant = self.assign(user_id)
        self.results[variant]["count"] += 1
        if positive:
            self.results[variant]["positive"] += 1

    def analyze(self):
        for variant, data in self.results.items():
            rate = data["positive"] / max(data["count"], 1)
            print(f"{variant}: {rate:.2%} ({data['count']} samples)")
```
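Rate differences on small samples are often noise; a two-proportion z-test (plain-stdlib sketch, assuming both arms have samples) gives a rough significance check before declaring a winner:

```python
import math

def z_test(pos_a, n_a, pos_b, n_b):
    # Two-proportion z-test; returns the two-sided p-value.
    p_a, p_b = pos_a / n_a, pos_b / n_b
    pooled = (pos_a + pos_b) / (n_a + n_b)
    se = math.sqrt(pooled * (1 - pooled) * (1 / n_a + 1 / n_b))
    z = (p_a - p_b) / se
    return 2 * (1 - 0.5 * (1 + math.erf(abs(z) / math.sqrt(2))))
```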
## Best Practices

1. Collect both explicit and implicit feedback
2. Analyze patterns, not individual feedback
3. Close the loop (feedback → improvement)
4. A/B test changes
5. Monitor long-term trends (see the sketch below)
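For point 5, a sketch of weekly trend monitoring over stored explicit feedback, assuming records shaped like those `FeedbackCollector` saves:

```python
from collections import defaultdict

def weekly_positive_rate(records):
    # Bucket thumbs feedback by ISO week and report the positive rate.
    weeks = defaultdict(lambda: {"count": 0, "positive": 0})
    for r in records:
        key = r["timestamp"].strftime("%G-W%V")  # ISO year-week
        weeks[key]["count"] += 1
        if r["thumbs"] == "up":
            weeks[key]["positive"] += 1
    return {
        week: d["positive"] / d["count"]
        for week, d in sorted(weeks.items())
    }
```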