devrel-origin 0.2.14__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- devrel_origin/__init__.py +15 -0
- devrel_origin/cli/__init__.py +92 -0
- devrel_origin/cli/_common.py +243 -0
- devrel_origin/cli/analytics.py +28 -0
- devrel_origin/cli/argus.py +497 -0
- devrel_origin/cli/auth.py +227 -0
- devrel_origin/cli/config.py +108 -0
- devrel_origin/cli/content.py +259 -0
- devrel_origin/cli/cost.py +108 -0
- devrel_origin/cli/cro.py +298 -0
- devrel_origin/cli/deliverables.py +65 -0
- devrel_origin/cli/docs.py +91 -0
- devrel_origin/cli/doctor.py +178 -0
- devrel_origin/cli/experiment.py +29 -0
- devrel_origin/cli/growth.py +97 -0
- devrel_origin/cli/init.py +472 -0
- devrel_origin/cli/intel.py +27 -0
- devrel_origin/cli/kb.py +96 -0
- devrel_origin/cli/listen.py +31 -0
- devrel_origin/cli/marketing.py +66 -0
- devrel_origin/cli/migrate.py +45 -0
- devrel_origin/cli/run.py +46 -0
- devrel_origin/cli/sales.py +57 -0
- devrel_origin/cli/schedule.py +62 -0
- devrel_origin/cli/synthesize.py +28 -0
- devrel_origin/cli/triage.py +29 -0
- devrel_origin/cli/video.py +35 -0
- devrel_origin/core/__init__.py +58 -0
- devrel_origin/core/agent_config.py +75 -0
- devrel_origin/core/argus.py +964 -0
- devrel_origin/core/atlas.py +1450 -0
- devrel_origin/core/base.py +372 -0
- devrel_origin/core/cyra.py +563 -0
- devrel_origin/core/dex.py +708 -0
- devrel_origin/core/echo.py +614 -0
- devrel_origin/core/growth/__init__.py +27 -0
- devrel_origin/core/growth/recommendations.py +219 -0
- devrel_origin/core/growth/target_kinds.py +51 -0
- devrel_origin/core/iris.py +513 -0
- devrel_origin/core/kai.py +1367 -0
- devrel_origin/core/llm.py +542 -0
- devrel_origin/core/llm_backends.py +274 -0
- devrel_origin/core/mox.py +514 -0
- devrel_origin/core/nova.py +349 -0
- devrel_origin/core/pax.py +1205 -0
- devrel_origin/core/rex.py +532 -0
- devrel_origin/core/sage.py +486 -0
- devrel_origin/core/sentinel.py +385 -0
- devrel_origin/core/types.py +98 -0
- devrel_origin/core/video/__init__.py +22 -0
- devrel_origin/core/video/assembler.py +131 -0
- devrel_origin/core/video/browser_recorder.py +118 -0
- devrel_origin/core/video/desktop_recorder.py +254 -0
- devrel_origin/core/video/overlay_renderer.py +143 -0
- devrel_origin/core/video/script_parser.py +147 -0
- devrel_origin/core/video/tts_engine.py +82 -0
- devrel_origin/core/vox.py +268 -0
- devrel_origin/core/watchdog.py +321 -0
- devrel_origin/project/__init__.py +1 -0
- devrel_origin/project/config.py +75 -0
- devrel_origin/project/cost_sink.py +61 -0
- devrel_origin/project/init.py +104 -0
- devrel_origin/project/paths.py +75 -0
- devrel_origin/project/state.py +241 -0
- devrel_origin/project/templates/__init__.py +4 -0
- devrel_origin/project/templates/config.toml +24 -0
- devrel_origin/project/templates/devrel.gitignore +10 -0
- devrel_origin/project/templates/slop-blocklist.md +45 -0
- devrel_origin/project/templates/style.md +24 -0
- devrel_origin/project/templates/voice.md +29 -0
- devrel_origin/quality/__init__.py +66 -0
- devrel_origin/quality/editorial.py +357 -0
- devrel_origin/quality/persona.py +84 -0
- devrel_origin/quality/readability.py +148 -0
- devrel_origin/quality/slop.py +167 -0
- devrel_origin/quality/style.py +110 -0
- devrel_origin/quality/voice.py +15 -0
- devrel_origin/tools/__init__.py +9 -0
- devrel_origin/tools/analytics.py +304 -0
- devrel_origin/tools/api_client.py +393 -0
- devrel_origin/tools/apollo_client.py +305 -0
- devrel_origin/tools/code_validator.py +428 -0
- devrel_origin/tools/github_tools.py +297 -0
- devrel_origin/tools/instantly_client.py +412 -0
- devrel_origin/tools/kb_harvester.py +340 -0
- devrel_origin/tools/mcp_server.py +578 -0
- devrel_origin/tools/notifications.py +245 -0
- devrel_origin/tools/run_report.py +193 -0
- devrel_origin/tools/scheduler.py +231 -0
- devrel_origin/tools/search_tools.py +321 -0
- devrel_origin/tools/self_improve.py +168 -0
- devrel_origin/tools/sheets.py +236 -0
- devrel_origin-0.2.14.dist-info/METADATA +354 -0
- devrel_origin-0.2.14.dist-info/RECORD +98 -0
- devrel_origin-0.2.14.dist-info/WHEEL +5 -0
- devrel_origin-0.2.14.dist-info/entry_points.txt +2 -0
- devrel_origin-0.2.14.dist-info/licenses/LICENSE +21 -0
- devrel_origin-0.2.14.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,513 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Iris — Feedback Synthesizer Agent
|
|
3
|
+
|
|
4
|
+
Extracts themes from developer feedback across GitHub, Discourse, and support
|
|
5
|
+
channels. Ranks pain points and maps the developer journey.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
import json
|
|
9
|
+
import logging
|
|
10
|
+
from dataclasses import dataclass
|
|
11
|
+
from pathlib import Path
|
|
12
|
+
from typing import Any, Optional
|
|
13
|
+
|
|
14
|
+
from devrel_origin.core.base import strip_markdown_fences
|
|
15
|
+
from devrel_origin.core.llm import LLMClient
|
|
16
|
+
from devrel_origin.tools.api_client import PostHogClient
|
|
17
|
+
|
|
18
|
+
logger = logging.getLogger(__name__)
|
|
19
|
+
|
|
20
|
+
# Max signals to send in a single LLM call to avoid oversized/truncated responses.
# Consumed as the chunk size in Iris._extract_themes.
_MAX_SIGNALS_PER_CALL = 30

# Jaccard similarity threshold for merging near-duplicate themes.
# Calibrated for theme titles in the 4-8 word range (typical LLM output).
# Two themes whose normalized title token sets share >= 50% are merged.
# Lower this if you see near-duplicate themes proliferating; raise it if
# distinct themes are being incorrectly merged.
# Consumed by Iris._merge_themes via Iris._token_jaccard.
SIMILARITY_THRESHOLD = 0.5
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def _safe_json_loads(text: str) -> dict:
|
|
32
|
+
"""Parse JSON from LLM output with regex fallback for malformed responses."""
|
|
33
|
+
# Try direct parse first
|
|
34
|
+
try:
|
|
35
|
+
return json.loads(text)
|
|
36
|
+
except json.JSONDecodeError:
|
|
37
|
+
pass
|
|
38
|
+
|
|
39
|
+
# Fallback: find the outermost JSON object via brace matching
|
|
40
|
+
start = text.find("{")
|
|
41
|
+
if start == -1:
|
|
42
|
+
raise json.JSONDecodeError("No JSON object found", text, 0)
|
|
43
|
+
|
|
44
|
+
depth = 0
|
|
45
|
+
in_string = False
|
|
46
|
+
escape = False
|
|
47
|
+
for i in range(start, len(text)):
|
|
48
|
+
c = text[i]
|
|
49
|
+
if escape:
|
|
50
|
+
escape = False
|
|
51
|
+
continue
|
|
52
|
+
if c == "\\":
|
|
53
|
+
escape = True
|
|
54
|
+
continue
|
|
55
|
+
if c == '"':
|
|
56
|
+
in_string = not in_string
|
|
57
|
+
continue
|
|
58
|
+
if in_string:
|
|
59
|
+
continue
|
|
60
|
+
if c == "{":
|
|
61
|
+
depth += 1
|
|
62
|
+
elif c == "}":
|
|
63
|
+
depth -= 1
|
|
64
|
+
if depth == 0:
|
|
65
|
+
try:
|
|
66
|
+
return json.loads(text[start : i + 1])
|
|
67
|
+
except json.JSONDecodeError:
|
|
68
|
+
break
|
|
69
|
+
|
|
70
|
+
raise json.JSONDecodeError("Could not extract valid JSON", text, 0)
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
@dataclass
class FeedbackTheme:
    """A recurring theme extracted from developer feedback.

    Produced by Iris._extract_themes_from_chunk from LLM output; merged
    instances (see Iris._merge_themes) sum frequencies and average severity.
    """

    theme_id: str  # Short unique identifier supplied by the LLM
    title: str  # Concise theme name (also the fuzzy-merge key)
    description: str  # 1-2 sentence explanation
    frequency: int  # Number of mentions
    severity: float  # 1-10 scale
    composite_score: float  # frequency * severity
    sources: list[str]  # Where this theme appeared
    # Evidence for the theme; holds "#123"-style issue refs when the LLM
    # returns "representative_issues", otherwise verbatim quotes.
    representative_quotes: list[str]
    product_areas: list[str]  # Affected areas (e.g. orchestration, onboarding/docs)
    recommended_actions: list[str]  # 1-2 concrete follow-up actions
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
@dataclass
class DeveloperJourneyStage:
    """Pain points mapped to a stage in the developer journey."""

    # Stage name; Iris._map_to_journey uses the JOURNEY_KEYWORDS stages
    # plus an explicit "other" bucket for unmatched themes.
    stage: str  # discovery, evaluation, onboarding, integration, scaling
    pain_points: list[str]  # Titles of themes routed to this stage
    friction_score: float  # 1-10 (average severity of matched themes; 0.0 if none)
    drop_off_risk: str  # low, medium, high
|
|
97
|
+
|
|
98
|
+
|
|
99
|
+
@dataclass
class FeedbackSynthesis:
    """Complete feedback synthesis report.

    Returned by Iris.synthesize_weekly; aggregates every source fed into
    the run (Sage triage issues, Discourse posts, support tickets).
    """

    period: str  # Reporting window label (e.g. "weekly")
    total_signals: int  # Count of raw signals ingested across all sources
    themes: list[FeedbackTheme]  # Ranked, merged themes
    journey_map: list[DeveloperJourneyStage]  # Per-stage friction summary
    product_recommendations: list[dict[str, str]]  # theme/recommendation/evidence dicts
    content_opportunities: list[str]  # Tutorial briefs derived from top themes
|
|
109
|
+
|
|
110
|
+
|
|
111
|
+
class Iris:
    """
    Feedback Synthesizer agent for cross-channel developer insight extraction.

    Capabilities:
    - Extract recurring themes from GitHub issues, Discourse, and support
    - Rank pain points by frequency x severity composite score
    - Map pain points to developer journey stages
    - Generate product recommendations backed by evidence
    - Identify content opportunities (tutorials that would address top pain points)

    Tools:
    1. github_issues_analyzer — Batch-analyze issue titles/bodies for themes
    2. discourse_fetcher — Pull recent Discourse posts and replies
    3. support_ticket_reader — Read support channel messages
    4. theme_extractor — NLP-based theme clustering
    5. sentiment_aggregator — Aggregate sentiment across sources
    6. pain_point_ranker — Score pain points by frequency x severity
    7. journey_mapper — Map pain points to developer journey stages
    8. quote_selector — Select representative quotes for each theme
    9. product_recommender — Generate evidence-backed product recommendations
    10. content_gap_finder — Identify tutorials/docs that would address pain points
    11. trend_detector — Compare themes week-over-week for emerging issues
    12. report_compiler — Generate the final synthesis document
    """

    SYSTEM_PROMPT = """You are Iris, a feedback synthesizer for OpenClaw. You analyze
developer feedback from multiple channels to extract actionable insights.

Your synthesis principles:
1. EVIDENCE-BASED — Every theme must be backed by specific quotes and counts
2. ACTIONABLE — Don't just describe problems, recommend solutions
3. JOURNEY-AWARE — Map pain points to where developers are in their journey
4. PRIORITIZED — Rank by composite score (frequency x severity), not just volume
5. CROSS-CHANNEL — Same theme in GitHub AND Discourse is stronger than either alone

Developer journey stages for OpenClaw:
1. Discovery — Finding the agent system, comparing to manual DevRel or competitors (Orbit, Common Room)
2. Evaluation — Cloning the repo, reading docs, running a single agent
3. Onboarding — Configuring knowledge base, connecting APIs, running first weekly cycle
4. Integration — Customizing agent prompts, adding MCP tools, tuning scoring/eval
5. Scaling — Team rollout, multi-product deployment, advanced orchestration configurations

Pain point severity scale:
- 1-3: Minor friction (confusing docs, UI annoyance)
- 4-6: Moderate blocker (feature gap, integration difficulty)
- 7-9: Major blocker (data loss risk, performance at scale)
- 10: Critical (security issue, complete failure)"""

    # Keyword heuristics consumed by _map_to_journey. Dict order matters:
    # a theme is routed to the FIRST stage whose keywords match.
    JOURNEY_KEYWORDS: dict[str, list[str]] = {
        "discovery": ["comparison", "alternative", "vs", "evaluate"],
        "evaluation": ["docs", "documentation", "tutorial", "example", "trial"],
        "onboarding": ["install", "setup", "init", "gateway", "first message", "getting started"],
        "integration": ["skill", "plugin", "voice", "channel", "provider", "llm"],
        "scaling": ["scale", "performance", "self-host", "team", "multi-device"],
    }

    def __init__(
        self,
        api_client: PostHogClient,
        knowledge_base_path: Path,
        llm_client: Optional[LLMClient] = None,
    ):
        """
        Args:
            api_client: analytics client (stored; not used by synthesis itself).
            knowledge_base_path: project knowledge-base root (stored for tooling).
            llm_client: optional LLM client; without it theme extraction
                returns no themes (see _extract_themes).
        """
        self.api_client = api_client
        self.knowledge_base_path = knowledge_base_path
        self.llm_client = llm_client

    async def execute(
        self,
        task: str,
        context: Optional[dict[str, Any]] = None,
    ) -> dict[str, Any]:
        """
        Execute a feedback synthesis task.

        Pulls from Sage's triage data and additional sources to
        produce a ranked, evidence-backed synthesis.

        Args:
            task: free-text task description (logged, echoed in the result).
            context: optional upstream data; ``context["sage_triage"]["issues"]``
                is consumed when present.

        Returns:
            A JSON-serializable dict with themes, a per-stage journey map,
            product recommendations, content opportunities, and run metadata.
        """
        # Lazy %-args match the logging idiom used elsewhere in this class
        # and skip the string formatting when INFO is disabled.
        logger.info("Iris executing: %s...", task[:80])

        sage_issues = []
        if context and "sage_triage" in context:
            sage_data = context["sage_triage"]
            if isinstance(sage_data, dict):
                sage_issues = sage_data.get("issues", [])

        themes = await self._extract_themes(sage_issues)
        journey_map = self._map_to_journey(themes)
        recommendations = self._generate_recommendations(themes)
        content_gaps = self._find_content_opportunities(themes)

        return {
            "agent": "iris",
            "task": task,
            "themes": [
                {
                    "theme_id": t.theme_id,
                    "title": t.title,
                    "description": t.description,
                    "frequency": t.frequency,
                    "severity": t.severity,
                    "composite_score": t.composite_score,
                    "sources": t.sources,
                    "product_areas": t.product_areas,
                    "recommended_actions": t.recommended_actions,
                }
                for t in themes
            ],
            # Keyed by stage name; stage names are unique by construction
            # (_map_to_journey emits each stage at most once).
            "journey_map": {
                stage.stage: {
                    "friction_score": stage.friction_score,
                    "pain_points": stage.pain_points,
                    "drop_off_risk": stage.drop_off_risk,
                }
                for stage in journey_map
            },
            "product_recommendations": recommendations,
            "content_opportunities": content_gaps,
            "upstream_issues_processed": len(sage_issues),
            "status": "synthesized",
        }

    async def synthesize_weekly(
        self,
        sage_triage: dict[str, Any],
        discourse_posts: Optional[list[dict]] = None,
        support_tickets: Optional[list[dict]] = None,
    ) -> FeedbackSynthesis:
        """Run a full weekly feedback synthesis.

        Args:
            sage_triage: Sage's triage output; its "issues" list is ingested.
            discourse_posts: optional additional Discourse signals.
            support_tickets: optional additional support-channel signals.

        Returns:
            A FeedbackSynthesis covering all provided sources.
        """
        all_signals = []

        # Ingest from all sources
        if sage_triage.get("issues"):
            all_signals.extend(sage_triage["issues"])
        if discourse_posts:
            all_signals.extend(discourse_posts)
        if support_tickets:
            all_signals.extend(support_tickets)

        # Extract and rank themes
        themes = await self._extract_themes(all_signals)
        journey_map = self._map_to_journey(themes)
        recommendations = self._generate_recommendations(themes)
        content_gaps = self._find_content_opportunities(themes)

        return FeedbackSynthesis(
            period="weekly",
            total_signals=len(all_signals),
            themes=themes,
            journey_map=journey_map,
            product_recommendations=recommendations,
            content_opportunities=content_gaps,
        )

    async def _extract_themes(self, signals: list[dict]) -> list[FeedbackTheme]:
        """Extract recurring themes from all feedback signals via LLM.

        Processes signals in chunks of _MAX_SIGNALS_PER_CALL, extracts
        themes from each chunk, then merges overlapping themes by title
        similarity. This ensures no signals are silently dropped.

        Returns themes sorted by composite_score, highest first.
        """
        # Distinguish the two early-return paths in logs so "no themes
        # this week" can be diagnosed without re-running the agent.
        if not signals:
            logger.info("Iris._extract_themes: no signals provided; returning empty themes list")
            return []
        if not self.llm_client:
            logger.warning("Iris._extract_themes: no LLM client available; cannot extract themes")
            return []

        # Process in chunks
        all_themes: list[FeedbackTheme] = []
        for i in range(0, len(signals), _MAX_SIGNALS_PER_CALL):
            chunk = signals[i : i + _MAX_SIGNALS_PER_CALL]
            chunk_themes = await self._extract_themes_from_chunk(chunk)
            all_themes.extend(chunk_themes)

        if len(signals) > _MAX_SIGNALS_PER_CALL:
            logger.info(
                "Processed %d signals in %d chunks, got %d raw themes",
                len(signals),
                (len(signals) + _MAX_SIGNALS_PER_CALL - 1) // _MAX_SIGNALS_PER_CALL,
                len(all_themes),
            )

        # Merge themes with the same or similar titles
        merged = self._merge_themes(all_themes)
        return sorted(merged, key=lambda t: t.composite_score, reverse=True)

    async def _extract_themes_from_chunk(
        self,
        signals: list[dict],
    ) -> list[FeedbackTheme]:
        """Extract themes from a single chunk of signals.

        Returns an empty list (with a warning) if the LLM call or JSON
        parsing fails — callers treat a failed chunk as "no themes".
        """
        issues_text = "\n".join(
            f"- #{s.get('number', '?')}: {s.get('title', '')} — {s.get('category', 'unknown')}"
            for s in signals
        )

        prompt = f"""Analyze these developer feedback signals and extract recurring themes.

Signals:
{issues_text}

Return a JSON object with a "themes" array. Each theme has:
- theme_id: short unique string
- title: concise theme name
- description: 1-2 sentence explanation
- frequency: how many signals relate to this theme (integer)
- severity: 1-10 severity score (float)
- sources: list of platforms observed in the signals you classified (typically a subset of github, discourse, twitter, support_tickets — infer from the signals above)
- representative_issues: list of issue numbers (e.g. ["#123", "#456"]) from the signals above
- product_areas: which areas are affected (orchestration, agent SDK, MCP tools, knowledge base, scoring/eval, prompt optimization, onboarding/docs, security)
- recommended_actions: 1-2 concrete actions to address this

Return ONLY valid JSON, no markdown fences."""

        try:
            raw = await self.llm_client.generate(
                system_prompt=self.SYSTEM_PROMPT,
                user_prompt=prompt,
                temperature=0.3,
                max_tokens=2048,
                model="haiku",
            )
            raw = strip_markdown_fences(raw)
            data = _safe_json_loads(raw)
            themes = []
            for t in data.get("themes", []):
                # LLMs occasionally return numbers as strings (or null).
                # Coerce defensively so one malformed theme falls back to
                # defaults instead of raising TypeError at `freq * sev`
                # and discarding the entire chunk via the except below.
                try:
                    freq = int(t.get("frequency", 1))
                except (TypeError, ValueError):
                    freq = 1
                try:
                    sev = float(t.get("severity", 5.0))
                except (TypeError, ValueError):
                    sev = 5.0
                themes.append(
                    FeedbackTheme(
                        theme_id=t.get("theme_id", ""),
                        title=t.get("title", ""),
                        description=t.get("description", ""),
                        frequency=freq,
                        severity=sev,
                        composite_score=freq * sev,
                        sources=t.get("sources", []),
                        # The prompt asks for "representative_issues"; fall
                        # back to "representative_quotes" for older outputs.
                        representative_quotes=t.get(
                            "representative_issues", t.get("representative_quotes", [])
                        ),
                        product_areas=t.get("product_areas", []),
                        recommended_actions=t.get("recommended_actions", []),
                    )
                )
            return themes
        except Exception as exc:
            # Broad catch is deliberate: a failed chunk should degrade to
            # "no themes", not abort the whole synthesis run.
            logger.warning("Theme extraction failed for chunk: %s", exc)
            return []

    @staticmethod
    def _token_jaccard(a: str, b: str) -> float:
        """Compute Jaccard similarity between two title strings.

        Case-insensitive, whitespace-tokenized; returns 0.0 when either
        side has no tokens.
        """
        tokens_a = set(a.lower().split())
        tokens_b = set(b.lower().split())
        if not tokens_a or not tokens_b:
            return 0.0
        intersection = tokens_a & tokens_b
        union = tokens_a | tokens_b
        return len(intersection) / len(union)

    @classmethod
    def _merge_themes(cls, themes: list[FeedbackTheme]) -> list[FeedbackTheme]:
        """Merge themes with similar titles using fuzzy matching.

        Uses token-overlap Jaccard similarity (threshold from module-level
        ``SIMILARITY_THRESHOLD``) to group themes that the LLM named
        differently across chunks. When themes merge, frequencies are
        summed and severity is averaged.
        """
        # Build groups using greedy fuzzy matching
        groups: list[list[FeedbackTheme]] = []
        for t in themes:
            placed = False
            for group in groups:
                # Check against the group representative (first element)
                if cls._token_jaccard(t.title, group[0].title) >= SIMILARITY_THRESHOLD:
                    group.append(t)
                    placed = True
                    break
            if not placed:
                groups.append([t])

        merged: list[FeedbackTheme] = []
        for group in groups:
            if len(group) == 1:
                merged.append(group[0])
                continue
            # Combine: sum frequency, average severity, union the rest.
            # The first theme in the group keeps id/title/description.
            total_freq = sum(t.frequency for t in group)
            avg_sev = sum(t.severity for t in group) / len(group)
            all_quotes = []
            all_areas = set()
            all_actions = []
            all_sources = set()
            for t in group:
                all_quotes.extend(t.representative_quotes)
                all_areas.update(t.product_areas)
                all_actions.extend(t.recommended_actions)
                all_sources.update(t.sources)

            merged.append(
                FeedbackTheme(
                    theme_id=group[0].theme_id,
                    title=group[0].title,
                    description=group[0].description,
                    frequency=total_freq,
                    severity=round(avg_sev, 1),
                    composite_score=round(total_freq * avg_sev, 1),
                    sources=list(all_sources),
                    representative_quotes=all_quotes[:10],
                    # dict.fromkeys de-duplicates while preserving order
                    recommended_actions=list(dict.fromkeys(all_actions))[:3],
                    product_areas=list(all_areas),
                )
            )

        return merged

    def _map_to_journey(self, themes: list[FeedbackTheme]) -> list[DeveloperJourneyStage]:
        """Map themes to developer journey stages based on product areas and keywords."""
        # Include an explicit "other" bucket. Defaulting unmatched themes to
        # "onboarding" systematically inflates onboarding-friction signal
        # for mature products, where most themes are about scaling and
        # integration. "other" is honest about not knowing.
        stage_data: dict[str, list[FeedbackTheme]] = {stage: [] for stage in self.JOURNEY_KEYWORDS}
        stage_data["other"] = []

        for theme in themes:
            text = f"{theme.title} {theme.description} {' '.join(theme.product_areas)}".lower()
            matched = False
            for stage, keywords in self.JOURNEY_KEYWORDS.items():
                if any(kw in text for kw in keywords):
                    stage_data[stage].append(theme)
                    matched = True
                    break
            if not matched:
                stage_data["other"].append(theme)

        # One summary log per call rather than per theme — keeps signal
        # actionable without flooding logs on a 50-theme run.
        unmatched = stage_data["other"]
        if unmatched:
            logger.info(
                "%d theme(s) routed to 'other' journey stage: %s",
                len(unmatched),
                [t.title for t in unmatched],
            )

        result = []
        for stage, matched_themes in stage_data.items():
            if matched_themes:
                avg_severity = sum(t.severity for t in matched_themes) / len(matched_themes)
                risk = "high" if avg_severity >= 7 else "medium" if avg_severity >= 4 else "low"
            else:
                avg_severity = 0.0
                risk = "low"

            result.append(
                DeveloperJourneyStage(
                    stage=stage,
                    pain_points=[t.title for t in matched_themes],
                    friction_score=round(avg_severity, 1),
                    drop_off_risk=risk,
                )
            )

        return result

    def _generate_recommendations(self, themes: list[FeedbackTheme]) -> list[dict[str, str]]:
        """Generate product recommendations from themes.

        One recommendation per theme (its top recommended action), each
        backed by an evidence string citing frequency and severity.
        """
        return [
            {
                "theme": theme.title,
                "recommendation": action,
                "evidence": f"{theme.frequency} mentions, severity {theme.severity}/10",
            }
            for theme in themes
            for action in theme.recommended_actions[:1]
        ]

    def _find_content_opportunities(self, themes: list[FeedbackTheme]) -> list[str]:
        """Build content briefs from themes — title + top recommended action.

        Each brief is a short string Kai can use as a writing prompt without
        further synthesis. When a theme has no recommended_actions, the
        fallback surfaces severity and frequency so Kai's KB-search has
        enough context to find related material.
        """
        # Only the top five themes by composite score become briefs.
        ranked = sorted(themes, key=lambda t: t.composite_score, reverse=True)[:5]
        opportunities: list[str] = []
        for theme in ranked:
            actions = getattr(theme, "recommended_actions", None) or []
            top_action = actions[0] if actions else None
            if top_action:
                opportunities.append(f"Tutorial on '{theme.title}': {top_action}")
            else:
                opportunities.append(
                    f"Tutorial on '{theme.title}' "
                    f"(severity={theme.severity}, freq={theme.frequency})"
                )
        return opportunities
|