@realtimex/realtimex-alchemy 1.0.58 → 1.0.60
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/CHANGELOG.md +18 -0
- package/dist/api/services/AlchemistService.js +34 -7
- package/dist/assets/{index-DQ-bmteY.js → index-BGFGGJ94.js} +58 -58
- package/dist/assets/index-uuNKpKqf.css +1 -0
- package/dist/index.html +2 -2
- package/package.json +1 -1
- package/dist/assets/index-CEKFBLoj.css +0 -1
package/dist/CHANGELOG.md
CHANGED
|
@@ -5,6 +5,24 @@ All notable changes to this project will be documented in this file.
|
|
|
5
5
|
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
|
|
6
6
|
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
|
7
7
|
|
|
8
|
+
## [1.0.60] - 2026-01-28
|
|
9
|
+
|
|
10
|
+
### Added
|
|
11
|
+
- **Content Intelligence**: Implemented "Alchemist's Distillation": the AI now actively cleanses mined content during analysis, stripping navigation, ads, and "read more" noise while preserving the core text verbatim.
|
|
12
|
+
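- **Data Quality**: Signals now store the AI-refined version of content (`refined_content`) instead of raw HTML scrapes, significantly improving the quality of future RAG retrieval. Gated pages, and responses whose refined text is missing or 50 characters or shorter, fall back to the original cleaned content.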
|
|
13
|
+
|
|
14
|
+
### Improved
|
|
15
|
+
- **Context Window**: Increased the analysis safety ceiling from 10k to **200k characters** (~50k tokens). This ensures deep-dive articles, long-form essays, and technical documentation are analyzed in full; only content beyond the 200k-character ceiling is truncated, with a truncation marker appended.
|
|
16
|
+
|
|
17
|
+
## [1.0.59] - 2026-01-28
|
|
18
|
+
|
|
19
|
+
### Added
|
|
20
|
+
- **Localization**: Completed full localization for **Authentication** and **Setup Wizard** flows.
|
|
21
|
+
- **UI**: Integrated `LanguageSwitcher` directly into the Login and Setup screens, allowing users to choose their preferred language before onboarding.
|
|
22
|
+
|
|
23
|
+
### Improved
|
|
24
|
+
- **UX**: Enhanced error messages and helper text in the Setup Wizard with localized strings.
|
|
25
|
+
|
|
8
26
|
## [1.0.58] - 2026-01-28
|
|
9
27
|
|
|
10
28
|
### Documentation
|
|
@@ -120,8 +120,12 @@ export class AlchemistService {
|
|
|
120
120
|
content = `Page Title: ${entry.title} (Login/paywall required - content not accessible)`;
|
|
121
121
|
}
|
|
122
122
|
else {
|
|
123
|
-
//
|
|
124
|
-
|
|
123
|
+
// Safety ceiling to prevent pathological cases (e.g., entire doc sites scraped as one page)
|
|
124
|
+
// 200k chars ≈ 50k tokens - only triggers on edge cases, not normal articles
|
|
125
|
+
const MAX_SAFE_CHARS = 200000;
|
|
126
|
+
const truncated = cleaned.length > MAX_SAFE_CHARS
|
|
127
|
+
? cleaned.substring(0, MAX_SAFE_CHARS) + '\n\n[Content truncated - exceeds 200k chars]'
|
|
128
|
+
: cleaned;
|
|
125
129
|
content = `Page Title: ${entry.title}\nContent: ${truncated}`;
|
|
126
130
|
}
|
|
127
131
|
}
|
|
@@ -142,11 +146,25 @@ export class AlchemistService {
|
|
|
142
146
|
level: 'info',
|
|
143
147
|
userId
|
|
144
148
|
}, supabase);
|
|
145
|
-
// 3. LLM Analysis
|
|
149
|
+
// 3. LLM Analysis (+ Content Cleaning)
|
|
146
150
|
const response = await this.analyzeContent(content, finalUrl, settings, learningContext);
|
|
147
151
|
const duration = Date.now() - startAnalysis;
|
|
148
152
|
// 4. Save Signal (ALWAYS save for Active Learning - Low scores = candidates for boost)
|
|
149
153
|
console.log(`[AlchemistService] Saving signal (${response.score}%)...`);
|
|
154
|
+
// DECISION: Use "refined_content" from LLM if available and valid, otherwise fallback to our cleaned version
|
|
155
|
+
let finalContentToSave = content;
|
|
156
|
+
if (!isGatedContent && response.refined_content && response.refined_content.length > 50) {
|
|
157
|
+
// Use the LLM's pristine version
|
|
158
|
+
finalContentToSave = response.refined_content;
|
|
159
|
+
// Re-prefix title if lost (optional, but good for context)
|
|
160
|
+
if (!finalContentToSave.startsWith('Page Title:')) {
|
|
161
|
+
finalContentToSave = `Page Title: ${entry.title}\n\n${finalContentToSave}`;
|
|
162
|
+
}
|
|
163
|
+
}
|
|
164
|
+
else {
|
|
165
|
+
// Fallback: Use original content (already has 200k safety ceiling applied above)
|
|
166
|
+
finalContentToSave = content;
|
|
167
|
+
}
|
|
150
168
|
const { data: insertedSignal, error: insertError } = await supabase
|
|
151
169
|
.from('signals')
|
|
152
170
|
.insert([{
|
|
@@ -158,13 +176,14 @@ export class AlchemistService {
|
|
|
158
176
|
category: response.category,
|
|
159
177
|
entities: response.entities,
|
|
160
178
|
tags: (response.tags || []).map(t => t.toLowerCase().trim()),
|
|
161
|
-
content:
|
|
179
|
+
content: finalContentToSave,
|
|
162
180
|
// Mark as dismissed if low score OR gated content
|
|
163
181
|
is_dismissed: response.score < 50 || isGatedContent,
|
|
164
182
|
metadata: {
|
|
165
183
|
original_source_url: entry.url,
|
|
166
184
|
resolved_at: new Date().toISOString(),
|
|
167
|
-
is_gated: isGatedContent
|
|
185
|
+
is_gated: isGatedContent,
|
|
186
|
+
ai_cleaned: !!(response.refined_content && response.refined_content.length > 50)
|
|
168
187
|
}
|
|
169
188
|
}])
|
|
170
189
|
.select()
|
|
@@ -281,7 +300,7 @@ export class AlchemistService {
|
|
|
281
300
|
- "Page not found", error pages, access denied
|
|
282
301
|
- App store pages, download prompts
|
|
283
302
|
- Empty or placeholder content
|
|
284
|
-
For these, return: score=0, category="Other", summary="[Login wall/Navigation page/etc]", tags=[], entities=[]
|
|
303
|
+
For these, return: score=0, category="Other", summary="[Login wall/Navigation page/etc]", tags=[], entities=[], refined_content=""
|
|
285
304
|
|
|
286
305
|
2. SCORING GUIDE:
|
|
287
306
|
- High (80-100): Original research, data, insights, technical depth. MATCHES USER INTERESTS.
|
|
@@ -297,6 +316,13 @@ export class AlchemistService {
|
|
|
297
316
|
"machine learning", "startups", "regulations", "cybersecurity", "investing"
|
|
298
317
|
NEVER include: "login", "navigation", "authentication", "menu", "footer", "social media", "facebook", "meta"
|
|
299
318
|
|
|
319
|
+
5. CONTENT REFINEMENT (The "Alchemist's Distillation"):
|
|
320
|
+
- You must CLEAN the input content to remove noise.
|
|
321
|
+
- Remove: "Read more" links, social media footers, "Subscribe" prompts, navigation elements, ads.
|
|
322
|
+
- KEEP: The core article text VERBATIM. Do not rewrite sentences. Do not fix grammar. Only remove noise lines.
|
|
323
|
+
- If the content is already clean, return it as is.
|
|
324
|
+
- This "refined_content" will be stored as the permanent record.
|
|
325
|
+
|
|
300
326
|
Return STRICT JSON:
|
|
301
327
|
{
|
|
302
328
|
"score": number (0-100),
|
|
@@ -304,7 +330,8 @@ export class AlchemistService {
|
|
|
304
330
|
"summary": string (1-sentence concise gist, or "[Junk page]" if score=0),
|
|
305
331
|
"entities": string[] (people, companies, products mentioned),
|
|
306
332
|
"tags": string[] (3-5 TOPIC tags only, no platform/UI terms),
|
|
307
|
-
"relevant": boolean (true if score > 50)
|
|
333
|
+
"relevant": boolean (true if score > 50),
|
|
334
|
+
"refined_content": string (The noise-free, verbatim article text)
|
|
308
335
|
}
|
|
309
336
|
`;
|
|
310
337
|
const response = await sdk.llm.chat([
|