@realtimex/realtimex-alchemy 1.0.58 → 1.0.60

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/CHANGELOG.md CHANGED
@@ -5,6 +5,24 @@ All notable changes to this project will be documented in this file.
5
5
  The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
6
6
  and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
7
7
 
8
+ ## [1.0.60] - 2026-01-28
9
+
10
+ ### Added
11
+ - **Content Intelligence**: Implemented "Alchemist's Distillation" – The AI now actively cleanses mined content during analysis, stripping navigation, ads, and "read more" noise while preserving the core text verbatim.
12
+ - **Data Quality**: Signals now store the AI-refined version of content (`refined_content`) instead of raw HTML scrapes, significantly improving the quality of future RAG retrieval.
13
+
14
+ ### Improved
15
+ - **Context Window**: Increased the analysis safety ceiling from 10k to **200k characters** (~50k tokens). This ensures deep-dive articles, long-form essays, and technical documentation are analyzed in full without truncation.
16
+
17
+ ## [1.0.59] - 2026-01-28
18
+
19
+ ### Added
20
+ - **Localization**: Completed full localization for **Authentication** and **Setup Wizard** flows.
21
+ - **UI**: Integrated `LanguageSwitcher` directly into the Login and Setup screens, allowing users to choose their preferred language before onboarding.
22
+
23
+ ### Improved
24
+ - **UX**: Enhanced error messages and helper text in the Setup Wizard with localized strings.
25
+
8
26
  ## [1.0.58] - 2026-01-28
9
27
 
10
28
  ### Documentation
@@ -120,8 +120,12 @@ export class AlchemistService {
120
120
  content = `Page Title: ${entry.title} (Login/paywall required - content not accessible)`;
121
121
  }
122
122
  else {
123
- // Truncate to avoid token limits (keep ~8000 chars)
124
- const truncated = cleaned.length > 10000 ? cleaned.substring(0, 10000) + '...' : cleaned;
123
+ // Safety ceiling to prevent pathological cases (e.g., entire doc sites scraped as one page)
124
+ // 200k chars ~ 50k tokens - only triggers on edge cases, not normal articles
125
+ const MAX_SAFE_CHARS = 200000;
126
+ const truncated = cleaned.length > MAX_SAFE_CHARS
127
+ ? cleaned.substring(0, MAX_SAFE_CHARS) + '\n\n[Content truncated - exceeds 200k chars]'
128
+ : cleaned;
125
129
  content = `Page Title: ${entry.title}\nContent: ${truncated}`;
126
130
  }
127
131
  }
@@ -142,11 +146,25 @@ export class AlchemistService {
142
146
  level: 'info',
143
147
  userId
144
148
  }, supabase);
145
- // 3. LLM Analysis
149
+ // 3. LLM Analysis (+ Content Cleaning)
146
150
  const response = await this.analyzeContent(content, finalUrl, settings, learningContext);
147
151
  const duration = Date.now() - startAnalysis;
148
152
  // 4. Save Signal (ALWAYS save for Active Learning - Low scores = candidates for boost)
149
153
  console.log(`[AlchemistService] Saving signal (${response.score}%)...`);
154
+ // DECISION: Use "refined_content" from the LLM if available and valid; otherwise fall back to our cleaned version
155
+ let finalContentToSave = content;
156
+ if (!isGatedContent && response.refined_content && response.refined_content.length > 50) {
157
+ // Use the LLM's pristine version
158
+ finalContentToSave = response.refined_content;
159
+ // Re-prefix title if lost (optional, but good for context)
160
+ if (!finalContentToSave.startsWith('Page Title:')) {
161
+ finalContentToSave = `Page Title: ${entry.title}\n\n${finalContentToSave}`;
162
+ }
163
+ }
164
+ else {
165
+ // Fallback: Use original content (already has 200k safety ceiling applied above)
166
+ finalContentToSave = content;
167
+ }
150
168
  const { data: insertedSignal, error: insertError } = await supabase
151
169
  .from('signals')
152
170
  .insert([{
@@ -158,13 +176,14 @@ export class AlchemistService {
158
176
  category: response.category,
159
177
  entities: response.entities,
160
178
  tags: (response.tags || []).map(t => t.toLowerCase().trim()),
161
- content: content,
179
+ content: finalContentToSave,
162
180
  // Mark as dismissed if low score OR gated content
163
181
  is_dismissed: response.score < 50 || isGatedContent,
164
182
  metadata: {
165
183
  original_source_url: entry.url,
166
184
  resolved_at: new Date().toISOString(),
167
- is_gated: isGatedContent
185
+ is_gated: isGatedContent,
186
+ ai_cleaned: !!(response.refined_content && response.refined_content.length > 50)
168
187
  }
169
188
  }])
170
189
  .select()
@@ -281,7 +300,7 @@ export class AlchemistService {
281
300
  - "Page not found", error pages, access denied
282
301
  - App store pages, download prompts
283
302
  - Empty or placeholder content
284
- For these, return: score=0, category="Other", summary="[Login wall/Navigation page/etc]", tags=[], entities=[]
303
+ For these, return: score=0, category="Other", summary="[Login wall/Navigation page/etc]", tags=[], entities=[], refined_content=""
285
304
 
286
305
  2. SCORING GUIDE:
287
306
  - High (80-100): Original research, data, insights, technical depth. MATCHES USER INTERESTS.
@@ -297,6 +316,13 @@ export class AlchemistService {
297
316
  "machine learning", "startups", "regulations", "cybersecurity", "investing"
298
317
  NEVER include: "login", "navigation", "authentication", "menu", "footer", "social media", "facebook", "meta"
299
318
 
319
+ 5. CONTENT REFINEMENT (The "Alchemist's Distillation"):
320
+ - You must CLEAN the input content to remove noise.
321
+ - Remove: "Read more" links, social media footers, "Subscribe" prompts, navigation elements, ads.
322
+ - KEEP: The core article text VERBATIM. Do not rewrite sentences. Do not fix grammar. Only remove noise lines.
323
+ - If the content is already clean, return it as is.
324
+ - This "refined_content" will be stored as the permanent record.
325
+
300
326
  Return STRICT JSON:
301
327
  {
302
328
  "score": number (0-100),
@@ -304,7 +330,8 @@ export class AlchemistService {
304
330
  "summary": string (1-sentence concise gist, or "[Junk page]" if score=0),
305
331
  "entities": string[] (people, companies, products mentioned),
306
332
  "tags": string[] (3-5 TOPIC tags only, no platform/UI terms),
307
- "relevant": boolean (true if score > 50)
333
+ "relevant": boolean (true if score > 50),
334
+ "refined_content": string (The noise-free, verbatim article text)
308
335
  }
309
336
  `;
310
337
  const response = await sdk.llm.chat([