greptor 0.2.1 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (56) hide show
  1. package/README.md +103 -83
  2. package/dist/greptor.d.ts.map +1 -1
  3. package/dist/greptor.js +6 -8
  4. package/dist/greptor.js.map +1 -1
  5. package/dist/index.d.ts +1 -0
  6. package/dist/index.d.ts.map +1 -1
  7. package/dist/processing/processor.d.ts +3 -3
  8. package/dist/processing/processor.d.ts.map +1 -1
  9. package/dist/processing/processor.js +85 -29
  10. package/dist/processing/processor.js.map +1 -1
  11. package/dist/skills/skill-generator.d.ts +2 -2
  12. package/dist/skills/skill-generator.d.ts.map +1 -1
  13. package/dist/skills/skill-generator.js +243 -106
  14. package/dist/skills/skill-generator.js.map +1 -1
  15. package/dist/storage/file-storage.d.ts +2 -2
  16. package/dist/storage/file-storage.d.ts.map +1 -1
  17. package/dist/storage/file-storage.js +10 -5
  18. package/dist/storage/file-storage.js.map +1 -1
  19. package/dist/tag-schema/generate.d.ts +4 -0
  20. package/dist/tag-schema/generate.d.ts.map +1 -0
  21. package/dist/tag-schema/generate.js +44 -0
  22. package/dist/tag-schema/generate.js.map +1 -0
  23. package/dist/tag-schema/initialize.d.ts +6 -0
  24. package/dist/tag-schema/initialize.d.ts.map +1 -0
  25. package/dist/tag-schema/initialize.js +37 -0
  26. package/dist/tag-schema/initialize.js.map +1 -0
  27. package/dist/tag-schema/types.d.ts +19 -0
  28. package/dist/tag-schema/types.d.ts.map +1 -0
  29. package/dist/{metadata-schema → tag-schema}/types.js +3 -11
  30. package/dist/tag-schema/types.js.map +1 -0
  31. package/dist/types.d.ts +8 -12
  32. package/dist/types.d.ts.map +1 -1
  33. package/package.json +3 -2
  34. package/dist/llm/llm-factory.d.ts +0 -7
  35. package/dist/llm/llm-factory.d.ts.map +0 -1
  36. package/dist/llm/llm-factory.js +0 -53
  37. package/dist/llm/llm-factory.js.map +0 -1
  38. package/dist/metadata-schema/generate.d.ts +0 -3
  39. package/dist/metadata-schema/generate.d.ts.map +0 -1
  40. package/dist/metadata-schema/generate.js +0 -43
  41. package/dist/metadata-schema/generate.js.map +0 -1
  42. package/dist/metadata-schema/initialize.d.ts +0 -5
  43. package/dist/metadata-schema/initialize.d.ts.map +0 -1
  44. package/dist/metadata-schema/initialize.js +0 -37
  45. package/dist/metadata-schema/initialize.js.map +0 -1
  46. package/dist/metadata-schema/types.d.ts +0 -34
  47. package/dist/metadata-schema/types.d.ts.map +0 -1
  48. package/dist/metadata-schema/types.js.map +0 -1
  49. package/dist/processing/chunk.d.ts +0 -3
  50. package/dist/processing/chunk.d.ts.map +0 -1
  51. package/dist/processing/chunk.js +0 -36
  52. package/dist/processing/chunk.js.map +0 -1
  53. package/dist/processing/extract-metadata.d.ts +0 -4
  54. package/dist/processing/extract-metadata.d.ts.map +0 -1
  55. package/dist/processing/extract-metadata.js +0 -39
  56. package/dist/processing/extract-metadata.js.map +0 -1
package/README.md CHANGED
@@ -34,22 +34,23 @@ bun add greptor
34
34
 
35
35
  ### Step 2: Initialize
36
36
 
37
- Create a Greptor instance with your base directory, topic, and LLM model.
37
+ Create a Greptor instance with your base directory, topic, and AI SDK language model.
38
38
 
39
39
  ```typescript
40
40
  import { createGreptor } from 'greptor';
41
+ import { openai } from "@ai-sdk/openai";
41
42
 
42
43
  // Create Greptor instance
43
44
  const greptor = await createGreptor({
44
45
  baseDir: './projects/investing',
45
46
  topic: 'Investing, stock market, financial, and macroeconomics',
46
- llmModel: 'openai:gpt-5-mini'
47
+ model: openai("gpt-5-mini"),
47
48
  });
48
49
  ```
49
50
 
50
51
  - **`baseDir`**: Home directory for your project where all data will be stored.
51
- - **`topic`**: Helps Greptor understand your data better and generate a relevant metadata schema.
52
- - **`llmModel`**: OpenAI-compatible model for chunking and metadata extraction. You must provide an API key via environment variables.
52
+ - **`topic`**: Helps Greptor understand your data better and generate a relevant tag schema.
53
+ - **`model`**: A `LanguageModel` instance from the [Vercel AI SDK](https://ai-sdk.dev).
53
54
 
54
55
  ### Step 3: Start Feeding Documents
55
56
 
@@ -62,8 +63,8 @@ await greptor.eat({
62
63
  label: 'Top Five AI Stocks I\'m Buying Now',
63
64
  content: '{fetch and populate video transcript here}',
64
65
  creationDate: new Date('2025-11-15'),
65
- metadata: {
66
- // Optional custom metadata specific to the source or document
66
+ tags: {
67
+ // Optional custom tags specific to the source or document
67
68
  channelTitle: 'Joseph Carlson',
68
69
  channelSubscribers: 496000
69
70
  },
@@ -77,8 +78,8 @@ await greptor.eat({
77
78
  label: 'Tesla reports 418,227 deliveries for the fourth quarter, down 16%',
78
79
  content: '{fetch and populate Reddit post with comments here}',
79
80
  creationDate: new Date('2025-12-03'),
80
- metadata: {
81
- // Optional custom metadata
81
+ tags: {
82
+ // Optional custom tags
82
83
  upvotes: 1400
83
84
  },
84
85
  });
@@ -86,7 +87,7 @@ await greptor.eat({
86
87
 
87
88
  ### Step 4: Wait for Background Processing
88
89
 
89
- Greptor will write your input to a raw Markdown file immediately, then run background enrichment (LLM cleaning + chunking + metadata extraction) and write a processed Markdown file. You can grep the raw files right away, and the processed files will appear shortly after.
90
+ Greptor will write your input to a raw Markdown file immediately, then run background enrichment (LLM cleaning + chunking + tagging) and write a processed Markdown file. You can grep the raw files right away, and the processed files will appear shortly after.
90
91
 
91
92
  ### Step 5: Generate a Claude Code Skill
92
93
 
@@ -96,7 +97,7 @@ await greptor.createSkill(['youtube', 'reddit']);
96
97
 
97
98
  This generates a Claude Code skill that instructs agents on how to search your indexed content effectively.
98
99
 
99
- The skill is customized for the sources you provide and includes search tips based on the metadata schema. You can always customize it manually further for better results.
100
+ The skill is customized for the sources you provide and includes search tips based on the tag schema. You can always customize it manually further for better results.
100
101
 
101
102
  ### Step 6: Run the Agent
102
103
 
@@ -150,9 +151,7 @@ Now you have a personal investment research assistant with access to your portfo
150
151
 
151
152
  Workers pick up new documents and run a one-time pipeline:
152
153
 
153
- 1. **LLM cleaning**: Remove timestamps, ads, disclaimers, boilerplate, and irrelevant content.
154
- 2. **LLM chunking**: Transform a blob into semantic section chunks.
155
- 3. **LLM metadata extraction**: Extract metadata relevant to your topic/domain and enrich each chunk with denormalized metadata.
154
+ 1. **LLM clean + chunk + tag (single prompt)**: Remove boilerplate, split into semantic chunks, and inline grep-friendly per-chunk tags.
156
155
 
157
156
  Here's an example of a processed file:
158
157
 
@@ -165,121 +164,142 @@ date: 2025-11-15
165
164
  ticker: "NVDA"
166
165
  videoId: "dQw4w9WgXcQ"
167
166
  url: "https://youtube.com/watch?v=dQw4w9WgXcQ"
168
- chunks:
169
- - id: c01
170
- title: "Revenue Growth Analysis"
171
- topics: [earnings, revenue, data-center]
172
- sentiment: positive
173
- tickers: [NVDA]
174
- price_mentioned_usd: 850.50
175
- revenue_mentioned_billions: 35.1
176
- - id: c02
177
- title: "AI Chip Demand Outlook"
178
- topics: [ai, competition, market-share]
179
- sentiment: bullish
180
- tickers: [NVDA, AMD, INTC]
181
- timeframe: next-quarter
182
167
  ---
183
168
 
184
- CHUNK c01: "Revenue Growth Analysis"
169
+ ## 01 Revenue Growth Analysis
170
+ topics=earnings,revenue,data_center
171
+ sentiment=positive
172
+ tickers=NVDA
173
+
185
174
  NVIDIA reported Q4 revenue of $35.1 billion, beating estimates...
186
175
 
187
- CHUNK c02: "AI Chip Demand Outlook"
176
+ ## 02 AI Chip Demand Outlook
177
+ topics=ai,competition,market_share
178
+ sentiment=bullish
179
+ tickers=NVDA,AMD,INTC
180
+ timeframe=next_quarter
181
+
188
182
  The demand for AI accelerators continues to outpace supply...
189
183
  ```
190
184
 
191
185
  ### 3) Navigate with grep/glob
192
186
 
193
- Your "index" is the YAML frontmatter combined with the file layout. Agents can search it deterministically.
187
+ Your "index" is the YAML frontmatter (document-level) plus the per-chunk tag lines. Agents can search it deterministically.
194
188
 
195
- **Search examples**:
189
+ **Basic search examples**:
196
190
 
197
191
  ```bash
198
- # Find all bullish sentiment for TSLA stock
199
- rg -l "ticker:.*TSLA" content/processed | xargs rg "sentiment:.*bullish"
192
+ # Simple tag search with context
193
+ rg -n -C 6 "ticker=NVDA" content/processed/
200
194
 
201
- # Count documents per ticker
202
- rg "ticker:" content/processed -o | sort | uniq -c | sort -rn | head -20
195
+ # Search for any value in a tag field
196
+ rg -n -C 6 "sentiment=" content/processed/
203
197
 
204
- # What companies does a specific YouTuber discuss?
205
- rg "company:" content/processed/youtube/JosephCarlsonShow -o | sort | uniq -c | sort -rn
198
+ # Case-insensitive full-text search
199
+ rg -i -n -C 3 "artificial intelligence" content/processed/
200
+
201
+ # Search within a specific source
202
+ rg -n -C 6 "sector=technology" content/processed/youtube/
203
+ ```
204
+
205
+ **Date-filtered searches**:
206
+
207
+ ```bash
208
+ # Content from December 2025
209
+ rg -n -C 6 "ticker=TSLA" content/processed/ --glob "**/2025-12/*.md"
206
210
 
207
- # Find all AI-related narratives with strong buy recommendations
208
- rg -l "narrative:.*ai_boom" content/processed | xargs rg "recommendation:.*strong_buy"
211
+ # Q4 2025 content
212
+ rg -n -C 6 "sentiment=bullish" content/processed/ --glob "**/2025-1[0-2]/*.md"
209
213
 
210
- # Technology sector stocks with bullish sentiment in December 2025
211
- rg -l "sector:.*technology" content/processed --glob "**/2025-12/*.md" | xargs rg "sentiment:.*bullish"
214
+ # Specific month and source
215
+ rg -n -C 6 "asset_type=etf" content/processed/reddit/ --glob "**/2025-11/*.md"
216
+ ```
212
217
 
213
- # Find dividend investment style discussions
214
- rg "investment_style:.*dividend" content/processed -l | head -10
218
+ **Combined tag filters**:
215
219
 
216
- # Bearish sentiment on large-cap stocks
217
- rg -l "market_cap:.*large_cap" content/processed | xargs rg "sentiment:.*bearish"
220
+ ```bash
221
+ # Match files that contain two specific tags (file-level filter using a file list; the tags may be in different chunks)
222
+ rg -l "sector=technology" content/processed/ | xargs rg -n -C 6 "sentiment=bullish"
218
223
 
219
- # List all tickers mentioned with their sentiment
220
- rg "ticker: \[.*\]" content/processed -A 5 | rg "sentiment:"
224
+ # Pipeline filter for complex queries
225
+ rg -n -C 6 "ticker=AAPL" content/processed/ | rg "recommendation=.*buy"
221
226
 
222
- # Find EV-related discussions across all sources
223
- rg "narrative:.*ev_transition" content/processed
227
+ # Three-way filter: tech stocks with bullish sentiment and buy recommendation
228
+ rg -l "sector=technology" content/processed/ | xargs rg -l "sentiment=bullish" | xargs rg -n -C 6 "recommendation=buy"
224
229
 
225
- # Combine multiple filters: tech stocks with strong buy in specific timeframe
226
- rg -l "sector:.*technology" content/processed --glob "**/2025-11/*.md" | \
227
- xargs rg -l "recommendation:.*strong_buy" | \
228
- xargs rg "ticker:" -o | sort | uniq -c
230
+ # Find AI narrative discussions with specific tickers
231
+ rg -n -C 6 "narrative=.*ai" content/processed/ | rg "ticker=NVDA|ticker=.*,NVDA"
232
+ ```
233
+
234
+ **Discovery and exploration**:
235
+
236
+ ```bash
237
+ # List all unique tickers mentioned
238
+ rg -o "ticker=[^\n]+" content/processed/ | cut -d= -f2 | tr ',' '\n' | sort -u
239
+
240
+ # Count occurrences of each sentiment
241
+ rg -o "sentiment=[^\n]+" content/processed/ | cut -d= -f2 | sort | uniq -c | sort -rn
242
+
243
+ # Top 20 most discussed companies
244
+ rg -o "company=[^\n]+" content/processed/ | cut -d= -f2 | tr ',' '\n' | sort | uniq -c | sort -rn | head -20
245
+
246
+ # Find all files discussing dividend investing
247
+ rg -l "investment_style=dividend" content/processed/
248
+
249
+ # See what narratives exist in the data
250
+ rg -o "narrative=[^\n]+" content/processed/ | cut -d= -f2 | tr ',' '\n' | sort -u
229
251
  ```
230
252
 
231
253
  **Analysis patterns**:
232
254
 
233
255
  ```bash
234
- # Aggregate sentiment distribution
235
- rg "sentiment:" content/processed -o | cut -d: -f2 | tr -d ' ' | sort | uniq -c
256
+ # Sentiment distribution for a specific ticker
257
+ rg -n -C 6 "ticker=TSLA" content/processed/ | rg -o "sentiment=[^\n]+" | cut -d= -f2 | sort | uniq -c
236
258
 
237
259
  # Most discussed sectors
238
- rg "sector:" content/processed -o | sort | uniq -c | sort -rn
260
+ rg -o "sector=[^\n]+" content/processed/ | cut -d= -f2 | tr ',' '\n' | sort | uniq -c | sort -rn
239
261
 
240
262
  # Track narrative evolution over time
241
263
  for month in 2025-{10..12}; do
242
264
  echo "=== $month ==="
243
- rg "narrative:" content/processed --glob "**/$month-*/*.md" -o | sort | uniq -c | sort -rn | head -5
265
+ rg -o "narrative=[^\n]+" content/processed/ --glob "**/$month/*.md" | cut -d= -f2 | tr ',' '\n' | sort | uniq -c | sort -rn | head -5
244
266
  done
245
267
 
246
- # Compare sentiment on specific stock across sources
268
+ # Compare sentiment across sources for a stock
247
269
  for source in youtube reddit; do
248
270
  echo "=== $source ==="
249
- rg -l "ticker:.*AAPL" content/processed/$source | xargs rg "sentiment:" -o | sort | uniq -c
271
+ rg -n -C 6 "ticker=AAPL" content/processed/$source/ | rg -o "sentiment=[^\n]+" | cut -d= -f2 | tr ',' '\n' | sort | uniq -c
250
272
  done
251
- ```
252
273
 
253
- ## Configuration
274
+ # Find all strong buy recommendations by sector
275
+ for sector in technology healthcare financials; do
276
+ echo "=== $sector ==="
277
+ rg -l "sector=$sector" content/processed/ | xargs rg -n -C 3 "recommendation=strong_buy" | head -5
278
+ done
279
+ ```
254
280
 
255
- ### LLM Model Format
281
+ **Advanced multi-criteria searches**:
256
282
 
257
- Greptor uses the following LLM model format: `provider:model-name`
283
+ ```bash
284
+ # Large-cap tech stocks with bullish sentiment
285
+ rg -l "market_cap=large_cap" content/processed/ | xargs rg -l "sector=technology" | xargs rg -n -C 6 "sentiment=bullish"
258
286
 
259
- The provider is an OpenAI API-compatible provider, such as `openai`, `azure`, `ollama`, `deepseek`, etc.
287
+ # Growth investing discussions about mega-cap stocks
288
+ rg -n -C 6 "investment_style=growth" content/processed/ | rg "market_cap=mega_cap"
260
289
 
261
- **Examples**:
290
+ # ETF recommendations from specific time period
291
+ rg -n -C 6 "asset_type=etf" content/processed/ --glob "**/2025-12/*.md" | rg "recommendation=buy|recommendation=strong_buy"
262
292
 
263
- ```typescript
264
- llmModel: 'openai:gpt-5-mini'
265
- llmModel: 'ollama:llama3-70b'
293
+ # Bearish sentiment on specific narrative
294
+ rg -n -C 6 "narrative=ev_transition" content/processed/ | rg "sentiment=bearish"
266
295
  ```
267
296
 
268
- **Important**: Use a model at least at the level of GPT-5-mini or better.
269
- **Required Environment Variables**:
270
-
271
- ```bash
272
- # For OpenAI models
273
- OPENAI_API_KEY=your_key_here
297
+ ## Configuration
274
298
 
275
- # For Azure OpenAI models
276
- AZURE_API_KEY=your_key_here
277
- AZURE_API_BASE_URL=https://your-azure-endpoint.com/v1
278
- ```
279
299
 
280
- ## Metadata Schemas
300
+ ## Tag Schemas
281
301
 
282
- If you don't provide a schema, Greptor can initialize one for your topic. However, for better results, provide a custom schema.
302
+ If you don't provide a schema, Greptor can initialize one for your topic. However, for better results, provide a custom tag schema.
283
303
 
284
304
  Here's a comprehensive example for investment research:
285
305
 
@@ -287,8 +307,8 @@ Here's a comprehensive example for investment research:
287
307
  const greptor = await createGreptor({
288
308
  baseDir: './projects/investing',
289
309
  topic: 'Investing, stock market, financial, and macroeconomics',
290
- llmModel: 'openai:gpt-5-mini',
291
- metadataSchema: [
310
+ model: openai("gpt-5-mini"),
311
+ tagSchema: [
292
312
  {
293
313
  name: 'company',
294
314
  type: 'string[]',
@@ -362,4 +382,4 @@ const greptor = await createGreptor({
362
382
 
363
383
  ## License
364
384
 
365
- MIT © Sergii Vashchyshchuk
385
+ MIT © Sergii Vashchyshchuk
@@ -1 +1 @@
1
- {"version":3,"file":"greptor.d.ts","sourceRoot":"","sources":["../src/greptor.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EACX,iBAAiB,EACjB,eAAe,EACf,gBAAgB,EAChB,cAAc,EACd,MAAM,YAAY,CAAC;AAcpB,MAAM,WAAW,OAAO;IACvB,GAAG,EAAE,CAAC,KAAK,EAAE,eAAe,KAAK,OAAO,CAAC,gBAAgB,CAAC,CAAC;IAC3D,WAAW,EAAE,CACZ,OAAO,EAAE,MAAM,EAAE,EACjB,SAAS,EAAE,OAAO,KACd,OAAO,CAAC,iBAAiB,CAAC,CAAC;CAChC;AAED,wBAAsB,aAAa,CAAC,OAAO,EAAE,cAAc,GAAG,OAAO,CAAC,OAAO,CAAC,CAqH7E"}
1
+ {"version":3,"file":"greptor.d.ts","sourceRoot":"","sources":["../src/greptor.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EACX,iBAAiB,EACjB,eAAe,EACf,gBAAgB,EAChB,cAAc,EACd,MAAM,YAAY,CAAC;AAapB,MAAM,WAAW,OAAO;IACvB,GAAG,EAAE,CAAC,KAAK,EAAE,eAAe,KAAK,OAAO,CAAC,gBAAgB,CAAC,CAAC;IAC3D,WAAW,EAAE,CACZ,OAAO,EAAE,MAAM,EAAE,EACjB,SAAS,EAAE,OAAO,KACd,OAAO,CAAC,iBAAiB,CAAC,CAAC;CAChC;AAED,wBAAsB,aAAa,CAAC,OAAO,EAAE,cAAc,GAAG,OAAO,CAAC,OAAO,CAAC,CAoH7E"}
package/dist/greptor.js CHANGED
@@ -1,27 +1,25 @@
1
1
  import path from "node:path";
2
2
  import YAML from "yaml";
3
- import { createLlmClient } from "./llm/llm-factory.js";
4
- import { initializeMetadataSchema } from "./metadata-schema/initialize.js";
3
+ import { initializeTagSchema } from "./tag-schema/initialize.js";
5
4
  import { createProcessingQueue, enqueueUnprocessedDocuments, startBackgroundWorkers, } from "./processing/processor.js";
6
5
  import { generateSkill } from "./skills/skill-generator.js";
7
6
  import { createFileStorage } from "./storage/file-storage.js";
8
7
  export async function createGreptor(options) {
9
- const { baseDir, logger } = options;
8
+ const { baseDir, logger, model } = options;
10
9
  const contentPath = path.join(baseDir, "content");
11
10
  const storage = createFileStorage(contentPath);
12
11
  logger?.debug?.("Initializing Greptor", { baseDir, topic: options.topic });
13
- const metadataSchema = await initializeMetadataSchema(storage.baseDir, options.llmModel, options.topic, options.metadataSchema, logger);
12
+ const tagSchema = await initializeTagSchema(storage.baseDir, model, options.topic, options.tagSchema, logger);
14
13
  const queue = createProcessingQueue();
15
14
  const queuedCount = await enqueueUnprocessedDocuments({
16
15
  storage,
17
16
  queue,
18
17
  ...(logger ? { logger } : {}),
19
18
  });
20
- const llm = createLlmClient(options.llmModel);
21
19
  const ctx = {
22
20
  domain: options.topic,
23
- metadataSchema: YAML.stringify(metadataSchema),
24
- llm,
21
+ tagSchema: YAML.stringify(tagSchema),
22
+ model,
25
23
  storage,
26
24
  ...(logger ? { logger } : {}),
27
25
  };
@@ -72,7 +70,7 @@ export async function createGreptor(options) {
72
70
  domain: options.topic,
73
71
  sources,
74
72
  baseDir: options.baseDir,
75
- metadataSchema,
73
+ tagSchema,
76
74
  overwrite,
77
75
  }, storage);
78
76
  return {
@@ -1 +1 @@
1
- {"version":3,"file":"greptor.js","sourceRoot":"","sources":["../src/greptor.ts"],"names":[],"mappings":"AAOA,OAAO,IAAI,MAAM,WAAW,CAAC;AAC7B,OAAO,IAAI,MAAM,MAAM,CAAC;AACxB,OAAO,EAAE,eAAe,EAAE,MAAM,sBAAsB,CAAC;AACvD,OAAO,EAAE,wBAAwB,EAAE,MAAM,iCAAiC,CAAC;AAC3E,OAAO,EACN,qBAAqB,EACrB,2BAA2B,EAC3B,sBAAsB,GACtB,MAAM,2BAA2B,CAAC;AACnC,OAAO,EAAE,aAAa,EAAE,MAAM,6BAA6B,CAAC;AAC5D,OAAO,EAAE,iBAAiB,EAAE,MAAM,2BAA2B,CAAC;AAU9D,MAAM,CAAC,KAAK,UAAU,aAAa,CAAC,OAAuB;IAC1D,MAAM,EAAE,OAAO,EAAE,MAAM,EAAE,GAAG,OAAO,CAAC;IACpC,MAAM,WAAW,GAAG,IAAI,CAAC,IAAI,CAAC,OAAO,EAAE,SAAS,CAAC,CAAC;IAClD,MAAM,OAAO,GAAG,iBAAiB,CAAC,WAAW,CAAC,CAAC;IAE/C,MAAM,EAAE,KAAK,EAAE,CAAC,sBAAsB,EAAE,EAAE,OAAO,EAAE,KAAK,EAAE,OAAO,CAAC,KAAK,EAAE,CAAC,CAAC;IAE3E,MAAM,cAAc,GAAG,MAAM,wBAAwB,CACpD,OAAO,CAAC,OAAO,EACf,OAAO,CAAC,QAAQ,EAChB,OAAO,CAAC,KAAK,EACb,OAAO,CAAC,cAAc,EACtB,MAAM,CACN,CAAC;IAEF,MAAM,KAAK,GAAG,qBAAqB,EAAE,CAAC;IACtC,MAAM,WAAW,GAAG,MAAM,2BAA2B,CAAC;QACrD,OAAO;QACP,KAAK;QACL,GAAG,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,MAAM,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;KAC7B,CAAC,CAAC;IAEH,MAAM,GAAG,GAAG,eAAe,CAAC,OAAO,CAAC,QAAQ,CAAC,CAAC;IAC9C,MAAM,GAAG,GAAG;QACX,MAAM,EAAE,OAAO,CAAC,KAAK;QACrB,cAAc,EAAE,IAAI,CAAC,SAAS,CAAC,cAAc,CAAC;QAC9C,GAAG;QACH,OAAO;QACP,GAAG,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,MAAM,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;KAC7B,CAAC;IAEF,sBAAsB,CAAC,EAAE,GAAG,EAAE,KAAK,EAAE,WAAW,EAAE,OAAO,CAAC,OAAO,IAAI,CAAC,EAAE,CAAC,CAAC;IAE1E,MAAM,EAAE,IAAI,EAAE,CAAC,qBAAqB,EAAE;QACrC,KAAK,EAAE,OAAO,CAAC,KAAK;QACpB,MAAM,EAAE,WAAW;KACnB,CAAC,CAAC;IAEH,KAAK,UAAU,GAAG,CAAC,KAAsB;QACxC,IAAI,KAAK,CAAC,MAAM,KAAK,MAAM,EAAE,CAAC;YAC7B,MAAM,EAAE,IAAI,EAAE,CAAC,oBAAoB,EAAE,EAAE,MAAM,EAAE,KAAK,CAAC,MAAM,EAAE,CAAC,CAAC;YAC/D,OAAO;gBACN,OAAO,EAAE,KAAK;gBACd,OAAO,EAAE,uBAAuB,KAAK,CAAC,MAAM,EAAE;aAC9C,CAAC;QACH,CAAC;QAED,MAAM,GAAG,GAAG,MAAM,OAAO,CAAC,cAAc,CAAC,KAAK,CAAC,CAAC;QAEhD,IAAI,GAAG,CAAC,IAAI,KAAK,WAAW,EAAE,CAAC;YAC9B,MAAM,EAAE,IAAI,EAAE,CAAC,mCAAmC,EAAE;gBACnD,GAAG,EAAE,GAAG,CAAC,GAAG;gBACZ,KAAK,EAAE,KAAK,CAAC,KAAK;aAClB,CAAC,CAAC;YACH,O
AAO;gBACN,OAAO,EAAE,KAAK;gBACd,OAAO,EAAE,0BAA0B;aACnC,CAAC;QACH,CAAC;QAED,IAAI,GAAG,CAAC,IAAI,KAAK,OAAO,EAAE,CAAC;YAC1B,OAAO;gBACN,OAAO,EAAE,KAAK;gBACd,OAAO,EAAE,GAAG,CAAC,OAAO;aACpB,CAAC;QACH,CAAC;QAED,KAAK,CAAC,OAAO,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC;QACvB,MAAM,EAAE,IAAI,EAAE,CAAC,mBAAmB,EAAE,EAAE,GAAG,EAAE,GAAG,CAAC,GAAG,EAAE,KAAK,EAAE,KAAK,CAAC,KAAK,EAAE,CAAC,CAAC;QAE1E,OAAO;YACN,OAAO,EAAE,IAAI;YACb,OAAO,EAAE,gBAAgB;YACzB,GAAG,EAAE,GAAG,CAAC,GAAG;SACZ,CAAC;IACH,CAAC;IAED,KAAK,UAAU,WAAW,CACzB,OAAiB,EACjB,SAAS,GAAG,KAAK;QAEjB,IAAI,CAAC;YACJ,MAAM,EAAE,IAAI,EAAE,CAAC,8BAA8B,EAAE;gBAC9C,MAAM,EAAE,OAAO,CAAC,KAAK;aACrB,CAAC,CAAC;YAEH,MAAM,EAAE,SAAS,EAAE,GAAG,MAAM,aAAa,CACxC;gBACC,MAAM,EAAE,OAAO,CAAC,KAAK;gBACrB,OAAO;gBACP,OAAO,EAAE,OAAO,CAAC,OAAO;gBACxB,cAAc;gBACd,SAAS;aACT,EACD,OAAO,CACP,CAAC;YAEF,OAAO;gBACN,OAAO,EAAE,IAAI;gBACb,OAAO,EAAE,oBAAoB,SAAS,EAAE;gBACxC,SAAS;aACT,CAAC;QACH,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YAChB,MAAM,YAAY,GACjB,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC;YACxD,MAAM,EAAE,KAAK,EAAE,CAAC,6BAA6B,YAAY,EAAE,CAAC,CAAC;YAC7D,OAAO;gBACN,OAAO,EAAE,KAAK;gBACd,OAAO,EAAE,YAAY;aACrB,CAAC;QACH,CAAC;IACF,CAAC;IAED,OAAO;QACN,GAAG;QACH,WAAW;KACX,CAAC;AACH,CAAC"}
1
+ {"version":3,"file":"greptor.js","sourceRoot":"","sources":["../src/greptor.ts"],"names":[],"mappings":"AAOA,OAAO,IAAI,MAAM,WAAW,CAAC;AAC7B,OAAO,IAAI,MAAM,MAAM,CAAC;AACxB,OAAO,EAAE,mBAAmB,EAAE,MAAM,4BAA4B,CAAC;AACjE,OAAO,EACN,qBAAqB,EACrB,2BAA2B,EAC3B,sBAAsB,GACtB,MAAM,2BAA2B,CAAC;AACnC,OAAO,EAAE,aAAa,EAAE,MAAM,6BAA6B,CAAC;AAC5D,OAAO,EAAE,iBAAiB,EAAE,MAAM,2BAA2B,CAAC;AAU9D,MAAM,CAAC,KAAK,UAAU,aAAa,CAAC,OAAuB;IAC1D,MAAM,EAAE,OAAO,EAAE,MAAM,EAAE,KAAK,EAAE,GAAG,OAAO,CAAC;IAC3C,MAAM,WAAW,GAAG,IAAI,CAAC,IAAI,CAAC,OAAO,EAAE,SAAS,CAAC,CAAC;IAClD,MAAM,OAAO,GAAG,iBAAiB,CAAC,WAAW,CAAC,CAAC;IAE/C,MAAM,EAAE,KAAK,EAAE,CAAC,sBAAsB,EAAE,EAAE,OAAO,EAAE,KAAK,EAAE,OAAO,CAAC,KAAK,EAAE,CAAC,CAAC;IAE3E,MAAM,SAAS,GAAG,MAAM,mBAAmB,CAC1C,OAAO,CAAC,OAAO,EACf,KAAK,EACL,OAAO,CAAC,KAAK,EACb,OAAO,CAAC,SAAS,EACjB,MAAM,CACN,CAAC;IAEF,MAAM,KAAK,GAAG,qBAAqB,EAAE,CAAC;IACtC,MAAM,WAAW,GAAG,MAAM,2BAA2B,CAAC;QACrD,OAAO;QACP,KAAK;QACL,GAAG,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,MAAM,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;KAC7B,CAAC,CAAC;IAEH,MAAM,GAAG,GAAG;QACX,MAAM,EAAE,OAAO,CAAC,KAAK;QACrB,SAAS,EAAE,IAAI,CAAC,SAAS,CAAC,SAAS,CAAC;QACpC,KAAK;QACL,OAAO;QACP,GAAG,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,MAAM,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;KAC7B,CAAC;IAEF,sBAAsB,CAAC,EAAE,GAAG,EAAE,KAAK,EAAE,WAAW,EAAE,OAAO,CAAC,OAAO,IAAI,CAAC,EAAE,CAAC,CAAC;IAE1E,MAAM,EAAE,IAAI,EAAE,CAAC,qBAAqB,EAAE;QACrC,KAAK,EAAE,OAAO,CAAC,KAAK;QACpB,MAAM,EAAE,WAAW;KACnB,CAAC,CAAC;IAEH,KAAK,UAAU,GAAG,CAAC,KAAsB;QACxC,IAAI,KAAK,CAAC,MAAM,KAAK,MAAM,EAAE,CAAC;YAC7B,MAAM,EAAE,IAAI,EAAE,CAAC,oBAAoB,EAAE,EAAE,MAAM,EAAE,KAAK,CAAC,MAAM,EAAE,CAAC,CAAC;YAC/D,OAAO;gBACN,OAAO,EAAE,KAAK;gBACd,OAAO,EAAE,uBAAuB,KAAK,CAAC,MAAM,EAAE;aAC9C,CAAC;QACH,CAAC;QAED,MAAM,GAAG,GAAG,MAAM,OAAO,CAAC,cAAc,CAAC,KAAK,CAAC,CAAC;QAEhD,IAAI,GAAG,CAAC,IAAI,KAAK,WAAW,EAAE,CAAC;YAC9B,MAAM,EAAE,IAAI,EAAE,CAAC,mCAAmC,EAAE;gBACnD,GAAG,EAAE,GAAG,CAAC,GAAG;gBACZ,KAAK,EAAE,KAAK,CAAC,KAAK;aAClB,CAAC,CAAC;YACH,OAAO;gBACN,OAAO,EAAE,KAAK;gBACd,OAAO,EAAE,0BAA0B;aACnC,CAAC;QACH,CAAC;QAED,IAAI,GAAG,CAAC,IAAI,KAAK,O
AAO,EAAE,CAAC;YAC1B,OAAO;gBACN,OAAO,EAAE,KAAK;gBACd,OAAO,EAAE,GAAG,CAAC,OAAO;aACpB,CAAC;QACH,CAAC;QAED,KAAK,CAAC,OAAO,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC;QACvB,MAAM,EAAE,IAAI,EAAE,CAAC,mBAAmB,EAAE,EAAE,GAAG,EAAE,GAAG,CAAC,GAAG,EAAE,KAAK,EAAE,KAAK,CAAC,KAAK,EAAE,CAAC,CAAC;QAE1E,OAAO;YACN,OAAO,EAAE,IAAI;YACb,OAAO,EAAE,gBAAgB;YACzB,GAAG,EAAE,GAAG,CAAC,GAAG;SACZ,CAAC;IACH,CAAC;IAED,KAAK,UAAU,WAAW,CACzB,OAAiB,EACjB,SAAS,GAAG,KAAK;QAEjB,IAAI,CAAC;YACJ,MAAM,EAAE,IAAI,EAAE,CAAC,8BAA8B,EAAE;gBAC9C,MAAM,EAAE,OAAO,CAAC,KAAK;aACrB,CAAC,CAAC;YAEH,MAAM,EAAE,SAAS,EAAE,GAAG,MAAM,aAAa,CACxC;gBACC,MAAM,EAAE,OAAO,CAAC,KAAK;gBACrB,OAAO;gBACP,OAAO,EAAE,OAAO,CAAC,OAAO;gBACxB,SAAS;gBACT,SAAS;aACT,EACD,OAAO,CACP,CAAC;YAEF,OAAO;gBACN,OAAO,EAAE,IAAI;gBACb,OAAO,EAAE,oBAAoB,SAAS,EAAE;gBACxC,SAAS;aACT,CAAC;QACH,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YAChB,MAAM,YAAY,GACjB,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC;YACxD,MAAM,EAAE,KAAK,EAAE,CAAC,6BAA6B,YAAY,EAAE,CAAC,CAAC;YAC7D,OAAO;gBACN,OAAO,EAAE,KAAK;gBACd,OAAO,EAAE,YAAY;aACrB,CAAC;QACH,CAAC;IACF,CAAC;IAED,OAAO;QACN,GAAG;QACH,WAAW;KACX,CAAC;AACH,CAAC"}
package/dist/index.d.ts CHANGED
@@ -1,4 +1,5 @@
1
1
  export * from "./types.js";
2
2
  export type { Greptor } from "./greptor.js";
3
3
  export { createGreptor } from "./greptor.js";
4
+ export type { LanguageModel } from "ai";
4
5
  //# sourceMappingURL=index.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,cAAc,YAAY,CAAC;AAE3B,YAAY,EAAE,OAAO,EAAE,MAAM,cAAc,CAAC;AAC5C,OAAO,EAAE,aAAa,EAAE,MAAM,cAAc,CAAC"}
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,cAAc,YAAY,CAAC;AAE3B,YAAY,EAAE,OAAO,EAAE,MAAM,cAAc,CAAC;AAC5C,OAAO,EAAE,aAAa,EAAE,MAAM,cAAc,CAAC;AAG7C,YAAY,EAAE,aAAa,EAAE,MAAM,IAAI,CAAC"}
@@ -1,10 +1,10 @@
1
- import type { LlmClient } from "../llm/llm-factory.js";
1
+ import { type LanguageModel } from "ai";
2
2
  import type { DocumentRef, FileStorage } from "../storage/index.js";
3
3
  import type { Logger } from "../types.js";
4
4
  export interface ProcessorContext {
5
5
  domain: string;
6
- metadataSchema: string;
7
- llm: LlmClient;
6
+ tagSchema: string;
7
+ model: LanguageModel;
8
8
  storage: FileStorage;
9
9
  logger?: Logger;
10
10
  }
@@ -1 +1 @@
1
- {"version":3,"file":"processor.d.ts","sourceRoot":"","sources":["../../src/processing/processor.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,SAAS,EAAE,MAAM,uBAAuB,CAAC;AACvD,OAAO,KAAK,EAAE,WAAW,EAAE,WAAW,EAAE,MAAM,qBAAqB,CAAC;AACpE,OAAO,KAAK,EAAE,MAAM,EAAY,MAAM,aAAa,CAAC;AAMpD,MAAM,WAAW,gBAAgB;IAChC,MAAM,EAAE,MAAM,CAAC;IACf,cAAc,EAAE,MAAM,CAAC;IACvB,GAAG,EAAE,SAAS,CAAC;IACf,OAAO,EAAE,WAAW,CAAC;IACrB,MAAM,CAAC,EAAE,MAAM,CAAC;CAChB;AAED,MAAM,WAAW,eAAe;IAC/B,OAAO,EAAE,CAAC,GAAG,EAAE,WAAW,KAAK,IAAI,CAAC;IACpC,OAAO,EAAE,MAAM,WAAW,GAAG,SAAS,CAAC;IACvC,IAAI,EAAE,MAAM,MAAM,CAAC;CACnB;AAED,wBAAgB,qBAAqB,IAAI,eAAe,CAgBvD;AAgFD,wBAAgB,sBAAsB,CAAC,IAAI,EAAE;IAC5C,GAAG,EAAE,gBAAgB,CAAC;IACtB,KAAK,EAAE,eAAe,CAAC;IACvB,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,WAAW,CAAC,EAAE,MAAM,CAAC;CACrB,GAAG,IAAI,CAkCP;AAED,wBAAsB,2BAA2B,CAAC,IAAI,EAAE;IACvD,OAAO,EAAE,WAAW,CAAC;IACrB,KAAK,EAAE,eAAe,CAAC;IACvB,MAAM,CAAC,EAAE,MAAM,CAAC;CAChB,GAAG,OAAO,CAAC,MAAM,CAAC,CAalB"}
1
+ {"version":3,"file":"processor.d.ts","sourceRoot":"","sources":["../../src/processing/processor.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,KAAK,aAAa,EAAgB,MAAM,IAAI,CAAC;AAEtD,OAAO,KAAK,EAAE,WAAW,EAAE,WAAW,EAAE,MAAM,qBAAqB,CAAC;AACpE,OAAO,KAAK,EAAE,MAAM,EAAQ,MAAM,aAAa,CAAC;AAIhD,MAAM,WAAW,gBAAgB;IAChC,MAAM,EAAE,MAAM,CAAC;IACf,SAAS,EAAE,MAAM,CAAC;IAClB,KAAK,EAAE,aAAa,CAAC;IACrB,OAAO,EAAE,WAAW,CAAC;IACrB,MAAM,CAAC,EAAE,MAAM,CAAC;CAChB;AAED,MAAM,WAAW,eAAe;IAC/B,OAAO,EAAE,CAAC,GAAG,EAAE,WAAW,KAAK,IAAI,CAAC;IACpC,OAAO,EAAE,MAAM,WAAW,GAAG,SAAS,CAAC;IACvC,IAAI,EAAE,MAAM,MAAM,CAAC;CACnB;AAED,wBAAgB,qBAAqB,IAAI,eAAe,CAgBvD;AAmID,wBAAgB,sBAAsB,CAAC,IAAI,EAAE;IAC5C,GAAG,EAAE,gBAAgB,CAAC;IACtB,KAAK,EAAE,eAAe,CAAC;IACvB,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,WAAW,CAAC,EAAE,MAAM,CAAC;CACrB,GAAG,IAAI,CAkCP;AAED,wBAAsB,2BAA2B,CAAC,IAAI,EAAE;IACvD,OAAO,EAAE,WAAW,CAAC;IACrB,KAAK,EAAE,eAAe,CAAC;IACvB,MAAM,CAAC,EAAE,MAAM,CAAC;CAChB,GAAG,OAAO,CAAC,MAAM,CAAC,CAalB"}
@@ -1,6 +1,5 @@
1
+ import { generateText } from "ai";
1
2
  import YAML from "yaml";
2
- import { chunk as chunkDocument } from "./chunk.js";
3
- import { extractMetadata } from "./extract-metadata.js";
4
3
  const DEFAULT_IDLE_SLEEP_MS = 750;
5
4
  export function createProcessingQueue() {
6
5
  const items = [];
@@ -16,12 +15,8 @@ export function createProcessingQueue() {
16
15
  },
17
16
  };
18
17
  }
19
- function renderProcessedDocument(metadata, chunkMetadata, chunkContent) {
20
- const combinedMetadata = {
21
- ...metadata,
22
- chunks: chunkMetadata,
23
- };
24
- const doc = new YAML.Document(combinedMetadata);
18
+ function renderProcessedDocument(tags, chunkContent) {
19
+ const doc = new YAML.Document(tags);
25
20
  YAML.visit(doc, {
26
21
  Seq(_, node) {
27
22
  const allScalars = node.items.every((item) => YAML.isScalar(item));
@@ -30,33 +25,94 @@ function renderProcessedDocument(metadata, chunkMetadata, chunkContent) {
30
25
  }
31
26
  },
32
27
  });
33
- const renderedMetadata = doc.toString({ lineWidth: 200 });
34
- return [
35
- "---",
36
- renderedMetadata.trimEnd(),
37
- "---",
38
- "",
39
- chunkContent.trim(),
40
- ].join("\n");
28
+ const renderedTags = doc.toString({ lineWidth: 200 });
29
+ return ["---", renderedTags.trimEnd(), "---", "", chunkContent.trim()].join("\n");
30
+ }
31
+ function createProcessingPrompt(rawContent, domain, tagSchema) {
32
+ return `
33
+ # INSTRUCTIONS
34
+ Clean, chunk, and tag the raw content for **grep-based search** in the domain: ${domain}.
35
+
36
+ ## Core Principle
37
+ Optimize for **single-pass grep scanning**: a single grep hit should reveal what a chunk is about without reading other chunks.
38
+
39
+ ## Objectives
40
+ - Remove noise and boilerplate: ads, sponsors, intros/outros, CTAs, repetitions, contact or social links, and sign-offs.
41
+ - Preserve **all meaning and factual detail exactly** (facts, names, dates, numbers, ranges, uncertainty, conditions, and meaningful URLs).
42
+ - Use **minimal wording** while keeping all information.
43
+ - Chunk the content into **semantic sections** (prefer fewer, richer chunks when possible; do not pad content to reach size targets).
44
+
45
+ ## Output Format (Markdown only)
46
+
47
+ \`\`\`md
48
+ ## 01 Short descriptive title for chunk 1
49
+ field_1=value_1,value_4
50
+ field_2=value_2,
51
+ field_3=value_3,
52
+ <cleaned, condensed content>
53
+
54
+ ## 02 Short descriptive title for chunk 2
55
+ field_1=value_1
56
+ field_4=value_4
57
+ field_5=value_5,value_6
58
+ <cleaned, condensed content>
59
+ \`\`\`
60
+
61
+ ## Tagging Rules
62
+ - Use ONLY fields defined in the SCHEMA (field names must exactly match schema).
63
+ - Do not invent new fields.
64
+ - Omit fields with no value.
65
+ - One tag field per line.
66
+ - DO NOT duplicate fields. For arrays, use comma-separated values.
67
+ - For enums, use only allowed enum values from the schema.
68
+ - Use ISO-8601 for dates (YYYY-MM-DD).
69
+ - Keep tag values grep-friendly:
70
+ - snake_case where appropriate
71
+ - tickers, codes, and symbols in UPPERCASE
72
+ - Maintain a tag order as per schema.
73
+
74
+ ## Content Rules
75
+ - Output MUST be plain text or Markdown with simple formatting (headings, lists, bold/italic).
76
+ - Rewrite content to be token-efficient and grep-efficient without altering meaning.
77
+ - Content MUST be split into short paragraphs separated by blank lines.
78
+ - Each paragraph MUST be 1-3 sentences.
79
+ - Each sentence MUST be declarative and information-dense.
80
+ - Keep entities, tickers, and terms explicit; avoid pronouns.
81
+ - Normalize numbers (e.g. "1,000,000.00", "24%").
82
+ - Preserve uncertainty, ranges, and conditional statements exactly.
83
+ - Do not add interpretation, synthesis, or analysis.
84
+
85
+ # TAG SCHEMA:
86
+ ${tagSchema}
87
+
88
+ # RAW CONTENT:
89
+ ${rawContent}
90
+ `;
41
91
  }
42
92
  async function processDocument(ref, ctx) {
43
93
  // 1. Read raw content
44
- const { metadata, content } = await ctx.storage.readRawContent(ref);
45
- const contentLength = content.length;
46
- // 2. Chunk content with LLM
47
- ctx.logger?.debug?.("Chunking document", { ref, step: "chunk" });
48
- const chunkContent = await chunkDocument(content, ctx.domain, ctx.llm);
49
- // 3. Extract metadata with LLM
50
- ctx.logger?.debug?.("Extracting metadata", { ref, step: "metadata" });
51
- const chunkMetadata = await extractMetadata(chunkContent, ctx.domain, ctx.metadataSchema, ctx.llm);
52
- // 4. Parse chunk metadata and render final content
53
- const rendered = renderProcessedDocument(metadata, chunkMetadata, chunkContent);
54
- // 5. Save processed content
94
+ const { tags, content } = await ctx.storage.readRawContent(ref);
95
+ // 2. Clean + chunk + tag with a single LLM call
96
+ ctx.logger?.debug?.("Processing document", { ref, step: "single-pass" });
97
+ const prompt = createProcessingPrompt(content, ctx.domain, ctx.tagSchema);
98
+ const { text, usage } = await generateText({
99
+ model: ctx.model,
100
+ prompt,
101
+ });
102
+ if (!text) {
103
+ throw new Error("Failed to process content: empty LLM response");
104
+ }
105
+ // 3. Render final content with document-level YAML only
106
+ const rendered = renderProcessedDocument(tags, text);
107
+ // 4. Save processed content
55
108
  await ctx.storage.saveProcessedContent(ref, rendered);
56
109
  ctx.logger?.info?.("Document processed", {
57
110
  ref,
58
- chunks: chunkMetadata.length,
59
- bytes: contentLength,
111
+ inputCacheReadTokens: usage?.inputTokenDetails.cacheReadTokens,
112
+ inputCacheWriteTokens: usage?.inputTokenDetails.cacheWriteTokens,
113
+ inputTokens: usage?.inputTokens,
114
+ outputTokens: usage?.outputTokens,
115
+ totalTokens: usage?.totalTokens,
60
116
  });
61
117
  }
62
118
  function sleep(ms) {
@@ -1 +1 @@
1
- {"version":3,"file":"processor.js","sourceRoot":"","sources":["../../src/processing/processor.ts"],"names":[],"mappings":"AAAA,OAAO,IAAI,MAAM,MAAM,CAAC;AAIxB,OAAO,EAAE,KAAK,IAAI,aAAa,EAAE,MAAM,YAAY,CAAC;AACpD,OAAO,EAAE,eAAe,EAAE,MAAM,uBAAuB,CAAC;AAExD,MAAM,qBAAqB,GAAG,GAAG,CAAC;AAgBlC,MAAM,UAAU,qBAAqB;IACpC,MAAM,KAAK,GAAkB,EAAE,CAAC;IAEhC,OAAO;QACN,OAAO,CAAC,GAAG;YACV,KAAK,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;QACjB,CAAC;QAED,IAAI;YACH,OAAO,KAAK,CAAC,MAAM,CAAC;QACrB,CAAC;QAED,OAAO;YACN,OAAO,KAAK,CAAC,KAAK,EAAE,CAAC;QACtB,CAAC;KACD,CAAC;AACH,CAAC;AAED,SAAS,uBAAuB,CAC/B,QAAkB,EAClB,aAAyB,EACzB,YAAoB;IAEpB,MAAM,gBAAgB,GAAG;QACxB,GAAG,QAAQ;QACX,MAAM,EAAE,aAAa;KACrB,CAAC;IAEF,MAAM,GAAG,GAAG,IAAI,IAAI,CAAC,QAAQ,CAAC,gBAAgB,CAAC,CAAC;IAEhD,IAAI,CAAC,KAAK,CAAC,GAAG,EAAE;QACf,GAAG,CAAC,CAAC,EAAE,IAAI;YACV,MAAM,UAAU,GAAG,IAAI,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,IAAI,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAC,CAAC;YACnE,IAAI,UAAU,EAAE,CAAC;gBAChB,IAAI,CAAC,IAAI,GAAG,IAAI,CAAC;YAClB,CAAC;QACF,CAAC;KACD,CAAC,CAAC;IAEH,MAAM,gBAAgB,GAAG,GAAG,CAAC,QAAQ,CAAC,EAAE,SAAS,EAAE,GAAG,EAAE,CAAC,CAAC;IAE1D,OAAO;QACN,KAAK;QACL,gBAAgB,CAAC,OAAO,EAAE;QAC1B,KAAK;QACL,EAAE;QACF,YAAY,CAAC,IAAI,EAAE;KACnB,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;AACd,CAAC;AAED,KAAK,UAAU,eAAe,CAC7B,GAAgB,EAChB,GAAqB;IAErB,sBAAsB;IACtB,MAAM,EAAE,QAAQ,EAAE,OAAO,EAAE,GAAG,MAAM,GAAG,CAAC,OAAO,CAAC,cAAc,CAAC,GAAG,CAAC,CAAC;IACpE,MAAM,aAAa,GAAG,OAAO,CAAC,MAAM,CAAC;IAErC,4BAA4B;IAC5B,GAAG,CAAC,MAAM,EAAE,KAAK,EAAE,CAAC,mBAAmB,EAAE,EAAE,GAAG,EAAE,IAAI,EAAE,OAAO,EAAE,CAAC,CAAC;IACjE,MAAM,YAAY,GAAG,MAAM,aAAa,CAAC,OAAO,EAAE,GAAG,CAAC,MAAM,EAAE,GAAG,CAAC,GAAG,CAAC,CAAC;IAEvE,+BAA+B;IAC/B,GAAG,CAAC,MAAM,EAAE,KAAK,EAAE,CAAC,qBAAqB,EAAE,EAAE,GAAG,EAAE,IAAI,EAAE,UAAU,EAAE,CAAC,CAAC;IACtE,MAAM,aAAa,GAAG,MAAM,eAAe,CAC1C,YAAY,EACZ,GAAG,CAAC,MAAM,EACV,GAAG,CAAC,cAAc,EAClB,GAAG,CAAC,GAAG,CACP,CAAC;IAEF,mDAAmD;IACnD,MAAM,QAAQ,GAAG,uBAAuB,CACvC,QAAQ,EACR,aAAa,EACb,YAAY,CACZ,CAAC;IAEF,4BAA4B;IAC5B,MAAM,GAAG,CAAC,OAAO,CAAC,oBAAoB,CAAC,GAAG,EAAE,QAAQ,CAAC,C
AAC;IAEtD,GAAG,CAAC,MAAM,EAAE,IAAI,EAAE,CAAC,oBAAoB,EAAE;QACxC,GAAG;QACH,MAAM,EAAE,aAAa,CAAC,MAAM;QAC5B,KAAK,EAAE,aAAa;KACpB,CAAC,CAAC;AACJ,CAAC;AAED,SAAS,KAAK,CAAC,EAAU;IACxB,OAAO,IAAI,OAAO,CAAC,CAAC,OAAO,EAAE,EAAE;QAC9B,MAAM,CAAC,GAAG,UAAU,CAAC,OAAO,EAAE,EAAE,CAAC,CAAC;QAClC,kEAAkE;QACjE,CAAuC,CAAC,KAAK,EAAE,EAAE,CAAC;IACpD,CAAC,CAAC,CAAC;AACJ,CAAC;AAED,MAAM,UAAU,sBAAsB,CAAC,IAKtC;IACA,MAAM,WAAW,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,IAAI,CAAC,WAAW,IAAI,CAAC,CAAC,CAAC;IACvD,MAAM,WAAW,GAAG,IAAI,CAAC,GAAG,CAAC,EAAE,EAAE,IAAI,CAAC,WAAW,IAAI,qBAAqB,CAAC,CAAC;IAC5E,MAAM,EAAE,GAAG,EAAE,KAAK,EAAE,GAAG,IAAI,CAAC;IAE5B,KAAK,UAAU,UAAU,CAAC,WAAmB;QAC5C,OAAO,IAAI,EAAE,CAAC;YACb,MAAM,MAAM,GAAG,KAAK,CAAC,OAAO,EAAE,CAAC;YAC/B,IAAI,CAAC,MAAM,EAAE,CAAC;gBACb,MAAM,KAAK,CAAC,WAAW,CAAC,CAAC;gBACzB,SAAS;YACV,CAAC;YAED,GAAG,CAAC,MAAM,EAAE,KAAK,EAAE,CAAC,oBAAoB,EAAE;gBACzC,MAAM,EAAE,WAAW;gBACnB,GAAG,EAAE,MAAM;aACX,CAAC,CAAC;YACH,IAAI,CAAC;gBACJ,MAAM,eAAe,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC;YACpC,CAAC;YAAC,OAAO,KAAK,EAAE,CAAC;gBAChB,GAAG,CAAC,MAAM,EAAE,KAAK,EAAE,CAAC,mBAAmB,EAAE;oBACxC,GAAG,EAAE,KAAK;oBACV,GAAG,EAAE,MAAM;oBACX,MAAM,EAAE,WAAW;iBACnB,CAAC,CAAC;YACJ,CAAC;QACF,CAAC;IACF,CAAC;IAED,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,WAAW,EAAE,CAAC,EAAE,EAAE,CAAC;QACtC,UAAU,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC;IACnB,CAAC;IAED,GAAG,CAAC,MAAM,EAAE,KAAK,EAAE,CAAC,4BAA4B,EAAE,EAAE,WAAW,EAAE,CAAC,CAAC;AACpE,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,2BAA2B,CAAC,IAIjD;IACA,MAAM,IAAI,GAAG,MAAM,IAAI,CAAC,OAAO,CAAC,sBAAsB,EAAE,CAAC;IAEzD,KAAK,MAAM,GAAG,IAAI,IAAI,EAAE,CAAC;QACxB,IAAI,CAAC,MAAM,EAAE,KAAK,EAAE,CAAC,6BAA6B,EAAE,EAAE,GAAG,EAAE,CAAC,CAAC;QAC7D,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC;IACzB,CAAC;IAED,IAAI,IAAI,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QACrB,IAAI,CAAC,MAAM,EAAE,KAAK,EAAE,CAAC,6BAA6B,EAAE,EAAE,KAAK,EAAE,IAAI,CAAC,MAAM,EAAE,CAAC,CAAC;IAC7E,CAAC;IAED,OAAO,IAAI,CAAC,MAAM,CAAC;AACpB,CAAC"}
1
+ {"version":3,"file":"processor.js","sourceRoot":"","sources":["../../src/processing/processor.ts"],"names":[],"mappings":"AAAA,OAAO,EAAsB,YAAY,EAAE,MAAM,IAAI,CAAC;AACtD,OAAO,IAAI,MAAM,MAAM,CAAC;AAIxB,MAAM,qBAAqB,GAAG,GAAG,CAAC;AAgBlC,MAAM,UAAU,qBAAqB;IACpC,MAAM,KAAK,GAAkB,EAAE,CAAC;IAEhC,OAAO;QACN,OAAO,CAAC,GAAG;YACV,KAAK,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;QACjB,CAAC;QAED,IAAI;YACH,OAAO,KAAK,CAAC,MAAM,CAAC;QACrB,CAAC;QAED,OAAO;YACN,OAAO,KAAK,CAAC,KAAK,EAAE,CAAC;QACtB,CAAC;KACD,CAAC;AACH,CAAC;AAED,SAAS,uBAAuB,CAAC,IAAU,EAAE,YAAoB;IAChE,MAAM,GAAG,GAAG,IAAI,IAAI,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAC;IAEpC,IAAI,CAAC,KAAK,CAAC,GAAG,EAAE;QACf,GAAG,CAAC,CAAC,EAAE,IAAI;YACV,MAAM,UAAU,GAAG,IAAI,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,IAAI,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAC,CAAC;YACnE,IAAI,UAAU,EAAE,CAAC;gBAChB,IAAI,CAAC,IAAI,GAAG,IAAI,CAAC;YAClB,CAAC;QACF,CAAC;KACD,CAAC,CAAC;IAEH,MAAM,YAAY,GAAG,GAAG,CAAC,QAAQ,CAAC,EAAE,SAAS,EAAE,GAAG,EAAE,CAAC,CAAC;IAEtD,OAAO,CAAC,KAAK,EAAE,YAAY,CAAC,OAAO,EAAE,EAAE,KAAK,EAAE,EAAE,EAAE,YAAY,CAAC,IAAI,EAAE,CAAC,CAAC,IAAI,CAC1E,IAAI,CACJ,CAAC;AACH,CAAC;AAED,SAAS,sBAAsB,CAC9B,UAAkB,EAClB,MAAc,EACd,SAAiB;IAEjB,OAAO;;iFAEyE,MAAM;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;EAoDrF,SAAS;;;EAGT,UAAU;CACX,CAAC;AACF,CAAC;AAED,KAAK,UAAU,eAAe,CAC7B,GAAgB,EAChB,GAAqB;IAErB,sBAAsB;IACtB,MAAM,EAAE,IAAI,EAAE,OAAO,EAAE,GAAG,MAAM,GAAG,CAAC,OAAO,CAAC,cAAc,CAAC,GAAG,CAAC,CAAC;IAEhE,gDAAgD;IAChD,GAAG,CAAC,MAAM,EAAE,KAAK,EAAE,CAAC,qBAAqB,EAAE,EAAE,GAAG,EAAE,IAAI,EAAE,aAAa,EAAE,CAAC,CAAC;IACzE,MAAM,MAAM,GAAG,sBAAsB,CAAC,OAAO,EAAE,GAAG,CAAC,MAAM,EAAE,GAAG,CAAC,SAAS,CAAC,CAAC;IAE1E,MAAM,EAAE,IAAI,EAAE,KAAK,EAAE,GAAG,MAAM,YAAY,CAAC;QAC1C,KAAK,EAAE,GAAG,CAAC,KAAK;QAChB,MAAM;KACN,CAAC,CAAC;IAEH,IAAI,CAAC,IAAI,EAAE,CAAC;QACX,MAAM,IAAI,KAAK,CAAC,+CAA+C,CAAC,CAAC;IAClE,CAAC;IAED,wDAAwD;IACxD,MAAM,QAAQ,GAAG,uBAAuB,CAAC,IAAI,EAAE,IAAI,CAAC,CAAC;IAErD,4BAA4B;IAC5B,MAAM,GAAG,CAAC,OAAO,CAAC,oBAAoB,CAAC,GAAG,EAAE,QAAQ,CAAC,CAAC;IAEtD,GAAG,CAAC,MAAM,EAAE,IAAI,EAAE,CAAC,oBAAoB,EAA
E;QACxC,GAAG;QACH,oBAAoB,EAAE,KAAK,EAAE,iBAAiB,CAAC,eAAe;QAC9D,qBAAqB,EAAE,KAAK,EAAE,iBAAiB,CAAC,gBAAgB;QAChE,WAAW,EAAE,KAAK,EAAE,WAAW;QAC/B,YAAY,EAAE,KAAK,EAAE,YAAY;QACjC,WAAW,EAAE,KAAK,EAAE,WAAW;KAC/B,CAAC,CAAC;AACJ,CAAC;AAED,SAAS,KAAK,CAAC,EAAU;IACxB,OAAO,IAAI,OAAO,CAAC,CAAC,OAAO,EAAE,EAAE;QAC9B,MAAM,CAAC,GAAG,UAAU,CAAC,OAAO,EAAE,EAAE,CAAC,CAAC;QAClC,kEAAkE;QACjE,CAAuC,CAAC,KAAK,EAAE,EAAE,CAAC;IACpD,CAAC,CAAC,CAAC;AACJ,CAAC;AAED,MAAM,UAAU,sBAAsB,CAAC,IAKtC;IACA,MAAM,WAAW,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,IAAI,CAAC,WAAW,IAAI,CAAC,CAAC,CAAC;IACvD,MAAM,WAAW,GAAG,IAAI,CAAC,GAAG,CAAC,EAAE,EAAE,IAAI,CAAC,WAAW,IAAI,qBAAqB,CAAC,CAAC;IAC5E,MAAM,EAAE,GAAG,EAAE,KAAK,EAAE,GAAG,IAAI,CAAC;IAE5B,KAAK,UAAU,UAAU,CAAC,WAAmB;QAC5C,OAAO,IAAI,EAAE,CAAC;YACb,MAAM,MAAM,GAAG,KAAK,CAAC,OAAO,EAAE,CAAC;YAC/B,IAAI,CAAC,MAAM,EAAE,CAAC;gBACb,MAAM,KAAK,CAAC,WAAW,CAAC,CAAC;gBACzB,SAAS;YACV,CAAC;YAED,GAAG,CAAC,MAAM,EAAE,KAAK,EAAE,CAAC,oBAAoB,EAAE;gBACzC,MAAM,EAAE,WAAW;gBACnB,GAAG,EAAE,MAAM;aACX,CAAC,CAAC;YACH,IAAI,CAAC;gBACJ,MAAM,eAAe,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC;YACpC,CAAC;YAAC,OAAO,KAAK,EAAE,CAAC;gBAChB,GAAG,CAAC,MAAM,EAAE,KAAK,EAAE,CAAC,mBAAmB,EAAE;oBACxC,GAAG,EAAE,KAAK;oBACV,GAAG,EAAE,MAAM;oBACX,MAAM,EAAE,WAAW;iBACnB,CAAC,CAAC;YACJ,CAAC;QACF,CAAC;IACF,CAAC;IAED,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,WAAW,EAAE,CAAC,EAAE,EAAE,CAAC;QACtC,UAAU,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC;IACnB,CAAC;IAED,GAAG,CAAC,MAAM,EAAE,KAAK,EAAE,CAAC,4BAA4B,EAAE,EAAE,WAAW,EAAE,CAAC,CAAC;AACpE,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,2BAA2B,CAAC,IAIjD;IACA,MAAM,IAAI,GAAG,MAAM,IAAI,CAAC,OAAO,CAAC,sBAAsB,EAAE,CAAC;IAEzD,KAAK,MAAM,GAAG,IAAI,IAAI,EAAE,CAAC;QACxB,IAAI,CAAC,MAAM,EAAE,KAAK,EAAE,CAAC,6BAA6B,EAAE,EAAE,GAAG,EAAE,CAAC,CAAC;QAC7D,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC;IACzB,CAAC;IAED,IAAI,IAAI,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QACrB,IAAI,CAAC,MAAM,EAAE,KAAK,EAAE,CAAC,6BAA6B,EAAE,EAAE,KAAK,EAAE,IAAI,CAAC,MAAM,EAAE,CAAC,CAAC;IAC7E,CAAC;IAED,OAAO,IAAI,CAAC,MAAM,CAAC;AACpB,CAAC"}
@@ -1,10 +1,10 @@
1
1
  import type { FileStorage } from "../storage/file-storage.js";
2
- import type { MetadataSchemaItem } from "../types.js";
2
+ import type { TagSchemaItem } from "../types.js";
3
3
  export interface SkillGeneratorOptions {
4
4
  domain: string;
5
5
  sources: string[];
6
6
  baseDir: string;
7
- metadataSchema: MetadataSchemaItem[];
7
+ tagSchema: TagSchemaItem[];
8
8
  overwrite: boolean;
9
9
  }
10
10
  /**
@@ -1 +1 @@
1
- {"version":3,"file":"skill-generator.d.ts","sourceRoot":"","sources":["../../src/skills/skill-generator.ts"],"names":[],"mappings":"AAEA,OAAO,KAAK,EAAE,WAAW,EAAE,MAAM,4BAA4B,CAAC;AAC9D,OAAO,KAAK,EAAE,kBAAkB,EAAE,MAAM,aAAa,CAAC;AAGtD,MAAM,WAAW,qBAAqB;IACrC,MAAM,EAAE,MAAM,CAAC;IACf,OAAO,EAAE,MAAM,EAAE,CAAC;IAClB,OAAO,EAAE,MAAM,CAAC;IAChB,cAAc,EAAE,kBAAkB,EAAE,CAAC;IACrC,SAAS,EAAE,OAAO,CAAC;CACnB;AAqND;;GAEG;AACH,wBAAsB,aAAa,CAClC,OAAO,EAAE,qBAAqB,EAC9B,WAAW,EAAE,WAAW,GACtB,OAAO,CAAC;IAAE,SAAS,EAAE,MAAM,CAAA;CAAE,CAAC,CAsBhC"}
1
+ {"version":3,"file":"skill-generator.d.ts","sourceRoot":"","sources":["../../src/skills/skill-generator.ts"],"names":[],"mappings":"AAEA,OAAO,KAAK,EAAE,WAAW,EAAE,MAAM,4BAA4B,CAAC;AAC9D,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,aAAa,CAAC;AAGjD,MAAM,WAAW,qBAAqB;IACrC,MAAM,EAAE,MAAM,CAAC;IACf,OAAO,EAAE,MAAM,EAAE,CAAC;IAClB,OAAO,EAAE,MAAM,CAAC;IAChB,SAAS,EAAE,aAAa,EAAE,CAAC;IAC3B,SAAS,EAAE,OAAO,CAAC;CACnB;AA2WD;;GAEG;AACH,wBAAsB,aAAa,CAClC,OAAO,EAAE,qBAAqB,EAC9B,WAAW,EAAE,WAAW,GACtB,OAAO,CAAC;IAAE,SAAS,EAAE,MAAM,CAAA;CAAE,CAAC,CAsBhC"}