@jaypie/mcp 0.8.47 → 0.8.48
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
|
@@ -9,7 +9,7 @@ import { gt } from 'semver';
|
|
|
9
9
|
/**
|
|
10
10
|
* Docs Suite - Documentation services (skill, version, release_notes)
|
|
11
11
|
*/
|
|
12
|
-
const BUILD_VERSION_STRING = "@jaypie/mcp@0.8.
|
|
12
|
+
const BUILD_VERSION_STRING = "@jaypie/mcp@0.8.48#ce72b43d"
|
|
13
13
|
;
|
|
14
14
|
const __filename$1 = fileURLToPath(import.meta.url);
|
|
15
15
|
const __dirname$1 = path.dirname(__filename$1);
|
package/package.json
CHANGED
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
---
|
|
2
|
+
version: 0.3.2
|
|
3
|
+
date: 2026-04-24
|
|
4
|
+
summary: Adds fab.corpus() for seeded corpus text generation
|
|
5
|
+
---
|
|
6
|
+
|
|
7
|
+
## Changes
|
|
8
|
+
|
|
9
|
+
- New `fab.corpus(words?, options?)` method on `Fabricator` for generating deterministic prose fixtures.
|
|
10
|
+
- Default 108 words; pass a number for any other size.
|
|
11
|
+
- Custom corpus support: pass `corpus` (raw text) or `words` (explicit weights). Custom and default English pools blend 50/50 by default; tune with `blend` or skip defaults with `replaceDefaults: true`.
|
|
12
|
+
- Custom token functions via `functions` option — each function receives the fabricator and emits one token per draw, with weight subtracted from the main word stream. Useful for UUIDs, dollar amounts, emails, and other non-word-shaped tokens. Shorthand `[fn, weight]` for one function, `[[fn1, w1], [fn2, w2]]` for many.
|
|
13
|
+
- Full passthrough of typo rate, phonotactic rate, sentence/punctuation density, and a `chars` escape hatch for char-length output.
|
|
14
|
+
- `CorpusOptions`, `CorpusTokenFunction`, and `CorpusFunctionEntry` types exported from `@jaypie/fabricator`.
|
|
15
|
+
|
|
16
|
+
## Determinism contract
|
|
17
|
+
|
|
18
|
+
- Each call advances the fabricator's faker state, so successive calls with the same params return different output (`fab.corpus(100) !== fab.corpus(100)`).
|
|
19
|
+
- Replaying from a fresh `fabricator(seed)` reproduces the same sequence.
|
|
20
|
+
- Word count and options are folded into the per-call seed, so different params produce independent streams — `corpus(100)` and `corpus(101)` are not off-by-one variants of each other.
|
|
21
|
+
|
|
22
|
+
## Why
|
|
23
|
+
|
|
24
|
+
Test fixtures and seeded benchmarks frequently want bulk prose with stable, reproducible content. `lorem` is too uniform; ad-hoc generators are non-deterministic. `corpus()` fills that gap with a single ergonomic call that respects the fabricator's seeding contract.
|
|
25
|
+
|
|
26
|
+
## Implementation note
|
|
27
|
+
|
|
28
|
+
Generator internals (`babble`, `phonotactic`, English word/typo data, `Rng`) live in `src/corpus/` as a private vendored module. They are not exported; the public surface is only the `corpus()` method and the `CorpusOptions`, `CorpusTokenFunction`, and `CorpusFunctionEntry` types.
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
---
|
|
2
|
+
version: 0.8.48
|
|
3
|
+
date: 2026-04-24
|
|
4
|
+
summary: Update fabricator skill with corpus() coverage
|
|
5
|
+
---
|
|
6
|
+
|
|
7
|
+
## Changes
|
|
8
|
+
|
|
9
|
+
- Added a Corpus section to `skill("fabricator")` covering `fab.corpus()`: signatures, determinism contract (advance per call, replay across runs, params-folded-into-seed), custom corpus blending (`corpus`, `words`, `blend`, `replaceDefaults`), and custom token functions (`functions` shorthand and array forms).
|
|
10
|
+
- Documented the full `CorpusOptions` shape and `CorpusTokenFunction` type.
|
package/skills/fabricator.md
CHANGED
|
@@ -85,6 +85,82 @@ fab.generate.person();
|
|
|
85
85
|
- **RARE (2.1%)**: firstName is a surname, lastName is hyphenated
|
|
86
86
|
- **EPIC (0.307%)**: double middle names
|
|
87
87
|
|
|
88
|
+
### Corpus
|
|
89
|
+
|
|
90
|
+
Generate deterministic prose for fixtures, snapshots, or seeded benchmarks.
|
|
91
|
+
|
|
92
|
+
```typescript
|
|
93
|
+
const fab = fabricator("seed");
|
|
94
|
+
|
|
95
|
+
fab.corpus(); // 108 words of English-ish prose (default)
|
|
96
|
+
fab.corpus(1000); // 1000 words
|
|
97
|
+
fab.corpus({ wordsPerPeriod: 10 });
|
|
98
|
+
fab.corpus(500, { typoRate: 0.20 });
|
|
99
|
+
```
|
|
100
|
+
|
|
101
|
+
#### Determinism contract
|
|
102
|
+
|
|
103
|
+
- Each call **advances** the fabricator's faker state, so successive calls with the same params return different output (`fab.corpus(100) !== fab.corpus(100)`).
|
|
104
|
+
- Replaying from a fresh `fabricator(seed)` reproduces the same sequence.
|
|
105
|
+
- Word count and options are folded into the per-call seed, so different params produce **independent** streams. `corpus(100)` and `corpus(101)` differ across the whole text — not by one word.
|
|
106
|
+
|
|
107
|
+
#### Custom corpus
|
|
108
|
+
|
|
109
|
+
Mix domain vocabulary into the output. By default the custom pool blends 50/50 with default English; typo (~6%) and phonotactic invention (~3%) rates stay at their defaults.
|
|
110
|
+
|
|
111
|
+
```typescript
|
|
112
|
+
fab.corpus(500, { corpus: deployLogText }); // raw text → derived weights
|
|
113
|
+
fab.corpus(500, { words: [["deploy", 5], ["lambda", 2]] });
|
|
114
|
+
|
|
115
|
+
fab.corpus(500, { corpus: deployLogText, blend: 0.7 }); // tune the mix
|
|
116
|
+
fab.corpus(500, { corpus: deployLogText, replaceDefaults: true }); // pure custom
|
|
117
|
+
```
|
|
118
|
+
|
|
119
|
+
#### Custom token functions
|
|
120
|
+
|
|
121
|
+
For non-word-shaped tokens (UUIDs, dollar amounts, IDs), pass `functions`. Each function receives `{ fab }` (the fabricator) and emits one token per draw. A function's weight is the share of total content tokens it produces; that share is subtracted from the main word stream.
|
|
122
|
+
|
|
123
|
+
```typescript
|
|
124
|
+
// Shorthand for a single function
|
|
125
|
+
fab.corpus(500, {
|
|
126
|
+
functions: [({ fab }) => fab.string.uuid(), 0.03],
|
|
127
|
+
});
|
|
128
|
+
|
|
129
|
+
// Multiple functions
|
|
130
|
+
fab.corpus(500, {
|
|
131
|
+
functions: [
|
|
132
|
+
[({ fab }) => fab.string.uuid(), 0.03],
|
|
133
|
+
[({ fab }) => "$" + fab.finance.amount(), 0.04],
|
|
134
|
+
[({ fab }) => fab.internet.email(), 0.02],
|
|
135
|
+
],
|
|
136
|
+
});
|
|
137
|
+
```
|
|
138
|
+
|
|
139
|
+
#### Full options
|
|
140
|
+
|
|
141
|
+
```typescript
|
|
142
|
+
interface CorpusOptions {
|
|
143
|
+
corpus?: string; // raw text → derive weights
|
|
144
|
+
words?: ReadonlyArray<readonly [string, number]>; // explicit weights
|
|
145
|
+
blend?: number; // share given to custom pool, default 0.5
|
|
146
|
+
replaceDefaults?: boolean; // skip default English entirely
|
|
147
|
+
typos?: ReadonlyArray<readonly [string, number]>; // override typo pool
|
|
148
|
+
phonotactic?: PhonotacticOptions; // tune invented words
|
|
149
|
+
typoRate?: number; // default 0.06
|
|
150
|
+
phonotacticRate?: number; // default 0.03
|
|
151
|
+
wordsPerPeriod?: number; // default 17
|
|
152
|
+
wordsPerComma?: number; // default 22
|
|
153
|
+
periodsPerBreak?: number; // default 5
|
|
154
|
+
sentences?: boolean; // default true
|
|
155
|
+
chars?: number; // generate by char length instead of word count
|
|
156
|
+
functions?: // custom token functions
|
|
157
|
+
| readonly [CorpusTokenFunction, number]
|
|
158
|
+
| ReadonlyArray<readonly [CorpusTokenFunction, number]>;
|
|
159
|
+
}
|
|
160
|
+
|
|
161
|
+
type CorpusTokenFunction = (params: { fab: Fabricator }) => string;
|
|
162
|
+
```
|
|
163
|
+
|
|
88
164
|
## CHANCE Constants
|
|
89
165
|
|
|
90
166
|
```typescript
|