marlarky 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +489 -0
- package/dist/adapters/faker-js-adapter.d.ts +44 -0
- package/dist/adapters/faker-js-adapter.d.ts.map +1 -0
- package/dist/adapters/faker-js-adapter.js +46 -0
- package/dist/adapters/faker-js-adapter.js.map +1 -0
- package/dist/adapters/index.d.ts +3 -0
- package/dist/adapters/index.d.ts.map +1 -0
- package/dist/adapters/index.js +3 -0
- package/dist/adapters/index.js.map +1 -0
- package/dist/adapters/simple-faker-adapter.d.ts +22 -0
- package/dist/adapters/simple-faker-adapter.d.ts.map +1 -0
- package/dist/adapters/simple-faker-adapter.js +54 -0
- package/dist/adapters/simple-faker-adapter.js.map +1 -0
- package/dist/cli.d.ts +7 -0
- package/dist/cli.d.ts.map +1 -0
- package/dist/cli.js +588 -0
- package/dist/cli.js.map +1 -0
- package/dist/defaults/index.d.ts +2 -0
- package/dist/defaults/index.d.ts.map +1 -0
- package/dist/defaults/index.js +2 -0
- package/dist/defaults/index.js.map +1 -0
- package/dist/defaults/word-lists.d.ts +22 -0
- package/dist/defaults/word-lists.d.ts.map +1 -0
- package/dist/defaults/word-lists.js +124 -0
- package/dist/defaults/word-lists.js.map +1 -0
- package/dist/generator/index.d.ts +2 -0
- package/dist/generator/index.d.ts.map +1 -0
- package/dist/generator/index.js +2 -0
- package/dist/generator/index.js.map +1 -0
- package/dist/generator/text-generator.d.ts +93 -0
- package/dist/generator/text-generator.d.ts.map +1 -0
- package/dist/generator/text-generator.js +411 -0
- package/dist/generator/text-generator.js.map +1 -0
- package/dist/generator/text-generator.test.d.ts +2 -0
- package/dist/generator/text-generator.test.d.ts.map +1 -0
- package/dist/generator/text-generator.test.js +151 -0
- package/dist/generator/text-generator.test.js.map +1 -0
- package/dist/grammar/index.d.ts +5 -0
- package/dist/grammar/index.d.ts.map +1 -0
- package/dist/grammar/index.js +3 -0
- package/dist/grammar/index.js.map +1 -0
- package/dist/grammar/phrase-builders.d.ts +72 -0
- package/dist/grammar/phrase-builders.d.ts.map +1 -0
- package/dist/grammar/phrase-builders.js +241 -0
- package/dist/grammar/phrase-builders.js.map +1 -0
- package/dist/grammar/sentence-templates.d.ts +67 -0
- package/dist/grammar/sentence-templates.d.ts.map +1 -0
- package/dist/grammar/sentence-templates.js +272 -0
- package/dist/grammar/sentence-templates.js.map +1 -0
- package/dist/index.d.ts +17 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +40 -0
- package/dist/index.js.map +1 -0
- package/dist/interfaces/faker-adapter.d.ts +25 -0
- package/dist/interfaces/faker-adapter.d.ts.map +1 -0
- package/dist/interfaces/faker-adapter.js +2 -0
- package/dist/interfaces/faker-adapter.js.map +1 -0
- package/dist/interfaces/index.d.ts +4 -0
- package/dist/interfaces/index.d.ts.map +1 -0
- package/dist/interfaces/index.js +2 -0
- package/dist/interfaces/index.js.map +1 -0
- package/dist/interfaces/lexicon-store.d.ts +39 -0
- package/dist/interfaces/lexicon-store.d.ts.map +1 -0
- package/dist/interfaces/lexicon-store.js +2 -0
- package/dist/interfaces/lexicon-store.js.map +1 -0
- package/dist/interfaces/rng.d.ts +25 -0
- package/dist/interfaces/rng.d.ts.map +1 -0
- package/dist/interfaces/rng.js +2 -0
- package/dist/interfaces/rng.js.map +1 -0
- package/dist/lexicon/index.d.ts +4 -0
- package/dist/lexicon/index.d.ts.map +1 -0
- package/dist/lexicon/index.js +4 -0
- package/dist/lexicon/index.js.map +1 -0
- package/dist/lexicon/loader.d.ts +24 -0
- package/dist/lexicon/loader.d.ts.map +1 -0
- package/dist/lexicon/loader.js +47 -0
- package/dist/lexicon/loader.js.map +1 -0
- package/dist/lexicon/store.d.ts +39 -0
- package/dist/lexicon/store.d.ts.map +1 -0
- package/dist/lexicon/store.js +291 -0
- package/dist/lexicon/store.js.map +1 -0
- package/dist/lexicon/validator.d.ts +10 -0
- package/dist/lexicon/validator.d.ts.map +1 -0
- package/dist/lexicon/validator.js +273 -0
- package/dist/lexicon/validator.js.map +1 -0
- package/dist/morphology/articles.d.ts +17 -0
- package/dist/morphology/articles.d.ts.map +1 -0
- package/dist/morphology/articles.js +66 -0
- package/dist/morphology/articles.js.map +1 -0
- package/dist/morphology/articles.test.d.ts +2 -0
- package/dist/morphology/articles.test.d.ts.map +1 -0
- package/dist/morphology/articles.test.js +56 -0
- package/dist/morphology/articles.test.js.map +1 -0
- package/dist/morphology/conjugate.d.ts +46 -0
- package/dist/morphology/conjugate.d.ts.map +1 -0
- package/dist/morphology/conjugate.js +337 -0
- package/dist/morphology/conjugate.js.map +1 -0
- package/dist/morphology/conjugate.test.d.ts +2 -0
- package/dist/morphology/conjugate.test.d.ts.map +1 -0
- package/dist/morphology/conjugate.test.js +168 -0
- package/dist/morphology/conjugate.test.js.map +1 -0
- package/dist/morphology/index.d.ts +6 -0
- package/dist/morphology/index.d.ts.map +1 -0
- package/dist/morphology/index.js +9 -0
- package/dist/morphology/index.js.map +1 -0
- package/dist/morphology/normalize.d.ts +52 -0
- package/dist/morphology/normalize.d.ts.map +1 -0
- package/dist/morphology/normalize.js +127 -0
- package/dist/morphology/normalize.js.map +1 -0
- package/dist/morphology/pluralize.d.ts +17 -0
- package/dist/morphology/pluralize.d.ts.map +1 -0
- package/dist/morphology/pluralize.js +186 -0
- package/dist/morphology/pluralize.js.map +1 -0
- package/dist/morphology/pluralize.test.d.ts +2 -0
- package/dist/morphology/pluralize.test.d.ts.map +1 -0
- package/dist/morphology/pluralize.test.js +86 -0
- package/dist/morphology/pluralize.test.js.map +1 -0
- package/dist/providers/index.d.ts +2 -0
- package/dist/providers/index.d.ts.map +1 -0
- package/dist/providers/index.js +2 -0
- package/dist/providers/index.js.map +1 -0
- package/dist/providers/word-provider.d.ts +125 -0
- package/dist/providers/word-provider.d.ts.map +1 -0
- package/dist/providers/word-provider.js +266 -0
- package/dist/providers/word-provider.js.map +1 -0
- package/dist/rng/index.d.ts +2 -0
- package/dist/rng/index.d.ts.map +1 -0
- package/dist/rng/index.js +2 -0
- package/dist/rng/index.js.map +1 -0
- package/dist/rng/seedable-rng.d.ts +19 -0
- package/dist/rng/seedable-rng.d.ts.map +1 -0
- package/dist/rng/seedable-rng.js +77 -0
- package/dist/rng/seedable-rng.js.map +1 -0
- package/dist/rng/seedable-rng.test.d.ts +2 -0
- package/dist/rng/seedable-rng.test.d.ts.map +1 -0
- package/dist/rng/seedable-rng.test.js +165 -0
- package/dist/rng/seedable-rng.test.js.map +1 -0
- package/dist/rules/index.d.ts +3 -0
- package/dist/rules/index.d.ts.map +1 -0
- package/dist/rules/index.js +2 -0
- package/dist/rules/index.js.map +1 -0
- package/dist/rules/rule-engine.d.ts +78 -0
- package/dist/rules/rule-engine.d.ts.map +1 -0
- package/dist/rules/rule-engine.js +271 -0
- package/dist/rules/rule-engine.js.map +1 -0
- package/dist/transforms/config-merge.d.ts +19 -0
- package/dist/transforms/config-merge.d.ts.map +1 -0
- package/dist/transforms/config-merge.js +88 -0
- package/dist/transforms/config-merge.js.map +1 -0
- package/dist/transforms/config-merge.test.d.ts +2 -0
- package/dist/transforms/config-merge.test.d.ts.map +1 -0
- package/dist/transforms/config-merge.test.js +91 -0
- package/dist/transforms/config-merge.test.js.map +1 -0
- package/dist/transforms/default-registry.d.ts +10 -0
- package/dist/transforms/default-registry.d.ts.map +1 -0
- package/dist/transforms/default-registry.js +17 -0
- package/dist/transforms/default-registry.js.map +1 -0
- package/dist/transforms/index.d.ts +15 -0
- package/dist/transforms/index.d.ts.map +1 -0
- package/dist/transforms/index.js +20 -0
- package/dist/transforms/index.js.map +1 -0
- package/dist/transforms/pipeline.d.ts +28 -0
- package/dist/transforms/pipeline.d.ts.map +1 -0
- package/dist/transforms/pipeline.js +176 -0
- package/dist/transforms/pipeline.js.map +1 -0
- package/dist/transforms/pipeline.test.d.ts +2 -0
- package/dist/transforms/pipeline.test.d.ts.map +1 -0
- package/dist/transforms/pipeline.test.js +175 -0
- package/dist/transforms/pipeline.test.js.map +1 -0
- package/dist/transforms/protection.d.ts +16 -0
- package/dist/transforms/protection.d.ts.map +1 -0
- package/dist/transforms/protection.js +97 -0
- package/dist/transforms/protection.js.map +1 -0
- package/dist/transforms/protection.test.d.ts +2 -0
- package/dist/transforms/protection.test.d.ts.map +1 -0
- package/dist/transforms/protection.test.js +79 -0
- package/dist/transforms/protection.test.js.map +1 -0
- package/dist/transforms/registry.d.ts +25 -0
- package/dist/transforms/registry.d.ts.map +1 -0
- package/dist/transforms/registry.js +32 -0
- package/dist/transforms/registry.js.map +1 -0
- package/dist/transforms/registry.test.d.ts +2 -0
- package/dist/transforms/registry.test.d.ts.map +1 -0
- package/dist/transforms/registry.test.js +64 -0
- package/dist/transforms/registry.test.js.map +1 -0
- package/dist/transforms/tokenizer.d.ts +26 -0
- package/dist/transforms/tokenizer.d.ts.map +1 -0
- package/dist/transforms/tokenizer.js +137 -0
- package/dist/transforms/tokenizer.js.map +1 -0
- package/dist/transforms/tokenizer.test.d.ts +2 -0
- package/dist/transforms/tokenizer.test.d.ts.map +1 -0
- package/dist/transforms/tokenizer.test.js +85 -0
- package/dist/transforms/tokenizer.test.js.map +1 -0
- package/dist/transforms/transforms/biz-jargon.d.ts +7 -0
- package/dist/transforms/transforms/biz-jargon.d.ts.map +1 -0
- package/dist/transforms/transforms/biz-jargon.js +117 -0
- package/dist/transforms/transforms/biz-jargon.js.map +1 -0
- package/dist/transforms/transforms/emoji.d.ts +7 -0
- package/dist/transforms/transforms/emoji.d.ts.map +1 -0
- package/dist/transforms/transforms/emoji.js +127 -0
- package/dist/transforms/transforms/emoji.js.map +1 -0
- package/dist/transforms/transforms/index.d.ts +17 -0
- package/dist/transforms/transforms/index.d.ts.map +1 -0
- package/dist/transforms/transforms/index.js +37 -0
- package/dist/transforms/transforms/index.js.map +1 -0
- package/dist/transforms/transforms/leet.d.ts +7 -0
- package/dist/transforms/transforms/leet.d.ts.map +1 -0
- package/dist/transforms/transforms/leet.js +109 -0
- package/dist/transforms/transforms/leet.js.map +1 -0
- package/dist/transforms/transforms/mock-case.d.ts +7 -0
- package/dist/transforms/transforms/mock-case.d.ts.map +1 -0
- package/dist/transforms/transforms/mock-case.js +116 -0
- package/dist/transforms/transforms/mock-case.js.map +1 -0
- package/dist/transforms/transforms/pig-latin.d.ts +7 -0
- package/dist/transforms/transforms/pig-latin.d.ts.map +1 -0
- package/dist/transforms/transforms/pig-latin.js +132 -0
- package/dist/transforms/transforms/pig-latin.js.map +1 -0
- package/dist/transforms/transforms/pig-latin.test.d.ts +2 -0
- package/dist/transforms/transforms/pig-latin.test.d.ts.map +1 -0
- package/dist/transforms/transforms/pig-latin.test.js +77 -0
- package/dist/transforms/transforms/pig-latin.test.js.map +1 -0
- package/dist/transforms/transforms/pirate.d.ts +7 -0
- package/dist/transforms/transforms/pirate.d.ts.map +1 -0
- package/dist/transforms/transforms/pirate.js +150 -0
- package/dist/transforms/transforms/pirate.js.map +1 -0
- package/dist/transforms/transforms/redact.d.ts +7 -0
- package/dist/transforms/transforms/redact.d.ts.map +1 -0
- package/dist/transforms/transforms/redact.js +109 -0
- package/dist/transforms/transforms/redact.js.map +1 -0
- package/dist/transforms/transforms/reverse-words.d.ts +7 -0
- package/dist/transforms/transforms/reverse-words.d.ts.map +1 -0
- package/dist/transforms/transforms/reverse-words.js +88 -0
- package/dist/transforms/transforms/reverse-words.js.map +1 -0
- package/dist/transforms/transforms/transforms.test.d.ts +11 -0
- package/dist/transforms/transforms/transforms.test.d.ts.map +1 -0
- package/dist/transforms/transforms/transforms.test.js +489 -0
- package/dist/transforms/transforms/transforms.test.js.map +1 -0
- package/dist/transforms/transforms/ubbi-dubbi.d.ts +7 -0
- package/dist/transforms/transforms/ubbi-dubbi.d.ts.map +1 -0
- package/dist/transforms/transforms/ubbi-dubbi.js +120 -0
- package/dist/transforms/transforms/ubbi-dubbi.js.map +1 -0
- package/dist/transforms/transforms/uwu.d.ts +7 -0
- package/dist/transforms/transforms/uwu.d.ts.map +1 -0
- package/dist/transforms/transforms/uwu.js +106 -0
- package/dist/transforms/transforms/uwu.js.map +1 -0
- package/dist/transforms/types.d.ts +159 -0
- package/dist/transforms/types.d.ts.map +1 -0
- package/dist/transforms/types.js +22 -0
- package/dist/transforms/types.js.map +1 -0
- package/dist/types/api.d.ts +158 -0
- package/dist/types/api.d.ts.map +1 -0
- package/dist/types/api.js +6 -0
- package/dist/types/api.js.map +1 -0
- package/dist/types/config.d.ts +86 -0
- package/dist/types/config.d.ts.map +1 -0
- package/dist/types/config.js +66 -0
- package/dist/types/config.js.map +1 -0
- package/dist/types/context.d.ts +74 -0
- package/dist/types/context.d.ts.map +1 -0
- package/dist/types/context.js +83 -0
- package/dist/types/context.js.map +1 -0
- package/dist/types/index.d.ts +7 -0
- package/dist/types/index.d.ts.map +1 -0
- package/dist/types/index.js +3 -0
- package/dist/types/index.js.map +1 -0
- package/dist/types/lexicon.d.ts +247 -0
- package/dist/types/lexicon.d.ts.map +1 -0
- package/dist/types/lexicon.js +6 -0
- package/dist/types/lexicon.js.map +1 -0
- package/examples/basic-usage.ts +48 -0
- package/examples/corporate-lexicon.ts +71 -0
- package/examples/lexicons/corporate-min.json +200 -0
- package/examples/with-tracing.ts +85 -0
- package/package.json +70 -0
|
@@ -0,0 +1,175 @@
|
|
|
1
|
+
import { describe, it, expect } from 'vitest';
|
|
2
|
+
import { executePipeline, checkPipelineOrder } from './pipeline.js';
|
|
3
|
+
import { createDefaultRegistry } from './default-registry.js';
|
|
4
|
+
import { DEFAULT_PROTECTION_CONFIG } from './types.js';
|
|
5
|
+
// Single registry instance, built once via createDefaultRegistry() and shared by every test in this file.
const registry = createDefaultRegistry();
|
|
6
|
+
/**
 * Build a pipeline config object for the tests in this file.
 *
 * Starts from an enabled, non-strict, non-auto-ordered config with a fresh
 * shallow copy of DEFAULT_PROTECTION_CONFIG, then lets `opts` override or add
 * any top-level key (the override is shallow: a `protection` entry in `opts`
 * replaces the copied defaults wholesale).
 *
 * @param pipeline - Pipeline steps, stored on the result as-is.
 * @param opts - Optional top-level overrides spread over the baseline.
 * @returns The merged config object.
 */
function makeConfig(pipeline, opts = {}) {
    const baseline = {
        enabled: true,
        pipeline,
        // Copy so a test can never mutate the shared defaults object.
        protection: { ...DEFAULT_PROTECTION_CONFIG },
        strict: false,
        autoOrder: false,
    };
    return { ...baseline, ...opts };
}
|
|
16
|
+
// Behavioural tests for executePipeline: enable/disable handling, transform
// ordering, seeding, protection, strict-mode errors and trace output.
describe('Pipeline Executor', () => {
    // Every test runs against the shared registry; seed 42 and tracing off
    // are the defaults unless a test says otherwise.
    const run = (text, config, seed = 42, traceEnabled = false) => executePipeline(text, config, registry, seed, traceEnabled);
    // Pirate step with interjections switched off.
    const quietPirate = () => ({ id: 'pirate', params: { interjectionRate: 0 } });

    it('should return unmodified text when disabled', () => {
        // Built by hand (not via makeConfig) so that `enabled` is false and
        // `autoOrder` is absent, exactly as a caller might supply it.
        const disabledConfig = {
            enabled: false,
            pipeline: [{ id: 'pigLatin' }],
            protection: { ...DEFAULT_PROTECTION_CONFIG },
            strict: false,
        };
        const { text, transformsApplied } = run('Hello world.', disabledConfig);
        expect(text).toBe('Hello world.');
        expect(transformsApplied).toHaveLength(0);
    });

    it('should return unmodified text when pipeline is empty', () => {
        const { text } = run('Hello world.', makeConfig([]));
        expect(text).toBe('Hello world.');
    });

    it('should apply a single transform', () => {
        const input = 'hello world.';
        const outcome = run(input, makeConfig([{ id: 'mockCase' }]));
        expect(outcome.text).not.toBe(input);
        expect(outcome.transformsApplied).toEqual(['mockCase']);
    });

    it('should apply multiple transforms in pipeline order (pirate then pigLatin)', () => {
        const input = 'You are my friend.';
        const outcome = run(input, makeConfig([quietPirate(), { id: 'pigLatin' }]));
        // The pirate step rewrites the words first; pigLatin then operates on
        // that pirate output, so the applied list must keep the same order.
        expect(outcome.transformsApplied).toEqual(['pirate', 'pigLatin']);
        expect(outcome.text).not.toBe(input);
    });

    it('should produce deterministic output with same seed', () => {
        const config = makeConfig([
            { id: 'pirate', params: { interjectionRate: 0.5 } },
            { id: 'pigLatin' },
        ]);
        const first = run('Hello world today.', config, 42);
        const second = run('Hello world today.', config, 42);
        expect(first.text).toBe(second.text);
    });

    it('should produce different output with different seeds', () => {
        const config = makeConfig([{ id: 'leet', params: { intensity: 0.5 } }]);
        const outputs = [42, 999].map((seed) => run('Hello world today.', config, seed));
        // With partial intensity the two texts are not guaranteed to differ,
        // so this only verifies that both runs complete and yield strings.
        for (const { text } of outputs) {
            expect(typeof text).toBe('string');
        }
    });

    it('should protect tokens across all transforms', () => {
        const steps = [{ id: 'pigLatin' }, { id: 'mockCase' }];
        const config = makeConfig(steps, { protection: { keepAcronyms: true } });
        const { text } = run('API is great.', config);
        // The acronym has to come out of both transforms untouched.
        expect(text).toContain('API');
    });

    it('should skip unknown transforms in non-strict mode', () => {
        const config = makeConfig([{ id: 'nonExistent' }, { id: 'mockCase' }]);
        const { transformsApplied } = run('hello world.', config);
        expect(transformsApplied).toEqual(['mockCase']);
    });

    it('should throw for unknown transforms in strict mode', () => {
        const config = makeConfig([{ id: 'nonExistent' }], { strict: true });
        const attempt = () => run('hello.', config);
        expect(attempt).toThrow('Unknown transform ID');
    });

    it('should throw for invalid params in strict mode', () => {
        const config = makeConfig([{ id: 'leet', params: { intensity: 999 } }], { strict: true });
        const attempt = () => run('hello.', config);
        expect(attempt).toThrow('invalid params');
    });

    it('should produce trace data when traceEnabled', () => {
        const traced = run('hello world.', makeConfig([{ id: 'mockCase' }]), 42, true);
        expect(traced.outputTokens).toBeDefined();
        expect(traced.transformEvents).toBeDefined();
        expect(traced.transformEvents).toHaveLength(1);
        expect(traced.transformEvents[0].transformId).toBe('mockCase');
        expect(traced.outputTokens.length).toBeGreaterThan(0);
    });

    it('should auto-order pipeline when autoOrder=true', () => {
        // Listed in the "wrong" order on purpose: mockCase is expected to be
        // scheduled after pirate once auto-ordering is switched on.
        const config = makeConfig([{ id: 'mockCase' }, quietPirate()], { autoOrder: true });
        const { transformsApplied } = run('You are my friend.', config);
        expect(transformsApplied).toEqual(['pirate', 'mockCase']);
    });

    it('should preserve punctuation order across transforms', () => {
        const config = makeConfig([{ id: 'pigLatin' }, { id: 'mockCase' }]);
        const { text } = run('Hello, world! How are you?', config);
        // Each punctuation mark of the input must still appear in the output.
        for (const mark of [',', '!', '?']) {
            expect(text).toContain(mark);
        }
    });

    it('should preserve whitespace across transforms', () => {
        const { text } = run('Hello world.', makeConfig([quietPirate()]));
        // At least one pair of words must still be separated by a space.
        expect(text).toMatch(/\S+ \S+/);
    });
});
|
|
125
|
+
// Tests for checkPipelineOrder, which reports warnings about the order of
// steps in a pipeline relative to the registry.
describe('Pipeline Order Checker', () => {
    // Turn a list of transform ids into pipeline steps without params.
    const toSteps = (ids) => ids.map((id) => ({ id }));

    it('should return no warnings for correctly ordered pipeline', () => {
        // Ascending order as annotated in the original test:
        // pirate (10), leet (30), pigLatin (40), mockCase (50).
        const ordered = toSteps(['pirate', 'leet', 'pigLatin', 'mockCase']);
        expect(checkPipelineOrder(ordered, registry)).toHaveLength(0);
    });

    it('should warn about out-of-order transforms', () => {
        // mockCase (50) placed ahead of pirate (10).
        const reversed = toSteps(['mockCase', 'pirate']);
        const { length: warningCount } = checkPipelineOrder(reversed, registry);
        expect(warningCount).toBeGreaterThan(0);
    });
});
|
|
145
|
+
// Invariants that must hold whatever the seed is. Each one is exercised for
// seeds 1..20 so that a single favourable seed cannot mask a failure.
describe('Invariant Tests', () => {
    it('should produce non-empty output for any non-empty input', () => {
        const config = makeConfig([{ id: 'pigLatin' }]);
        for (let seed = 1; seed <= 20; seed++) {
            const result = executePipeline('Hello world.', config, registry, seed, false);
            expect(result.text.length).toBeGreaterThan(0);
        }
    });

    it('should preserve punctuation tokens across many seeds', () => {
        const config = makeConfig([
            { id: 'pirate', params: { interjectionRate: 0 } },
            { id: 'pigLatin' },
        ]);
        for (let seed = 1; seed <= 20; seed++) {
            const result = executePipeline('Hello, world!', config, registry, seed, false);
            expect(result.text).toContain(',');
            expect(result.text).toContain('!');
        }
    });

    it('should not introduce double spaces', () => {
        const config = makeConfig([
            { id: 'pirate', params: { interjectionRate: 0 } },
            { id: 'bizJargon', params: { rate: 1.0 } },
        ]);
        for (let seed = 1; seed <= 20; seed++) {
            const result = executePipeline('You use my plan.', config, registry, seed, false);
            // Explicit quantifier: only a run of two or more spaces is a
            // failure. A pattern consisting of a single space would match
            // every multi-word output, and a literal run of spaces is easy to
            // lose when whitespace gets collapsed.
            expect(result.text).not.toMatch(/ {2,}/);
        }
    });
});
|
|
175
|
+
//# sourceMappingURL=pipeline.test.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"pipeline.test.js","sourceRoot":"","sources":["../../src/transforms/pipeline.test.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,EAAE,EAAE,MAAM,EAAE,MAAM,QAAQ,CAAC;AAC9C,OAAO,EAAE,eAAe,EAAE,kBAAkB,EAAE,MAAM,eAAe,CAAC;AACpE,OAAO,EAAE,qBAAqB,EAAE,MAAM,uBAAuB,CAAC;AAE9D,OAAO,EAAE,yBAAyB,EAAE,MAAM,YAAY,CAAC;AAEvD,MAAM,QAAQ,GAAG,qBAAqB,EAAE,CAAC;AAEzC,SAAS,UAAU,CAAC,QAAiE,EAAE,OAAwC,EAAE;IAC/H,OAAO;QACL,OAAO,EAAE,IAAI;QACb,QAAQ;QACR,UAAU,EAAE,EAAE,GAAG,yBAAyB,EAAE;QAC5C,MAAM,EAAE,KAAK;QACb,SAAS,EAAE,KAAK;QAChB,GAAG,IAAI;KACR,CAAC;AACJ,CAAC;AAED,QAAQ,CAAC,mBAAmB,EAAE,GAAG,EAAE;IACjC,EAAE,CAAC,6CAA6C,EAAE,GAAG,EAAE;QACrD,MAAM,MAAM,GAA2B;YACrC,OAAO,EAAE,KAAK;YACd,QAAQ,EAAE,CAAC,EAAE,EAAE,EAAE,UAAU,EAAE,CAAC;YAC9B,UAAU,EAAE,EAAE,GAAG,yBAAyB,EAAE;YAC5C,MAAM,EAAE,KAAK;SACd,CAAC;QACF,MAAM,MAAM,GAAG,eAAe,CAAC,cAAc,EAAE,MAAM,EAAE,QAAQ,EAAE,EAAE,EAAE,KAAK,CAAC,CAAC;QAC5E,MAAM,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC,IAAI,CAAC,cAAc,CAAC,CAAC;QACzC,MAAM,CAAC,MAAM,CAAC,iBAAiB,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC;IACnD,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,sDAAsD,EAAE,GAAG,EAAE;QAC9D,MAAM,MAAM,GAAG,UAAU,CAAC,EAAE,CAAC,CAAC;QAC9B,MAAM,MAAM,GAAG,eAAe,CAAC,cAAc,EAAE,MAAM,EAAE,QAAQ,EAAE,EAAE,EAAE,KAAK,CAAC,CAAC;QAC5E,MAAM,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC,IAAI,CAAC,cAAc,CAAC,CAAC;IAC3C,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,iCAAiC,EAAE,GAAG,EAAE;QACzC,MAAM,MAAM,GAAG,UAAU,CAAC,CAAC,EAAE,EAAE,EAAE,UAAU,EAAE,CAAC,CAAC,CAAC;QAChD,MAAM,MAAM,GAAG,eAAe,CAAC,cAAc,EAAE,MAAM,EAAE,QAAQ,EAAE,EAAE,EAAE,KAAK,CAAC,CAAC;QAC5E,MAAM,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC,GAAG,CAAC,IAAI,CAAC,cAAc,CAAC,CAAC;QAC7C,MAAM,CAAC,MAAM,CAAC,iBAAiB,CAAC,CAAC,OAAO,CAAC,CAAC,UAAU,CAAC,CAAC,CAAC;IACzD,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,2EAA2E,EAAE,GAAG,EAAE;QACnF,MAAM,MAAM,GAAG,UAAU,CAAC;YACxB,EAAE,EAAE,EAAE,QAAQ,EAAE,MAAM,EAAE,EAAE,gBAAgB,EAAE,CAAC,EAAE,EAAE;YACjD,EAAE,EAAE,EAAE,UAAU,EAAE;SACnB,CAAC,CAAC;QACH,MAAM,MAAM,GAAG,eAAe,CAAC,oBAAoB,EAAE,MAAM,EAAE,QAAQ,EAAE,EAAE,EAAE,KAAK,CAAC,CAAC;QAClF,kDAAkD;QAClD,oCAAoC;QACpC,MAAM,CAAC,MAAM,CAAC
,iBAAiB,CAAC,CAAC,OAAO,CAAC,CAAC,QAAQ,EAAE,UAAU,CAAC,CAAC,CAAC;QACjE,MAAM,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC,GAAG,CAAC,IAAI,CAAC,oBAAoB,CAAC,CAAC;IACrD,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,oDAAoD,EAAE,GAAG,EAAE;QAC5D,MAAM,MAAM,GAAG,UAAU,CAAC;YACxB,EAAE,EAAE,EAAE,QAAQ,EAAE,MAAM,EAAE,EAAE,gBAAgB,EAAE,GAAG,EAAE,EAAE;YACnD,EAAE,EAAE,EAAE,UAAU,EAAE;SACnB,CAAC,CAAC;QACH,MAAM,EAAE,GAAG,eAAe,CAAC,oBAAoB,EAAE,MAAM,EAAE,QAAQ,EAAE,EAAE,EAAE,KAAK,CAAC,CAAC;QAC9E,MAAM,EAAE,GAAG,eAAe,CAAC,oBAAoB,EAAE,MAAM,EAAE,QAAQ,EAAE,EAAE,EAAE,KAAK,CAAC,CAAC;QAC9E,MAAM,CAAC,EAAE,CAAC,IAAI,CAAC,CAAC,IAAI,CAAC,EAAE,CAAC,IAAI,CAAC,CAAC;IAChC,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,sDAAsD,EAAE,GAAG,EAAE;QAC9D,MAAM,MAAM,GAAG,UAAU,CAAC;YACxB,EAAE,EAAE,EAAE,MAAM,EAAE,MAAM,EAAE,EAAE,SAAS,EAAE,GAAG,EAAE,EAAE;SAC3C,CAAC,CAAC;QACH,MAAM,EAAE,GAAG,eAAe,CAAC,oBAAoB,EAAE,MAAM,EAAE,QAAQ,EAAE,EAAE,EAAE,KAAK,CAAC,CAAC;QAC9E,MAAM,EAAE,GAAG,eAAe,CAAC,oBAAoB,EAAE,MAAM,EAAE,QAAQ,EAAE,GAAG,EAAE,KAAK,CAAC,CAAC;QAC/E,qEAAqE;QACrE,MAAM,CAAC,OAAO,EAAE,CAAC,IAAI,CAAC,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;QACtC,MAAM,CAAC,OAAO,EAAE,CAAC,IAAI,CAAC,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;IACxC,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,6CAA6C,EAAE,GAAG,EAAE;QACrD,MAAM,MAAM,GAAG,UAAU,CACvB,CAAC,EAAE,EAAE,EAAE,UAAU,EAAE,EAAE,EAAE,EAAE,EAAE,UAAU,EAAE,CAAC,EACxC,EAAE,UAAU,EAAE,EAAE,YAAY,EAAE,IAAI,EAAE,EAAE,CACvC,CAAC;QACF,MAAM,MAAM,GAAG,eAAe,CAAC,eAAe,EAAE,MAAM,EAAE,QAAQ,EAAE,EAAE,EAAE,KAAK,CAAC,CAAC;QAC7E,sDAAsD;QACtD,MAAM,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC,SAAS,CAAC,KAAK,CAAC,CAAC;IACvC,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,mDAAmD,EAAE,GAAG,EAAE;QAC3D,MAAM,MAAM,GAAG,UAAU,CAAC;YACxB,EAAE,EAAE,EAAE,aAAa,EAAE;YACrB,EAAE,EAAE,EAAE,UAAU,EAAE;SACnB,CAAC,CAAC;QACH,MAAM,MAAM,GAAG,eAAe,CAAC,cAAc,EAAE,MAAM,EAAE,QAAQ,EAAE,EAAE,EAAE,KAAK,CAAC,CAAC;QAC5E,MAAM,CAAC,MAAM,CAAC,iBAAiB,CAAC,CAAC,OAAO,CAAC,CAAC,UAAU,CAAC,CAAC,CAAC;IACzD,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,oDAAoD,EAAE,GAAG,EAAE;QAC5D,MAAM,MAAM,GAAG,UAAU,CAAC,CAAC,EAAE,EAAE,EAAE,aAAa,EAAE,CAAC,EAAE,EAAE,MAAM,EAAE,IAAI,EAAE,CAAC,CAAC;QACrE,MAAM,CAAC,GA
AG,EAAE,CAAC,eAAe,CAAC,QAAQ,EAAE,MAAM,EAAE,QAAQ,EAAE,EAAE,EAAE,KAAK,CAAC,CAAC,CAAC,OAAO,CAAC,sBAAsB,CAAC,CAAC;IACvG,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,gDAAgD,EAAE,GAAG,EAAE;QACxD,MAAM,MAAM,GAAG,UAAU,CACvB,CAAC,EAAE,EAAE,EAAE,MAAM,EAAE,MAAM,EAAE,EAAE,SAAS,EAAE,GAAG,EAAE,EAAE,CAAC,EAC5C,EAAE,MAAM,EAAE,IAAI,EAAE,CACjB,CAAC;QACF,MAAM,CAAC,GAAG,EAAE,CAAC,eAAe,CAAC,QAAQ,EAAE,MAAM,EAAE,QAAQ,EAAE,EAAE,EAAE,KAAK,CAAC,CAAC,CAAC,OAAO,CAAC,gBAAgB,CAAC,CAAC;IACjG,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,6CAA6C,EAAE,GAAG,EAAE;QACrD,MAAM,MAAM,GAAG,UAAU,CAAC,CAAC,EAAE,EAAE,EAAE,UAAU,EAAE,CAAC,CAAC,CAAC;QAChD,MAAM,MAAM,GAAG,eAAe,CAAC,cAAc,EAAE,MAAM,EAAE,QAAQ,EAAE,EAAE,EAAE,IAAI,CAAC,CAAC;QAC3E,MAAM,CAAC,MAAM,CAAC,YAAY,CAAC,CAAC,WAAW,EAAE,CAAC;QAC1C,MAAM,CAAC,MAAM,CAAC,eAAe,CAAC,CAAC,WAAW,EAAE,CAAC;QAC7C,MAAM,CAAC,MAAM,CAAC,eAAgB,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC;QAChD,MAAM,CAAC,MAAM,CAAC,eAAgB,CAAC,CAAC,CAAE,CAAC,WAAW,CAAC,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC;QACjE,MAAM,CAAC,MAAM,CAAC,YAAa,CAAC,MAAM,CAAC,CAAC,eAAe,CAAC,CAAC,CAAC,CAAC;IACzD,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,gDAAgD,EAAE,GAAG,EAAE;QACxD,4EAA4E;QAC5E,MAAM,MAAM,GAAG,UAAU,CACvB;YACE,EAAE,EAAE,EAAE,UAAU,EAAE;YAClB,EAAE,EAAE,EAAE,QAAQ,EAAE,MAAM,EAAE,EAAE,gBAAgB,EAAE,CAAC,EAAE,EAAE;SAClD,EACD,EAAE,SAAS,EAAE,IAAI,EAAE,CACpB,CAAC;QACF,MAAM,MAAM,GAAG,eAAe,CAAC,oBAAoB,EAAE,MAAM,EAAE,QAAQ,EAAE,EAAE,EAAE,KAAK,CAAC,CAAC;QAClF,sEAAsE;QACtE,MAAM,CAAC,MAAM,CAAC,iBAAiB,CAAC,CAAC,OAAO,CAAC,CAAC,QAAQ,EAAE,UAAU,CAAC,CAAC,CAAC;IACnE,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,qDAAqD,EAAE,GAAG,EAAE;QAC7D,MAAM,MAAM,GAAG,UAAU,CAAC,CAAC,EAAE,EAAE,EAAE,UAAU,EAAE,EAAE,EAAE,EAAE,EAAE,UAAU,EAAE,CAAC,CAAC,CAAC;QACpE,MAAM,MAAM,GAAG,eAAe,CAAC,4BAA4B,EAAE,MAAM,EAAE,QAAQ,EAAE,EAAE,EAAE,KAAK,CAAC,CAAC;QAC1F,sCAAsC;QACtC,MAAM,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC,SAAS,CAAC,GAAG,CAAC,CAAC;QACnC,MAAM,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC,SAAS,CAAC,GAAG,CAAC,CAAC;QACnC,MAAM,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC,SAAS,CAAC,GAAG,CAAC,CAAC;IACrC,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,8CAA8C,EAAE,GAAG,EAAE;QACtD,MAAM,MAAM,GAAG,UAAU,C
AAC,CAAC,EAAE,EAAE,EAAE,QAAQ,EAAE,MAAM,EAAE,EAAE,gBAAgB,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC;QAC/E,MAAM,MAAM,GAAG,eAAe,CAAC,cAAc,EAAE,MAAM,EAAE,QAAQ,EAAE,EAAE,EAAE,KAAK,CAAC,CAAC;QAC5E,mCAAmC;QACnC,MAAM,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC,OAAO,CAAC,SAAS,CAAC,CAAC;IACzC,CAAC,CAAC,CAAC;AACL,CAAC,CAAC,CAAC;AAEH,QAAQ,CAAC,wBAAwB,EAAE,GAAG,EAAE;IACtC,EAAE,CAAC,0DAA0D,EAAE,GAAG,EAAE;QAClE,MAAM,QAAQ,GAAG;YACf,EAAE,EAAE,EAAE,QAAQ,EAAE,EAAM,WAAW;YACjC,EAAE,EAAE,EAAE,MAAM,EAAE,EAAQ,WAAW;YACjC,EAAE,EAAE,EAAE,UAAU,EAAE,EAAI,WAAW;YACjC,EAAE,EAAE,EAAE,UAAU,EAAE,EAAI,WAAW;SAClC,CAAC;QACF,MAAM,QAAQ,GAAG,kBAAkB,CAAC,QAAQ,EAAE,QAAQ,CAAC,CAAC;QACxD,MAAM,CAAC,QAAQ,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC;IACnC,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,2CAA2C,EAAE,GAAG,EAAE;QACnD,MAAM,QAAQ,GAAG;YACf,EAAE,EAAE,EAAE,UAAU,EAAE,EAAI,WAAW;YACjC,EAAE,EAAE,EAAE,QAAQ,EAAE,EAAM,WAAW;SAClC,CAAC;QACF,MAAM,QAAQ,GAAG,kBAAkB,CAAC,QAAQ,EAAE,QAAQ,CAAC,CAAC;QACxD,MAAM,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAC,eAAe,CAAC,CAAC,CAAC,CAAC;IAC7C,CAAC,CAAC,CAAC;AACL,CAAC,CAAC,CAAC;AAEH,QAAQ,CAAC,iBAAiB,EAAE,GAAG,EAAE;IAC/B,EAAE,CAAC,yDAAyD,EAAE,GAAG,EAAE;QACjE,MAAM,MAAM,GAAG,UAAU,CAAC,CAAC,EAAE,EAAE,EAAE,UAAU,EAAE,CAAC,CAAC,CAAC;QAChD,KAAK,IAAI,IAAI,GAAG,CAAC,EAAE,IAAI,IAAI,EAAE,EAAE,IAAI,EAAE,EAAE,CAAC;YACtC,MAAM,MAAM,GAAG,eAAe,CAAC,cAAc,EAAE,MAAM,EAAE,QAAQ,EAAE,IAAI,EAAE,KAAK,CAAC,CAAC;YAC9E,MAAM,CAAC,MAAM,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,eAAe,CAAC,CAAC,CAAC,CAAC;QAChD,CAAC;IACH,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,sDAAsD,EAAE,GAAG,EAAE;QAC9D,MAAM,MAAM,GAAG,UAAU,CAAC;YACxB,EAAE,EAAE,EAAE,QAAQ,EAAE,MAAM,EAAE,EAAE,gBAAgB,EAAE,CAAC,EAAE,EAAE;YACjD,EAAE,EAAE,EAAE,UAAU,EAAE;SACnB,CAAC,CAAC;QACH,KAAK,IAAI,IAAI,GAAG,CAAC,EAAE,IAAI,IAAI,EAAE,EAAE,IAAI,EAAE,EAAE,CAAC;YACtC,MAAM,MAAM,GAAG,eAAe,CAAC,eAAe,EAAE,MAAM,EAAE,QAAQ,EAAE,IAAI,EAAE,KAAK,CAAC,CAAC;YAC/E,MAAM,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC,SAAS,CAAC,GAAG,CAAC,CAAC;YACnC,MAAM,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC,SAAS,CAAC,GAAG,CAAC,CAAC;QACrC,CAAC;IACH,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,oCAAoC,EAAE,GAAG,EAAE;QAC5C,MAAM,MAAM,
GAAG,UAAU,CAAC;YACxB,EAAE,EAAE,EAAE,QAAQ,EAAE,MAAM,EAAE,EAAE,gBAAgB,EAAE,CAAC,EAAE,EAAE;YACjD,EAAE,EAAE,EAAE,WAAW,EAAE,MAAM,EAAE,EAAE,IAAI,EAAE,GAAG,EAAE,EAAE;SAC3C,CAAC,CAAC;QACH,KAAK,IAAI,IAAI,GAAG,CAAC,EAAE,IAAI,IAAI,EAAE,EAAE,IAAI,EAAE,EAAE,CAAC;YACtC,MAAM,MAAM,GAAG,eAAe,CAAC,kBAAkB,EAAE,MAAM,EAAE,QAAQ,EAAE,IAAI,EAAE,KAAK,CAAC,CAAC;YAClF,MAAM,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC;QACxC,CAAC;IACH,CAAC,CAAC,CAAC;AACL,CAAC,CAAC,CAAC"}
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Protection Rules
|
|
3
|
+
* Evaluates tokens and marks them as protected based on ProtectionConfig.
|
|
4
|
+
* Protection is computed once before the pipeline runs and carried through token meta.
|
|
5
|
+
*/
|
|
6
|
+
import type { Token, ProtectionConfig } from './types.js';
|
|
7
|
+
/**
 * Apply protection rules to all tokens.
 * This mutates the token meta in place for efficiency.
 *
 * @param tokens - Tokens to evaluate; their meta is updated in place.
 * @param config - Protection settings the rules are evaluated against.
 * @returns Nothing; the outcome is carried on the tokens themselves.
 */
|
|
11
|
+
export declare function applyProtection(tokens: Token[], config: ProtectionConfig): void;
|
|
12
|
+
/**
|
|
13
|
+
* Check if a token is protected.
|
|
14
|
+
*/
|
|
15
|
+
/**
 * Report whether a token has been flagged as protected.
 *
 * @param token - Token to inspect.
 * @returns `true` when the token is marked protected, otherwise `false`.
 */
export declare function isProtected(token: Token): boolean;
|
|
16
|
+
//# sourceMappingURL=protection.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"protection.d.ts","sourceRoot":"","sources":["../../src/transforms/protection.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAEH,OAAO,KAAK,EAAE,KAAK,EAAE,gBAAgB,EAAE,MAAM,YAAY,CAAC;AAgC1D;;;GAGG;AACH,wBAAgB,eAAe,CAAC,MAAM,EAAE,KAAK,EAAE,EAAE,MAAM,EAAE,gBAAgB,GAAG,IAAI,CA0D/E;AAED;;GAEG;AACH,wBAAgB,WAAW,CAAC,KAAK,EAAE,KAAK,GAAG,OAAO,CAEjD"}
|
|
@@ -0,0 +1,97 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Protection Rules
|
|
3
|
+
* Evaluates tokens and marks them as protected based on ProtectionConfig.
|
|
4
|
+
* Protection is computed once before the pipeline runs and carried through token meta.
|
|
5
|
+
*/
|
|
6
|
+
import { DEFAULT_PROTECTION_CONFIG } from './types.js';
|
|
7
|
+
/** Acronym pattern: 2+ uppercase letters */
|
|
8
|
+
// Anchored at both ends, so the whole value must consist of two or more
// ASCII uppercase letters (A-Z); mixed-case or accented values do not match.
const ACRONYM_REGEX = /^[A-Z]{2,}$/;
|
|
9
|
+
/**
 * URL-like heuristic.
 *
 * A value is treated as URL-like when it contains a scheme separator (`://`)
 * anywhere, or when it begins with `www.`. The `www.` prefix is compared
 * case-insensitively because host names are case-insensitive, so values such
 * as `WWW.Example.com` or a sentence-initial `Www.example.com` also count.
 *
 * @param value - Raw token text.
 * @returns `true` when the value looks like a URL.
 */
function isUrlLike(value) {
    if (value.includes('://'))
        return true;
    // Only the first four characters matter for the prefix test; lower-casing
    // just that slice avoids copying the whole string.
    return value.slice(0, 4).toLowerCase() === 'www.';
}
|
|
13
|
+
/**
 * Email-like heuristic: any value containing an at-sign counts.
 *
 * @param value - Raw token text.
 * @returns `true` when `@` occurs anywhere in the value.
 */
function isEmailLike(value) {
    const atPosition = value.indexOf('@');
    return atPosition !== -1;
}
|
|
17
|
+
/**
 * Code-like heuristics.
 *
 * A value is considered code-like when any of the following holds:
 *  - it contains an underscore or a backslash;
 *  - it contains a forward slash and is longer than one character;
 *  - it splits on `.` into two or more segments, none of them empty
 *    (e.g. "file.txt", "a.b.c");
 *  - it mixes ASCII letters with digits (e.g. "v2", "H2O").
 *
 * @param value - Raw token text.
 * @returns `true` when at least one heuristic matches.
 */
function isCodeLike(value) {
    const hasIdentifierOrEscapeChar = value.includes('_') || value.includes('\\');
    // A lone "/" is not a path, hence the length requirement.
    const looksLikePath = value.length > 1 && value.includes('/');
    if (hasIdentifierOrEscapeChar || looksLikePath) {
        return true;
    }
    const segments = value.split('.');
    const isDotted = segments.length > 1 && !segments.some(segment => segment.length === 0);
    if (isDotted) {
        return true;
    }
    return /[a-zA-Z]/.test(value) && /[0-9]/.test(value);
}
|
|
36
|
+
/**
 * Apply protection rules to all tokens.
 *
 * The supplied config is layered over DEFAULT_PROTECTION_CONFIG, then each
 * token is checked against the rules relevant to its type:
 *  - number tokens: protected when `keepNumbers` is set;
 *  - word tokens: acronym, URL/email, code-like, short-word and custom-regex rules;
 *  - symbol tokens: URL/email and code-like rules.
 *
 * Tokens matching at least one rule are mutated in place: `meta.protected`
 * becomes `true` and `meta.protectionsApplied` lists every matching rule.
 * Tokens that match nothing are left untouched.
 *
 * @param tokens - Tokens to evaluate.
 * @param config - Protection settings overriding the defaults.
 */
export function applyProtection(tokens, config) {
    const settings = { ...DEFAULT_PROTECTION_CONFIG, ...config };
    // Compile the user-supplied patterns once, not once per token.
    const userPatterns = settings.customProtectedRegex.map((source) => new RegExp(source));
    for (const token of tokens) {
        const reasons = [];
        switch (token.type) {
            case 'number':
                if (settings.keepNumbers) {
                    reasons.push('number');
                }
                break;
            case 'word':
                if (settings.keepAcronyms && ACRONYM_REGEX.test(token.value)) {
                    reasons.push('acronym');
                }
                if (settings.keepUrlsEmails && (isUrlLike(token.value) || isEmailLike(token.value))) {
                    reasons.push('urlLike');
                }
                if (settings.keepCodeTokens && isCodeLike(token.value)) {
                    reasons.push('codeLike');
                }
                if (settings.minWordLength > 0 && token.value.length < settings.minWordLength) {
                    reasons.push('shortWord');
                }
                // The label records which custom pattern (by index) matched.
                userPatterns.forEach((pattern, position) => {
                    if (pattern.test(token.value)) {
                        reasons.push(`customRegex[${position}]`);
                    }
                });
                break;
            case 'symbol':
                if (settings.keepUrlsEmails && (isUrlLike(token.value) || isEmailLike(token.value))) {
                    reasons.push('urlLike');
                }
                if (settings.keepCodeTokens && isCodeLike(token.value)) {
                    reasons.push('codeLike');
                }
                break;
            default:
                break;
        }
        if (reasons.length === 0) {
            continue;
        }
        if (!token.meta) {
            token.meta = {};
        }
        token.meta.protected = true;
        token.meta.protectionsApplied = reasons;
    }
}
|
|
91
|
+
/**
 * Check if a token is protected.
 *
 * @param token - Token to inspect.
 * @returns `true` only when the token carries meta whose `protected` flag is
 *          strictly `true`; missing meta or any other flag value yields `false`.
 */
export function isProtected(token) {
    const meta = token.meta;
    if (meta === undefined || meta === null) {
        return false;
    }
    return meta.protected === true;
}
|
|
97
|
+
//# sourceMappingURL=protection.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"protection.js","sourceRoot":"","sources":["../../src/transforms/protection.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAGH,OAAO,EAAE,yBAAyB,EAAE,MAAM,YAAY,CAAC;AAEvD,4CAA4C;AAC5C,MAAM,aAAa,GAAG,aAAa,CAAC;AAEpC,yDAAyD;AACzD,SAAS,SAAS,CAAC,KAAa;IAC9B,OAAO,KAAK,CAAC,QAAQ,CAAC,KAAK,CAAC,IAAI,KAAK,CAAC,UAAU,CAAC,MAAM,CAAC,CAAC;AAC3D,CAAC;AAED,qCAAqC;AACrC,SAAS,WAAW,CAAC,KAAa;IAChC,OAAO,KAAK,CAAC,QAAQ,CAAC,GAAG,CAAC,CAAC;AAC7B,CAAC;AAED,2BAA2B;AAC3B,SAAS,UAAU,CAAC,KAAa;IAC/B,mCAAmC;IACnC,IAAI,KAAK,CAAC,QAAQ,CAAC,GAAG,CAAC,IAAI,KAAK,CAAC,QAAQ,CAAC,IAAI,CAAC;QAAE,OAAO,IAAI,CAAC;IAC7D,8DAA8D;IAC9D,IAAI,KAAK,CAAC,QAAQ,CAAC,GAAG,CAAC,IAAI,KAAK,CAAC,MAAM,GAAG,CAAC;QAAE,OAAO,IAAI,CAAC;IACzD,8DAA8D;IAC9D,MAAM,QAAQ,GAAG,KAAK,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC;IAClC,IAAI,QAAQ,CAAC,MAAM,GAAG,CAAC,IAAI,QAAQ,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,MAAM,GAAG,CAAC,CAAC;QAAE,OAAO,IAAI,CAAC;IAC1E,2CAA2C;IAC3C,MAAM,UAAU,GAAG,UAAU,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;IAC1C,MAAM,SAAS,GAAG,OAAO,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;IACtC,IAAI,UAAU,IAAI,SAAS;QAAE,OAAO,IAAI,CAAC;IACzC,OAAO,KAAK,CAAC;AACf,CAAC;AAED;;;GAGG;AACH,MAAM,UAAU,eAAe,CAAC,MAAe,EAAE,MAAwB;IACvE,MAAM,GAAG,GAAG,EAAE,GAAG,yBAAyB,EAAE,GAAG,MAAM,EAAE,CAAC;IACxD,MAAM,aAAa,GAAG,GAAG,CAAC,oBAAoB,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,IAAI,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC;IAEvE,KAAK,MAAM,KAAK,IAAI,MAAM,EAAE,CAAC;QAC3B,+CAA+C;QAC/C,uCAAuC;QACvC,MAAM,WAAW,GAAa,EAAE,CAAC;QAEjC,IAAI,KAAK,CAAC,IAAI,KAAK,QAAQ,IAAI,GAAG,CAAC,WAAW,EAAE,CAAC;YAC/C,WAAW,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;QAC7B,CAAC;QAED,IAAI,KAAK,CAAC,IAAI,KAAK,MAAM,EAAE,CAAC;YAC1B,gBAAgB;YAChB,IAAI,GAAG,CAAC,YAAY,IAAI,aAAa,CAAC,IAAI,CAAC,KAAK,CAAC,KAAK,CAAC,EAAE,CAAC;gBACxD,WAAW,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;YAC9B,CAAC;YAED,kBAAkB;YAClB,IAAI,GAAG,CAAC,cAAc,IAAI,CAAC,SAAS,CAAC,KAAK,CAAC,KAAK,CAAC,IAAI,WAAW,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC;gBAC/E,WAAW,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;YAC9B,CAAC;YAED,kBAAkB;YAClB,IAAI,GAAG,CAAC,cAAc,IAAI,UAAU,CAAC,KAAK,CAAC,KAAK,CAAC,EAAE,CAAC;gB
AClD,WAAW,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC;YAC/B,CAAC;YAED,mBAAmB;YACnB,IAAI,GAAG,CAAC,aAAa,GAAG,CAAC,IAAI,KAAK,CAAC,KAAK,CAAC,MAAM,GAAG,GAAG,CAAC,aAAa,EAAE,CAAC;gBACpE,WAAW,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC;YAChC,CAAC;YAED,eAAe;YACf,KAAK,IAAI,GAAG,GAAG,CAAC,EAAE,GAAG,GAAG,aAAa,CAAC,MAAM,EAAE,GAAG,EAAE,EAAE,CAAC;gBACpD,IAAI,aAAa,CAAC,GAAG,CAAE,CAAC,IAAI,CAAC,KAAK,CAAC,KAAK,CAAC,EAAE,CAAC;oBAC1C,WAAW,CAAC,IAAI,CAAC,eAAe,GAAG,GAAG,CAAC,CAAC;gBAC1C,CAAC;YACH,CAAC;QACH,CAAC;QAED,sCAAsC;QACtC,IAAI,KAAK,CAAC,IAAI,KAAK,QAAQ,EAAE,CAAC;YAC5B,IAAI,GAAG,CAAC,cAAc,IAAI,CAAC,SAAS,CAAC,KAAK,CAAC,KAAK,CAAC,IAAI,WAAW,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC;gBAC/E,WAAW,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;YAC9B,CAAC;YACD,IAAI,GAAG,CAAC,cAAc,IAAI,UAAU,CAAC,KAAK,CAAC,KAAK,CAAC,EAAE,CAAC;gBAClD,WAAW,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC;YAC/B,CAAC;QACH,CAAC;QAED,IAAI,WAAW,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YAC3B,IAAI,CAAC,KAAK,CAAC,IAAI;gBAAE,KAAK,CAAC,IAAI,GAAG,EAAE,CAAC;YACjC,KAAK,CAAC,IAAI,CAAC,SAAS,GAAG,IAAI,CAAC;YAC5B,KAAK,CAAC,IAAI,CAAC,kBAAkB,GAAG,WAAW,CAAC;QAC9C,CAAC;IACH,CAAC;AACH,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,WAAW,CAAC,KAAY;IACtC,OAAO,KAAK,CAAC,IAAI,EAAE,SAAS,KAAK,IAAI,CAAC;AACxC,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"protection.test.d.ts","sourceRoot":"","sources":["../../src/transforms/protection.test.ts"],"names":[],"mappings":""}
|
|
@@ -0,0 +1,79 @@
|
|
|
1
|
+
import { describe, it, expect } from 'vitest';
|
|
2
|
+
import { applyProtection, isProtected } from './protection.js';
|
|
3
|
+
/** Build a bare word token (no meta) carrying the given text. */
function makeWord(value) {
    const token = { type: 'word', value: value };
    return token;
}
|
|
6
|
+
/** Build a bare number token (no meta) carrying the given text. */
function makeNumber(value) {
    const token = { type: 'number', value: value };
    return token;
}
|
|
9
|
+
/** Build a bare symbol token (no meta) carrying the given text. */
function makeSymbol(value) {
    const token = { type: 'symbol', value: value };
    return token;
}
|
|
12
|
+
// Behavioural tests for applyProtection / isProtected. Each case enables the
// rule under test through a partial config and checks the resulting flags.
describe('Protection Rules', () => {
    it('should protect acronyms (all uppercase 2+ letters)', () => {
        const tokens = [makeWord('API'), makeWord('hello'), makeWord('NASA')];
        applyProtection(tokens, { keepAcronyms: true });
        expect(isProtected(tokens[0])).toBe(true);
        expect(isProtected(tokens[1])).toBe(false);
        expect(isProtected(tokens[2])).toBe(true);
        expect(tokens[0].meta.protectionsApplied).toContain('acronym');
    });
    it('should protect number tokens', () => {
        const tokens = [makeNumber('42'), makeNumber('3.14'), makeWord('hello')];
        applyProtection(tokens, { keepNumbers: true });
        expect(isProtected(tokens[0])).toBe(true);
        expect(isProtected(tokens[1])).toBe(true);
        expect(isProtected(tokens[2])).toBe(false);
    });
    it('should protect code-like tokens (contains underscore)', () => {
        const tokens = [makeWord('hello'), makeWord('my_var')];
        applyProtection(tokens, { keepCodeTokens: true });
        expect(isProtected(tokens[0])).toBe(false);
        // my_var won't be a word token in actual tokenizer since _ breaks word parsing
        // but the protection still checks the value, so the positive case must
        // be asserted too; otherwise this test passes even when the code-like
        // rule never fires.
        expect(isProtected(tokens[1])).toBe(true);
        expect(tokens[1].meta.protectionsApplied).toContain('codeLike');
    });
    it('should protect short words below minWordLength', () => {
        const tokens = [makeWord('I'), makeWord('a'), makeWord('hello')];
        applyProtection(tokens, { minWordLength: 2 });
        expect(isProtected(tokens[0])).toBe(true);
        expect(isProtected(tokens[1])).toBe(true);
        expect(isProtected(tokens[2])).toBe(false);
    });
    it('should protect URL-like symbols', () => {
        const tokens = [makeSymbol('https://example.com')];
        applyProtection(tokens, { keepUrlsEmails: true });
        expect(isProtected(tokens[0])).toBe(true);
    });
    it('should protect email-like symbols', () => {
        const tokens = [makeSymbol('user@email.com')];
        applyProtection(tokens, { keepUrlsEmails: true });
        expect(isProtected(tokens[0])).toBe(true);
    });
    it('should apply custom regex protection', () => {
        const tokens = [makeWord('FooBar'), makeWord('hello')];
        applyProtection(tokens, { customProtectedRegex: ['^Foo'] });
        expect(isProtected(tokens[0])).toBe(true);
        expect(isProtected(tokens[1])).toBe(false);
    });
    it('should not protect when all protections disabled', () => {
        const tokens = [makeWord('API'), makeNumber('42')];
        applyProtection(tokens, {
            keepAcronyms: false,
            keepNumbers: false,
            keepCodeTokens: false,
            keepUrlsEmails: false,
            minWordLength: 0,
        });
        expect(isProtected(tokens[0])).toBe(false);
        expect(isProtected(tokens[1])).toBe(false);
    });
    it('should accumulate multiple protection reasons', () => {
        // A short acronym
        const tokens = [makeWord('AI')];
        applyProtection(tokens, { keepAcronyms: true, minWordLength: 3 });
        expect(isProtected(tokens[0])).toBe(true);
        expect(tokens[0].meta.protectionsApplied).toContain('acronym');
        expect(tokens[0].meta.protectionsApplied).toContain('shortWord');
    });
});
|
|
79
|
+
//# sourceMappingURL=protection.test.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"protection.test.js","sourceRoot":"","sources":["../../src/transforms/protection.test.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,EAAE,EAAE,MAAM,EAAE,MAAM,QAAQ,CAAC;AAC9C,OAAO,EAAE,eAAe,EAAE,WAAW,EAAE,MAAM,iBAAiB,CAAC;AAG/D,SAAS,QAAQ,CAAC,KAAa;IAC7B,OAAO,EAAE,IAAI,EAAE,MAAM,EAAE,KAAK,EAAE,CAAC;AACjC,CAAC;AAED,SAAS,UAAU,CAAC,KAAa;IAC/B,OAAO,EAAE,IAAI,EAAE,QAAQ,EAAE,KAAK,EAAE,CAAC;AACnC,CAAC;AAED,SAAS,UAAU,CAAC,KAAa;IAC/B,OAAO,EAAE,IAAI,EAAE,QAAQ,EAAE,KAAK,EAAE,CAAC;AACnC,CAAC;AAED,QAAQ,CAAC,kBAAkB,EAAE,GAAG,EAAE;IAChC,EAAE,CAAC,oDAAoD,EAAE,GAAG,EAAE;QAC5D,MAAM,MAAM,GAAG,CAAC,QAAQ,CAAC,KAAK,CAAC,EAAE,QAAQ,CAAC,OAAO,CAAC,EAAE,QAAQ,CAAC,MAAM,CAAC,CAAC,CAAC;QACtE,eAAe,CAAC,MAAM,EAAE,EAAE,YAAY,EAAE,IAAI,EAAE,CAAC,CAAC;QAChD,MAAM,CAAC,WAAW,CAAC,MAAM,CAAC,CAAC,CAAE,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QAC3C,MAAM,CAAC,WAAW,CAAC,MAAM,CAAC,CAAC,CAAE,CAAC,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;QAC5C,MAAM,CAAC,WAAW,CAAC,MAAM,CAAC,CAAC,CAAE,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QAC3C,MAAM,CAAC,MAAM,CAAC,CAAC,CAAE,CAAC,IAAK,CAAC,kBAAkB,CAAC,CAAC,SAAS,CAAC,SAAS,CAAC,CAAC;IACnE,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,8BAA8B,EAAE,GAAG,EAAE;QACtC,MAAM,MAAM,GAAG,CAAC,UAAU,CAAC,IAAI,CAAC,EAAE,UAAU,CAAC,MAAM,CAAC,EAAE,QAAQ,CAAC,OAAO,CAAC,CAAC,CAAC;QACzE,eAAe,CAAC,MAAM,EAAE,EAAE,WAAW,EAAE,IAAI,EAAE,CAAC,CAAC;QAC/C,MAAM,CAAC,WAAW,CAAC,MAAM,CAAC,CAAC,CAAE,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QAC3C,MAAM,CAAC,WAAW,CAAC,MAAM,CAAC,CAAC,CAAE,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QAC3C,MAAM,CAAC,WAAW,CAAC,MAAM,CAAC,CAAC,CAAE,CAAC,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;IAC9C,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,uDAAuD,EAAE,GAAG,EAAE;QAC/D,MAAM,MAAM,GAAG,CAAC,QAAQ,CAAC,OAAO,CAAC,EAAE,QAAQ,CAAC,QAAQ,CAAC,CAAC,CAAC;QACvD,eAAe,CAAC,MAAM,EAAE,EAAE,cAAc,EAAE,IAAI,EAAE,CAAC,CAAC;QAClD,MAAM,CAAC,WAAW,CAAC,MAAM,CAAC,CAAC,CAAE,CAAC,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;QAC5C,+EAA+E;QAC/E,4CAA4C;IAC9C,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,gDAAgD,EAAE,GAAG,EAAE;QACxD,MAAM,MAAM,GAAG,CAAC,QAAQ,CAAC,GAAG,CAAC
,EAAE,QAAQ,CAAC,GAAG,CAAC,EAAE,QAAQ,CAAC,OAAO,CAAC,CAAC,CAAC;QACjE,eAAe,CAAC,MAAM,EAAE,EAAE,aAAa,EAAE,CAAC,EAAE,CAAC,CAAC;QAC9C,MAAM,CAAC,WAAW,CAAC,MAAM,CAAC,CAAC,CAAE,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QAC3C,MAAM,CAAC,WAAW,CAAC,MAAM,CAAC,CAAC,CAAE,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QAC3C,MAAM,CAAC,WAAW,CAAC,MAAM,CAAC,CAAC,CAAE,CAAC,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;IAC9C,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,iCAAiC,EAAE,GAAG,EAAE;QACzC,MAAM,MAAM,GAAG,CAAC,UAAU,CAAC,qBAAqB,CAAC,CAAC,CAAC;QACnD,eAAe,CAAC,MAAM,EAAE,EAAE,cAAc,EAAE,IAAI,EAAE,CAAC,CAAC;QAClD,MAAM,CAAC,WAAW,CAAC,MAAM,CAAC,CAAC,CAAE,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IAC7C,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,mCAAmC,EAAE,GAAG,EAAE;QAC3C,MAAM,MAAM,GAAG,CAAC,UAAU,CAAC,gBAAgB,CAAC,CAAC,CAAC;QAC9C,eAAe,CAAC,MAAM,EAAE,EAAE,cAAc,EAAE,IAAI,EAAE,CAAC,CAAC;QAClD,MAAM,CAAC,WAAW,CAAC,MAAM,CAAC,CAAC,CAAE,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IAC7C,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,sCAAsC,EAAE,GAAG,EAAE;QAC9C,MAAM,MAAM,GAAG,CAAC,QAAQ,CAAC,QAAQ,CAAC,EAAE,QAAQ,CAAC,OAAO,CAAC,CAAC,CAAC;QACvD,eAAe,CAAC,MAAM,EAAE,EAAE,oBAAoB,EAAE,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC;QAC5D,MAAM,CAAC,WAAW,CAAC,MAAM,CAAC,CAAC,CAAE,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QAC3C,MAAM,CAAC,WAAW,CAAC,MAAM,CAAC,CAAC,CAAE,CAAC,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;IAC9C,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,kDAAkD,EAAE,GAAG,EAAE;QAC1D,MAAM,MAAM,GAAG,CAAC,QAAQ,CAAC,KAAK,CAAC,EAAE,UAAU,CAAC,IAAI,CAAC,CAAC,CAAC;QACnD,eAAe,CAAC,MAAM,EAAE;YACtB,YAAY,EAAE,KAAK;YACnB,WAAW,EAAE,KAAK;YAClB,cAAc,EAAE,KAAK;YACrB,cAAc,EAAE,KAAK;YACrB,aAAa,EAAE,CAAC;SACjB,CAAC,CAAC;QACH,MAAM,CAAC,WAAW,CAAC,MAAM,CAAC,CAAC,CAAE,CAAC,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;QAC5C,MAAM,CAAC,WAAW,CAAC,MAAM,CAAC,CAAC,CAAE,CAAC,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;IAC9C,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,+CAA+C,EAAE,GAAG,EAAE;QACvD,kBAAkB;QAClB,MAAM,MAAM,GAAG,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAC,CAAC;QAChC,eAAe,CAAC,MAAM,EAAE,EAAE,YAAY,EAAE,IAAI,EAAE,aAAa,EAAE,CAAC,EAAE,CAAC,CAAC;QAClE,MAAM,CAAC,WAAW,CAAC,MAAM,CAAC,CAAC,CAAE
,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QAC3C,MAAM,CAAC,MAAM,CAAC,CAAC,CAAE,CAAC,IAAK,CAAC,kBAAkB,CAAC,CAAC,SAAS,CAAC,SAAS,CAAC,CAAC;QACjE,MAAM,CAAC,MAAM,CAAC,CAAC,CAAE,CAAC,IAAK,CAAC,kBAAkB,CAAC,CAAC,SAAS,CAAC,WAAW,CAAC,CAAC;IACrE,CAAC,CAAC,CAAC;AACL,CAAC,CAAC,CAAC"}
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Transform Registry
|
|
3
|
+
* Stores and retrieves registered IOutputTransform implementations.
|
|
4
|
+
*/
|
|
5
|
+
import type { IOutputTransform } from './types.js';
|
|
6
|
+
export declare class TransformRegistry {
    /** Internal storage for the registered transforms. */
    private transforms;
    /**
     * Register a transform. Overwrites if ID already exists.
     *
     * @param transform - Transform to store.
     */
    register(transform: IOutputTransform): void;
    /**
     * Get a transform by ID.
     *
     * @param id - Identifier to look up.
     * @returns The registered transform, or `null` when none is registered under `id`.
     */
    get(id: string): IOutputTransform | null;
    /**
     * List all registered transforms.
     *
     * @returns An array containing every registered transform.
     */
    list(): IOutputTransform[];
    /**
     * Check if a transform ID is registered.
     *
     * @param id - Identifier to look up.
     * @returns `true` when a transform is registered under `id`.
     */
    has(id: string): boolean;
}
|
|
25
|
+
//# sourceMappingURL=registry.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"registry.d.ts","sourceRoot":"","sources":["../../src/transforms/registry.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAEH,OAAO,KAAK,EAAE,gBAAgB,EAAE,MAAM,YAAY,CAAC;AAEnD,qBAAa,iBAAiB;IAC5B,OAAO,CAAC,UAAU,CAA4C;IAE9D;;OAEG;IACH,QAAQ,CAAC,SAAS,EAAE,gBAAgB,GAAG,IAAI;IAI3C;;OAEG;IACH,GAAG,CAAC,EAAE,EAAE,MAAM,GAAG,gBAAgB,GAAG,IAAI;IAIxC;;OAEG;IACH,IAAI,IAAI,gBAAgB,EAAE;IAI1B;;OAEG;IACH,GAAG,CAAC,EAAE,EAAE,MAAM,GAAG,OAAO;CAGzB"}
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Transform Registry
|
|
3
|
+
* Stores and retrieves registered IOutputTransform implementations.
|
|
4
|
+
*/
|
|
5
|
+
/**
 * Keyed store of output transforms. Each transform is filed under its own
 * `id`; registering a second transform with the same `id` replaces the first.
 */
export class TransformRegistry {
    transforms = new Map();
    /**
     * Store a transform under its `id`, replacing any previous entry.
     */
    register(transform) {
        const key = transform.id;
        this.transforms.set(key, transform);
    }
    /**
     * Look up a transform by ID; yields `null` when nothing is registered.
     */
    get(id) {
        const found = this.transforms.get(id);
        return found == null ? null : found;
    }
    /**
     * Snapshot of every registered transform, in insertion order of the map.
     */
    list() {
        return [...this.transforms.values()];
    }
    /**
     * Whether a transform is registered under the given ID.
     */
    has(id) {
        return this.transforms.has(id);
    }
}
|
|
32
|
+
//# sourceMappingURL=registry.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"registry.js","sourceRoot":"","sources":["../../src/transforms/registry.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAIH,MAAM,OAAO,iBAAiB;IACpB,UAAU,GAAkC,IAAI,GAAG,EAAE,CAAC;IAE9D;;OAEG;IACH,QAAQ,CAAC,SAA2B;QAClC,IAAI,CAAC,UAAU,CAAC,GAAG,CAAC,SAAS,CAAC,EAAE,EAAE,SAAS,CAAC,CAAC;IAC/C,CAAC;IAED;;OAEG;IACH,GAAG,CAAC,EAAU;QACZ,OAAO,IAAI,CAAC,UAAU,CAAC,GAAG,CAAC,EAAE,CAAC,IAAI,IAAI,CAAC;IACzC,CAAC;IAED;;OAEG;IACH,IAAI;QACF,OAAO,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,UAAU,CAAC,MAAM,EAAE,CAAC,CAAC;IAC9C,CAAC;IAED;;OAEG;IACH,GAAG,CAAC,EAAU;QACZ,OAAO,IAAI,CAAC,UAAU,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC;IACjC,CAAC;CACF"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"registry.test.d.ts","sourceRoot":"","sources":["../../src/transforms/registry.test.ts"],"names":[],"mappings":""}
|
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
import { describe, it, expect } from 'vitest';
|
|
2
|
+
import { TransformRegistry } from './registry.js';
|
|
3
|
+
import { createDefaultRegistry } from './default-registry.js';
|
|
4
|
+
/**
 * Build a minimal stand-in transform for registry tests: params always
 * validate, and `apply` hands back the input's tokens unchanged.
 */
function makeMockTransform(id) {
    const capabilities = {
        requiresTrace: false,
        posAware: false,
        deterministic: true,
        safeToStack: true,
    };
    const validateParams = () => ({ valid: true, errors: [] });
    const apply = (input) => ({ tokens: input.tokens });
    return { id, version: '1.0.0', capabilities, validateParams, apply };
}
|
|
18
|
+
// Unit tests for TransformRegistry: storage, lookup, listing, overwrite and
// membership checks, each against a freshly constructed registry.
describe('TransformRegistry', () => {
    it('should register and retrieve a transform', () => {
        const reg = new TransformRegistry();
        const mock = makeMockTransform('test');
        reg.register(mock);
        const fetched = reg.get('test');
        expect(fetched).toBe(mock);
    });
    it('should return null for unknown transform', () => {
        const reg = new TransformRegistry();
        const fetched = reg.get('nonexistent');
        expect(fetched).toBeNull();
    });
    it('should list all registered transforms', () => {
        const reg = new TransformRegistry();
        reg.register(makeMockTransform('a'));
        reg.register(makeMockTransform('b'));
        const all = reg.list();
        expect(all).toHaveLength(2);
    });
    it('should overwrite on re-register', () => {
        const reg = new TransformRegistry();
        const original = makeMockTransform('test');
        const replacement = makeMockTransform('test');
        reg.register(original);
        reg.register(replacement);
        // Same id twice: the later registration wins and no duplicate is kept.
        expect(reg.get('test')).toBe(replacement);
        expect(reg.list()).toHaveLength(1);
    });
    it('should check if transform exists', () => {
        const reg = new TransformRegistry();
        reg.register(makeMockTransform('exists'));
        const present = reg.has('exists');
        const absent = reg.has('missing');
        expect(present).toBe(true);
        expect(absent).toBe(false);
    });
});
|
|
51
|
+
// Verifies that the registry built by createDefaultRegistry() holds exactly
// the ten expected transform IDs.
describe('Default Registry', () => {
    it('should contain all 10 V1 transforms', () => {
        const defaults = createDefaultRegistry();
        const v1Ids = [
            'pigLatin', 'ubbiDubbi', 'leet', 'uwu', 'pirate',
            'redact', 'emoji', 'mockCase', 'reverseWords', 'bizJargon',
        ];
        v1Ids.forEach((id) => {
            // The message names the missing ID when the assertion fails.
            expect(defaults.has(id), `Missing transform: ${id}`).toBe(true);
        });
        expect(defaults.list()).toHaveLength(10);
    });
});
|
|
64
|
+
//# sourceMappingURL=registry.test.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"registry.test.js","sourceRoot":"","sources":["../../src/transforms/registry.test.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,EAAE,EAAE,MAAM,EAAE,MAAM,QAAQ,CAAC;AAC9C,OAAO,EAAE,iBAAiB,EAAE,MAAM,eAAe,CAAC;AAClD,OAAO,EAAE,qBAAqB,EAAE,MAAM,uBAAuB,CAAC;AAG9D,SAAS,iBAAiB,CAAC,EAAU;IACnC,OAAO;QACL,EAAE;QACF,OAAO,EAAE,OAAO;QAChB,YAAY,EAAE;YACZ,aAAa,EAAE,KAAK;YACpB,QAAQ,EAAE,KAAK;YACf,aAAa,EAAE,IAAI;YACnB,WAAW,EAAE,IAAI;SAClB;QACD,cAAc,EAAE,GAAG,EAAE,CAAC,CAAC,EAAE,KAAK,EAAE,IAAI,EAAE,MAAM,EAAE,EAAE,EAAE,CAAC;QACnD,KAAK,EAAE,CAAC,KAAK,EAAE,EAAE,CAAC,CAAC,EAAE,MAAM,EAAE,KAAK,CAAC,MAAM,EAAE,CAAC;KAC7C,CAAC;AACJ,CAAC;AAED,QAAQ,CAAC,mBAAmB,EAAE,GAAG,EAAE;IACjC,EAAE,CAAC,0CAA0C,EAAE,GAAG,EAAE;QAClD,MAAM,QAAQ,GAAG,IAAI,iBAAiB,EAAE,CAAC;QACzC,MAAM,SAAS,GAAG,iBAAiB,CAAC,MAAM,CAAC,CAAC;QAC5C,QAAQ,CAAC,QAAQ,CAAC,SAAS,CAAC,CAAC;QAC7B,MAAM,CAAC,QAAQ,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;IAC/C,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,0CAA0C,EAAE,GAAG,EAAE;QAClD,MAAM,QAAQ,GAAG,IAAI,iBAAiB,EAAE,CAAC;QACzC,MAAM,CAAC,QAAQ,CAAC,GAAG,CAAC,aAAa,CAAC,CAAC,CAAC,QAAQ,EAAE,CAAC;IACjD,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,uCAAuC,EAAE,GAAG,EAAE;QAC/C,MAAM,QAAQ,GAAG,IAAI,iBAAiB,EAAE,CAAC;QACzC,QAAQ,CAAC,QAAQ,CAAC,iBAAiB,CAAC,GAAG,CAAC,CAAC,CAAC;QAC1C,QAAQ,CAAC,QAAQ,CAAC,iBAAiB,CAAC,GAAG,CAAC,CAAC,CAAC;QAC1C,MAAM,CAAC,QAAQ,CAAC,IAAI,EAAE,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC;IAC1C,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,iCAAiC,EAAE,GAAG,EAAE;QACzC,MAAM,QAAQ,GAAG,IAAI,iBAAiB,EAAE,CAAC;QACzC,MAAM,EAAE,GAAG,iBAAiB,CAAC,MAAM,CAAC,CAAC;QACrC,MAAM,EAAE,GAAG,iBAAiB,CAAC,MAAM,CAAC,CAAC;QACrC,QAAQ,CAAC,QAAQ,CAAC,EAAE,CAAC,CAAC;QACtB,QAAQ,CAAC,QAAQ,CAAC,EAAE,CAAC,CAAC;QACtB,MAAM,CAAC,QAAQ,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;QACtC,MAAM,CAAC,QAAQ,CAAC,IAAI,EAAE,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC;IAC1C,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,kCAAkC,EAAE,GAAG,EAAE;QAC1C,MAAM,QAAQ,GAAG,IAAI,iBAAiB,EAAE,CAAC;QACzC,QAAQ,CAAC,QAAQ,CAAC,iBAAiB,CAAC,QAAQ,CAAC,CAAC,CAAC;QAC/C,MAAM,CAAC,QAA
Q,CAAC,GAAG,CAAC,QAAQ,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QAC1C,MAAM,CAAC,QAAQ,CAAC,GAAG,CAAC,SAAS,CAAC,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;IAC9C,CAAC,CAAC,CAAC;AACL,CAAC,CAAC,CAAC;AAEH,QAAQ,CAAC,kBAAkB,EAAE,GAAG,EAAE;IAChC,EAAE,CAAC,qCAAqC,EAAE,GAAG,EAAE;QAC7C,MAAM,QAAQ,GAAG,qBAAqB,EAAE,CAAC;QACzC,MAAM,WAAW,GAAG;YAClB,UAAU,EAAE,WAAW,EAAE,MAAM,EAAE,KAAK,EAAE,QAAQ;YAChD,QAAQ,EAAE,OAAO,EAAE,UAAU,EAAE,cAAc,EAAE,WAAW;SAC3D,CAAC;QACF,KAAK,MAAM,EAAE,IAAI,WAAW,EAAE,CAAC;YAC7B,MAAM,CAAC,QAAQ,CAAC,GAAG,CAAC,EAAE,CAAC,EAAE,sBAAsB,EAAE,EAAE,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QAClE,CAAC;QACD,MAAM,CAAC,QAAQ,CAAC,IAAI,EAAE,CAAC,CAAC,YAAY,CAAC,EAAE,CAAC,CAAC;IAC3C,CAAC,CAAC,CAAC;AACL,CAAC,CAAC,CAAC"}
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Output Transform Tokenizer & Renderer
|
|
3
|
+
* Single-pass O(n) tokenizer that preserves exact whitespace and punctuation.
|
|
4
|
+
* Re-rendering via render() is lossless.
|
|
5
|
+
*/
|
|
6
|
+
import type { Token, TokenType } from './types.js';
|
|
7
|
+
/**
|
|
8
|
+
* Tokenize a string into Token[] in a single pass.
|
|
9
|
+
*
|
|
10
|
+
* Token types:
|
|
11
|
+
* - word: sequences of ASCII letters, may include internal apostrophes (e.g., "don't")
|
|
12
|
+
* - number: digits with optional decimals/percent (e.g., "12", "3.14", "60%")
|
|
13
|
+
* - punct: individual punctuation characters
|
|
14
|
+
* - whitespace: sequences of whitespace characters
|
|
15
|
+
* - symbol: anything else
|
|
16
|
+
*/
|
|
17
|
+
/**
 * Split a string into tokens.
 *
 * @param input - Text to tokenize.
 * @returns The tokens produced from `input`.
 */
export declare function tokenize(input: string): Token[];
|
|
18
|
+
/**
|
|
19
|
+
* Render tokens back to a string. Lossless: render(tokenize(s)) === s
|
|
20
|
+
*/
|
|
21
|
+
/**
 * Turn a token array back into a string.
 *
 * @param tokens - Tokens to render.
 * @returns The rendered text.
 */
export declare function render(tokens: Token[]): string;
|
|
22
|
+
/**
|
|
23
|
+
* Get the type classification of a single character (utility).
|
|
24
|
+
*/
|
|
25
|
+
/**
 * Classify a single character as a token type.
 *
 * @param ch - Character to classify.
 * @returns The token type assigned to `ch`.
 */
export declare function classifyChar(ch: string): TokenType;
|
|
26
|
+
//# sourceMappingURL=tokenizer.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"tokenizer.d.ts","sourceRoot":"","sources":["../../src/transforms/tokenizer.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAEH,OAAO,KAAK,EAAE,KAAK,EAAE,SAAS,EAAE,MAAM,YAAY,CAAC;AA4BnD;;;;;;;;;GASG;AACH,wBAAgB,QAAQ,CAAC,KAAK,EAAE,MAAM,GAAG,KAAK,EAAE,CA8E/C;AAED;;GAEG;AACH,wBAAgB,MAAM,CAAC,MAAM,EAAE,KAAK,EAAE,GAAG,MAAM,CAM9C;AAED;;GAEG;AACH,wBAAgB,YAAY,CAAC,EAAE,EAAE,MAAM,GAAG,SAAS,CAMlD"}
|