@far-world-labs/verblets 0.1.1 → 0.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.cursor/launch.json +30 -0
- package/.cursor/settings.json +20 -0
- package/.github/workflows/branch-protection.yml +22 -0
- package/.github/workflows/ci.yml +120 -0
- package/.prettierrc +6 -0
- package/.release-it.json +4 -1
- package/.vscode/launch.json +31 -0
- package/AGENTS.md +220 -0
- package/DEVELOPING.md +105 -0
- package/README.md +254 -0
- package/eslint.config.js +80 -0
- package/package.json +29 -17
- package/scripts/generate-test/index.js +29 -3
- package/scripts/runner/index.js +26 -0
- package/scripts/simple-editor/index.js +29 -18
- package/scripts/summarize-files/index.js +28 -4
- package/src/chains/README.md +30 -0
- package/src/chains/anonymize/README.md +21 -0
- package/src/chains/anonymize/index.examples.js +75 -0
- package/src/chains/anonymize/index.js +121 -0
- package/src/chains/anonymize/index.spec.js +78 -0
- package/src/chains/bulk-central-tendency/index.examples.js +138 -0
- package/src/chains/bulk-central-tendency/index.js +91 -0
- package/src/chains/bulk-filter/README.md +21 -0
- package/src/chains/bulk-filter/index.examples.js +22 -0
- package/src/chains/bulk-filter/index.js +58 -0
- package/src/chains/bulk-filter/index.spec.js +38 -0
- package/src/chains/bulk-find/README.md +16 -0
- package/src/chains/bulk-find/index.examples.js +20 -0
- package/src/chains/bulk-find/index.js +30 -0
- package/src/chains/bulk-find/index.spec.js +26 -0
- package/src/chains/bulk-group/README.md +23 -0
- package/src/chains/bulk-group/index.examples.js +18 -0
- package/src/chains/bulk-group/index.js +34 -0
- package/src/chains/bulk-group/index.spec.js +41 -0
- package/src/chains/bulk-map/README.md +43 -0
- package/src/chains/bulk-map/index.examples.js +17 -0
- package/src/chains/bulk-map/index.js +86 -0
- package/src/chains/bulk-map/index.spec.js +44 -0
- package/src/chains/bulk-reduce/README.md +12 -0
- package/src/chains/bulk-reduce/index.examples.js +15 -0
- package/src/chains/bulk-reduce/index.js +13 -0
- package/src/chains/bulk-reduce/index.spec.js +25 -0
- package/src/chains/bulk-score/README.md +16 -0
- package/src/chains/bulk-score/bulk-score-result.json +18 -0
- package/src/chains/bulk-score/index.examples.js +22 -0
- package/src/chains/bulk-score/index.js +133 -0
- package/src/chains/bulk-score/index.spec.js +30 -0
- package/src/chains/category-samples/README.md +61 -0
- package/src/chains/category-samples/index.examples.js +103 -0
- package/src/chains/category-samples/index.js +134 -0
- package/src/chains/collect-terms/README.md +12 -0
- package/src/chains/collect-terms/index.examples.js +16 -0
- package/src/chains/collect-terms/index.js +44 -0
- package/src/chains/collect-terms/index.spec.js +25 -0
- package/src/chains/date/README.md +12 -0
- package/src/chains/date/index.examples.js +47 -0
- package/src/chains/date/index.js +74 -0
- package/src/chains/date/index.spec.js +62 -0
- package/src/chains/disambiguate/README.md +22 -0
- package/src/chains/disambiguate/disambiguate-meanings-result.json +16 -0
- package/src/chains/disambiguate/index.examples.js +18 -0
- package/src/chains/disambiguate/index.js +92 -0
- package/src/chains/disambiguate/index.spec.js +25 -0
- package/src/chains/dismantle/README.md +67 -0
- package/src/chains/dismantle/dismantle.examples.js +27 -0
- package/src/chains/dismantle/index.js +6 -17
- package/src/chains/dismantle/index.spec.js +1 -2
- package/src/chains/expect/README.md +171 -0
- package/src/chains/expect/index.examples.js +146 -0
- package/src/chains/expect/index.js +173 -0
- package/src/chains/expect/index.spec.js +324 -0
- package/src/chains/filter-ambiguous/README.md +11 -0
- package/src/chains/filter-ambiguous/index.examples.js +20 -0
- package/src/chains/filter-ambiguous/index.js +49 -0
- package/src/chains/filter-ambiguous/index.spec.js +31 -0
- package/src/chains/glossary/README.md +19 -0
- package/src/chains/glossary/index.examples.js +386 -0
- package/src/chains/glossary/index.js +75 -0
- package/src/chains/glossary/index.spec.js +19 -0
- package/src/chains/intersections/README.md +152 -0
- package/src/chains/intersections/index.examples.js +279 -0
- package/src/chains/intersections/index.js +366 -0
- package/src/chains/intersections/intersection-result.json +38 -0
- package/src/chains/list/index.examples.js +12 -16
- package/src/chains/list/index.js +106 -53
- package/src/chains/list/index.spec.js +8 -9
- package/src/chains/list/list-result.json +16 -0
- package/src/chains/llm-logger/README.md +208 -0
- package/src/chains/llm-logger/index.js +205 -0
- package/src/chains/llm-logger/index.spec.js +330 -0
- package/src/chains/questions/index.examples.js +2 -1
- package/src/chains/questions/index.js +14 -15
- package/src/chains/scan-js/index.js +6 -9
- package/src/chains/set-interval/README.md +81 -0
- package/src/chains/set-interval/index.examples.js +36 -0
- package/src/chains/set-interval/index.js +131 -0
- package/src/chains/set-interval/index.spec.js +70 -0
- package/src/chains/socratic/README.md +17 -0
- package/src/chains/socratic/index.js +64 -0
- package/src/chains/socratic/index.spec.js +24 -0
- package/src/chains/sort/index.examples.js +3 -7
- package/src/chains/sort/index.js +65 -15
- package/src/chains/sort/index.spec.js +5 -8
- package/src/chains/sort/sort-result.json +16 -0
- package/src/chains/summary-map/README.md +9 -1
- package/src/chains/summary-map/index.examples.js +9 -2
- package/src/chains/summary-map/index.js +43 -25
- package/src/chains/summary-map/index.spec.js +78 -3
- package/src/chains/test/index.js +9 -13
- package/src/chains/test-advice/index.js +4 -5
- package/src/chains/themes/README.md +20 -0
- package/src/chains/themes/index.examples.js +17 -0
- package/src/chains/themes/index.js +28 -0
- package/src/chains/themes/index.spec.js +19 -0
- package/src/chains/veiled-variants/index.examples.js +18 -0
- package/src/chains/veiled-variants/index.js +107 -0
- package/src/chains/veiled-variants/index.spec.js +40 -0
- package/src/constants/common.js +0 -2
- package/src/constants/models.js +172 -0
- package/src/index.js +178 -18
- package/src/json-schemas/README.md +13 -0
- package/src/json-schemas/index.js +8 -14
- package/src/json-schemas/schema-dot-org-photograph.json +11 -5
- package/src/json-schemas/schema-dot-org-place.json +78 -5
- package/src/lib/README.md +26 -0
- package/src/lib/bulk-filter/README.md +22 -0
- package/src/lib/bulk-filter/index.examples.js +27 -0
- package/src/lib/bulk-filter/index.js +63 -0
- package/src/lib/bulk-filter/index.spec.js +38 -0
- package/src/lib/bulk-find/README.md +18 -0
- package/src/lib/bulk-find/index.examples.js +19 -0
- package/src/lib/bulk-find/index.js +30 -0
- package/src/lib/bulk-find/index.spec.js +41 -0
- package/src/lib/chatgpt/index.js +63 -43
- package/src/lib/combinations/index.js +30 -0
- package/src/lib/combinations/index.spec.js +23 -0
- package/src/lib/functional/index.js +28 -0
- package/src/lib/logger-service/index.js +32 -0
- package/src/lib/parse-js-parts/index.js +9 -21
- package/src/lib/parse-llm-list/README.md +39 -0
- package/src/lib/parse-llm-list/index.js +54 -0
- package/src/lib/parse-llm-list/index.spec.js +59 -0
- package/src/lib/path-aliases/index.js +1 -3
- package/src/lib/path-aliases/index.spec.js +2 -8
- package/src/lib/pave/index.js +4 -4
- package/src/lib/pave/index.spec.js +6 -3
- package/src/lib/prompt-cache/index.js +14 -10
- package/src/lib/retry/index.js +11 -8
- package/src/lib/ring-buffer/README.md +460 -0
- package/src/lib/ring-buffer/index.js +1074 -0
- package/src/lib/search-best-first/city-walk.spec.js +37 -0
- package/src/lib/search-best-first/index.js +42 -11
- package/src/lib/search-best-first/index.spec.js +35 -0
- package/src/lib/search-js-files/index.js +44 -47
- package/src/lib/search-js-files/scan-file.js +10 -21
- package/src/lib/shorten-text/index.js +2 -7
- package/src/lib/shorten-text/index.spec.js +3 -3
- package/src/lib/strip-response/index.js +2 -7
- package/src/lib/template-replace/index.js +23 -0
- package/src/lib/template-replace/index.spec.js +60 -0
- package/src/lib/to-date/index.js +11 -0
- package/src/lib/to-number/index.js +1 -1
- package/src/lib/transcribe/index.js +26 -9
- package/src/prompts/README.md +3 -1
- package/src/prompts/as-object-with-schema.js +3 -8
- package/src/prompts/as-schema-org-text.js +10 -2
- package/src/prompts/code-features.js +1 -5
- package/src/prompts/constants.js +27 -27
- package/src/prompts/generate-collection.js +1 -1
- package/src/prompts/intent.js +16 -22
- package/src/prompts/select-from-threshold.js +1 -2
- package/src/prompts/sort.js +4 -8
- package/src/prompts/style.js +4 -7
- package/src/prompts/wrap-list.js +1 -4
- package/src/services/llm-model/global-overrides.spec.js +432 -0
- package/src/services/llm-model/index.js +234 -40
- package/src/services/llm-model/model.js +2 -2
- package/src/services/llm-model/negotiate.spec.js +447 -0
- package/src/services/redis/index.js +70 -7
- package/src/test/setup.js +20 -0
- package/src/verblets/README.md +26 -0
- package/src/verblets/auto/index.examples.js +12 -9
- package/src/verblets/auto/index.js +10 -10
- package/src/verblets/auto/index.spec.js +4 -6
- package/src/verblets/bool/README.md +36 -0
- package/src/verblets/bool/index.examples.js +53 -1
- package/src/verblets/bool/index.js +6 -9
- package/src/verblets/bool/index.spec.js +1 -3
- package/src/verblets/central-tendency/README.md +166 -0
- package/src/verblets/central-tendency/central-tendency-result.json +24 -0
- package/src/verblets/central-tendency/index.examples.js +196 -0
- package/src/verblets/central-tendency/index.js +171 -0
- package/src/verblets/central-tendency/index.spec.js +148 -0
- package/src/verblets/enum/index.examples.js +1 -4
- package/src/verblets/enum/index.js +7 -4
- package/src/verblets/expect/README.md +64 -0
- package/src/verblets/expect/index.examples.js +109 -0
- package/src/verblets/expect/index.js +75 -0
- package/src/verblets/expect/index.spec.js +127 -0
- package/src/verblets/intent/index.examples.js +95 -7
- package/src/verblets/intent/index.js +56 -68
- package/src/verblets/intersection/README.md +16 -0
- package/src/verblets/intersection/index.examples.js +89 -0
- package/src/verblets/intersection/index.js +84 -0
- package/src/verblets/intersection/index.spec.js +60 -0
- package/src/verblets/intersection/intersection-result.json +16 -0
- package/src/verblets/list-expand/README.md +10 -0
- package/src/verblets/list-expand/index.examples.js +14 -0
- package/src/verblets/list-expand/index.js +104 -0
- package/src/verblets/list-expand/index.spec.js +18 -0
- package/src/verblets/list-expand/list-expand-result.json +16 -0
- package/src/verblets/list-filter/README.md +22 -0
- package/src/verblets/list-filter/index.examples.js +26 -0
- package/src/verblets/list-filter/index.js +18 -0
- package/src/verblets/list-filter/index.spec.js +19 -0
- package/src/verblets/list-find/README.md +11 -0
- package/src/verblets/list-find/index.examples.js +15 -0
- package/src/verblets/list-find/index.js +17 -0
- package/src/verblets/list-find/index.spec.js +19 -0
- package/src/verblets/list-group/README.md +16 -0
- package/src/verblets/list-group/index.examples.js +16 -0
- package/src/verblets/list-group/index.js +112 -0
- package/src/verblets/list-group/index.spec.js +35 -0
- package/src/verblets/list-group/list-group-result.json +16 -0
- package/src/verblets/list-map/README.md +11 -0
- package/src/verblets/list-map/index.examples.js +15 -0
- package/src/verblets/list-map/index.js +26 -0
- package/src/verblets/list-map/index.spec.js +17 -0
- package/src/verblets/list-reduce/README.md +10 -0
- package/src/verblets/list-reduce/index.examples.js +14 -0
- package/src/verblets/list-reduce/index.js +21 -0
- package/src/verblets/list-reduce/index.spec.js +27 -0
- package/src/verblets/list-reduce/index.spec.jsx +27 -0
- package/src/verblets/name/README.md +15 -0
- package/src/verblets/name/index.examples.js +28 -0
- package/src/verblets/name/index.js +19 -0
- package/src/verblets/name/index.spec.js +33 -0
- package/src/verblets/name-similar-to/README.md +26 -0
- package/src/verblets/name-similar-to/index.examples.js +18 -0
- package/src/verblets/name-similar-to/index.js +20 -0
- package/src/verblets/name-similar-to/index.spec.js +13 -0
- package/src/verblets/number/index.examples.js +173 -7
- package/src/verblets/number/index.js +5 -2
- package/src/verblets/number/index.spec.js +1 -3
- package/src/verblets/number-with-units/index.examples.js +5 -1
- package/src/verblets/number-with-units/index.js +74 -9
- package/src/verblets/number-with-units/number-with-units-result.json +23 -0
- package/src/verblets/schema-org/index.examples.js +2 -7
- package/src/verblets/schema-org/index.js +32 -3
- package/src/verblets/sentiment/README.md +10 -0
- package/src/verblets/sentiment/index.examples.js +20 -0
- package/src/verblets/sentiment/index.js +9 -0
- package/src/verblets/sentiment/index.spec.js +20 -0
- package/src/verblets/to-object/index.js +10 -15
- package/src/verblets/to-object/index.spec.js +1 -4
- package/.eslintrc.json +0 -42
- package/docs/README.md +0 -41
- package/docs/babel.config.js +0 -3
- package/docs/blog/2019-05-28-first-blog-post.md +0 -12
- package/docs/blog/2019-05-29-long-blog-post.md +0 -44
- package/docs/blog/2021-08-01-mdx-blog-post.mdx +0 -20
- package/docs/blog/2021-08-26-welcome/docusaurus-plushie-banner.jpeg +0 -0
- package/docs/blog/2021-08-26-welcome/index.md +0 -25
- package/docs/blog/authors.yml +0 -17
- package/docs/docs/api/bool.md +0 -74
- package/docs/docs/api/search.md +0 -51
- package/docs/docs/intro.md +0 -47
- package/docs/docs/tutorial-basics/_category_.json +0 -8
- package/docs/docs/tutorial-basics/congratulations.md +0 -23
- package/docs/docs/tutorial-basics/create-a-blog-post.md +0 -34
- package/docs/docs/tutorial-basics/create-a-document.md +0 -57
- package/docs/docs/tutorial-basics/create-a-page.md +0 -43
- package/docs/docs/tutorial-basics/deploy-your-site.md +0 -31
- package/docs/docs/tutorial-basics/markdown-features.mdx +0 -152
- package/docs/docs/tutorial-extras/_category_.json +0 -7
- package/docs/docs/tutorial-extras/img/docsVersionDropdown.png +0 -0
- package/docs/docs/tutorial-extras/img/localeDropdown.png +0 -0
- package/docs/docs/tutorial-extras/manage-docs-versions.md +0 -55
- package/docs/docs/tutorial-extras/translate-your-site.md +0 -88
- package/docs/docusaurus.config.js +0 -120
- package/docs/package.json +0 -44
- package/docs/sidebars.js +0 -31
- package/docs/src/components/HomepageFeatures/index.js +0 -61
- package/docs/src/components/HomepageFeatures/styles.module.css +0 -11
- package/docs/src/css/custom.css +0 -30
- package/docs/src/pages/index.js +0 -43
- package/docs/src/pages/index.module.css +0 -23
- package/docs/src/pages/markdown-page.md +0 -7
- package/docs/static/.nojekyll +0 -0
- package/docs/static/img/docusaurus-social-card.jpg +0 -0
- package/docs/static/img/docusaurus.png +0 -0
- package/docs/static/img/favicon.ico +0 -0
- package/docs/static/img/logo.svg +0 -1
- package/docs/static/img/undraw_docusaurus_mountain.svg +0 -171
- package/docs/static/img/undraw_docusaurus_react.svg +0 -170
- package/docs/static/img/undraw_docusaurus_tree.svg +0 -40
- package/src/constants/openai.js +0 -65
- /package/{.vite.config.examples.js → .vitest.config.examples.js} +0 -0
- /package/{.vite.config.js → .vitest.config.js} +0 -0
|
@@ -0,0 +1,75 @@
|
|
|
1
|
+
import { describe, it } from 'vitest';
|
|
2
|
+
import { expect } from 'chai';
|
|
3
|
+
import { anonymize, anonymizeMethod } from './index.js';
|
|
4
|
+
|
|
5
|
+
// Sample input seeded with personal markers (location, years of experience,
// first-person asides) and figurative language that the examples below check for.
const sampleText = `As a seasoned engineer from Silicon Valley, I've found that React's
component lifecycle is like a well-oiled machine - understanding the mounting
phase is crucial, especially with those pesky useEffect hooks. Trust me, after
10 years of experience, proper cleanup is key to avoiding memory leaks!`;

// Example-style tests: each one calls the real `anonymize` chain, hence the
// generous per-test timeout.
describe('anonymize examples', () => {
  // NOTE: this was previously `it.only`, which silently skipped the balanced
  // and light examples below. All three examples are meant to run.
  it('should anonymize text using strict method', { timeout: 60_000 }, async () => {
    const input = {
      text: sampleText,
      method: anonymizeMethod.STRICT,
    };

    const result = await anonymize(input);

    // The strict method reports the output of every stage.
    expect(result).to.have.property('text');
    expect(result).to.have.property('stages');
    expect(result.stages).to.have.property('distinctiveContentRemoved');
    expect(result.stages).to.have.property('structureNormalized');
    expect(result.stages).to.have.property('patternsSuppressed');

    // Verify anonymization removed personal markers
    expect(result.text).to.not.include('Silicon Valley');
    expect(result.text).to.not.include('10 years of experience');
    expect(result.text).to.not.include('Trust me');

    // Verify metaphors and idioms are removed
    expect(result.text).to.not.include('well-oiled machine');
    expect(result.text).to.not.include('pesky');

    // Verify the text has been transformed
    expect(result.text).to.not.equal(sampleText);
    expect(result.text.length).to.be.lessThan(sampleText.length);
  });

  it('should preserve more content with balanced method', { timeout: 60_000 }, async () => {
    const input = {
      text: sampleText,
      method: anonymizeMethod.BALANCED,
    };

    const result = await anonymize(input);

    // Verify some personal markers are still removed
    expect(result.text).to.not.include('Silicon Valley');
    expect(result.text).to.not.include('Trust me');

    // But technical content is more preserved
    expect(result.text.length).to.be.greaterThan(
      (await anonymize({ text: sampleText, method: anonymizeMethod.STRICT })).text.length
    );
  });

  it('should minimally transform text with light method', { timeout: 60_000 }, async () => {
    const input = {
      text: sampleText,
      method: anonymizeMethod.LIGHT,
    };

    const result = await anonymize(input);

    // Verify minimal transformation
    expect(result.text.length).to.be.greaterThan(
      (await anonymize({ text: sampleText, method: anonymizeMethod.BALANCED })).text.length
    );

    // Only the most obvious personal markers should be removed
    expect(result.text).to.not.include('Trust me');

    expect(result.text).to.not.include("I've found");
  });
});
|
|
@@ -0,0 +1,121 @@
|
|
|
1
|
+
import { run } from '../../lib/chatgpt/index.js';
|
|
2
|
+
|
|
3
|
+
/**
 * Supported anonymization methods.
 *
 * Frozen so that consumers cannot mutate the shared enum; `METHODS` below is a
 * one-time snapshot of these values and would otherwise fall out of sync.
 */
export const anonymizeMethod = Object.freeze({
  STRICT: 'strict',
  BALANCED: 'balanced',
  LIGHT: 'light',
});

// Accepted `method` values, in declaration order; used for validation and
// for the validation error message.
const METHODS = Object.freeze(Object.values(anonymizeMethod));
|
|
10
|
+
|
|
11
|
+
/**
 * Checks the shape of the `anonymize` input and returns its relevant fields.
 *
 * @param {object} input - Candidate input object.
 * @returns {{ text: string, method: string, context: (string|undefined) }}
 *   The validated fields, picked from `input`.
 * @throws {Error} If `input` is not an object, `text` is not a non-empty
 *   string, `method` is not one of `METHODS`, or `context` is present but not
 *   a string.
 */
const validateInput = (input) => {
  const isObjectInput = Boolean(input) && typeof input === 'object';
  if (!isObjectInput) {
    throw new Error('Input must be an object');
  }

  const { text, method, context } = input;

  const hasText = typeof text === 'string' && text.length > 0;
  if (!hasText) {
    throw new Error('Input must include a text string');
  }

  const hasKnownMethod = Boolean(method) && METHODS.includes(method);
  if (!hasKnownMethod) {
    throw new Error(`Method must be one of: ${METHODS.join(', ')}`);
  }

  // `context` is optional; only an explicitly supplied non-string is rejected.
  const contextIsAcceptable = context === undefined || typeof context === 'string';
  if (!contextIsAcceptable) {
    throw new Error('Context must be a string if provided');
  }

  return { text, method, context };
};
|
|
32
|
+
|
|
33
|
+
/**
 * Builds the stage-one prompt: remove distinctive content and markers.
 *
 * @param {string} text - Text to embed in the prompt.
 * @param {string} [context] - Optional context; when truthy a `Context:` line
 *   is inserted ahead of the text, otherwise that slot is left empty.
 * @returns {string} The full prompt.
 */
const stage1Prompt = (text, context) => {
  const contextSection = context ? `Context: ${context}\n` : '';

  // The leading empty entry keeps the prompt's initial newline.
  const promptLines = [
    '',
    'Remove Distinctive Content and Markers',
    '- Identify and replace every distinctive or uncommon word, phrase, or sentence structure with the most widely used, nondescript alternative.',
    '- Remove all idioms, metaphors, analogies, cultural references, personal perspectives, and subjective tones.',
    "- Eliminate any explicit or implicit references to the author's identity, background, education, expertise, region, or intent.",
    '',
    contextSection,
    'Text to process:',
    `${text}`,
    '',
    'Return ONLY the processed text, with no explanations or additional content.',
  ];

  return promptLines.join('\n');
};
|
|
44
|
+
|
|
45
|
+
/**
 * Builds the stage-two prompt: normalize structure, formatting, and tone.
 *
 * @param {string} text - Text to embed in the prompt.
 * @param {string} [context] - Optional context; when truthy a `Context:` line
 *   is inserted ahead of the text, otherwise that slot is left empty.
 * @returns {string} The full prompt.
 */
const stage2Prompt = (text, context) => {
  const contextSection = context ? `Context: ${context}\n` : '';

  // The leading empty entry keeps the prompt's initial newline.
  const promptLines = [
    '',
    'Normalize Structure, Formatting, and Tone',
    '- Restructure sentences and paragraphs to strictly follow standard, average patterns in length, order, and construction. Avoid any distinctive rhythm, complexity, or flow.',
    '- Uniformly normalize punctuation, formatting, and paragraphing; avoid any variation or emphasis that could signal style.',
    '- Strip out all emotional, evaluative, or expressive language, enforcing a neutral, impersonal, and objective tone.',
    '',
    contextSection,
    'Text to process:',
    `${text}`,
    '',
    'Return ONLY the normalized text, with no explanations or additional content.',
  ];

  return promptLines.join('\n');
};
|
|
56
|
+
|
|
57
|
+
/**
 * Builds the stage-three prompt: suppress latent stylistic patterns.
 *
 * @param {string} text - Text to embed in the prompt.
 * @param {string} [context] - Optional context; when truthy a `Context:` line
 *   is inserted ahead of the text, otherwise that slot is left empty.
 * @returns {string} The full prompt.
 */
const stage3Prompt = (text, context) => {
  const contextSection = context ? `Context: ${context}\n` : '';

  // The leading empty entry keeps the prompt's initial newline.
  const promptLines = [
    '',
    'Stage 3: Suppress Latent Stylistic Patterns',
    '- Review for and suppress any recurring linguistic patterns, syntactic habits, or structural quirks—even if they appear common.',
    '- For all possible ways to phrase content, always select the plainest, most generic, and least distinctive form.',
    '- Ensure the final text reads as if generated by an automated system, with no evidence of personality, emotion, region, or any unique authorial traits.',
    '',
    contextSection,
    'Text to process:',
    `${text}`,
    '',
    'Return ONLY the final anonymized text, with no explanations or additional content.',
  ];

  return promptLines.join('\n');
};
|
|
68
|
+
|
|
69
|
+
/**
 * Anonymizes text by running it through up to three sequential prompt stages.
 *
 * How many stages run depends on `input.method`:
 * - `light`    - stage 1 only
 * - `balanced` - stages 1 and 2
 * - `strict`   - stages 1, 2 and 3
 *
 * @param {object} input
 * @param {string} input.text - Text to anonymize (non-empty).
 * @param {string} input.method - One of the `anonymizeMethod` values.
 * @param {string} [input.context] - Optional context passed to every stage prompt.
 * @param {object} [config]
 * @param {object} [config.llm] - Model options merged over the default
 *   `{ modelName: 'privacy' }`.
 *   Any other `config` keys are forwarded to `run` unchanged.
 * @returns {Promise<{ text: *, stages: object }>} `text` is the output of the
 *   last stage executed; `stages` holds the output of each executed stage
 *   under `distinctiveContentRemoved`, `structureNormalized` and
 *   `patternsSuppressed`. Stage outputs are whatever `run` resolves to
 *   (presumably strings - confirm against `lib/chatgpt`).
 * @throws {Error} If `input` fails validation.
 */
const anonymize = async (input, config = {}) => {
  const { text, method, context } = validateInput(input);
  const { llm, ...options } = config;

  // Every stage uses the same model options and pass-through run options.
  const runStage = (prompt) =>
    run(prompt, {
      modelOptions: { modelName: 'privacy', ...llm },
      ...options,
    });

  // Stage 1: Remove distinctive content.
  // The prompt builders take (text, context). Previously `method` was passed
  // in the context slot, which injected e.g. "Context: strict" into the prompt
  // and dropped the caller's real context.
  const stage1Result = await runStage(stage1Prompt(text, context));

  if (method === anonymizeMethod.LIGHT) {
    return {
      text: stage1Result,
      stages: {
        distinctiveContentRemoved: stage1Result,
      },
    };
  }

  // Stage 2: Normalize structure and tone
  const stage2Result = await runStage(stage2Prompt(stage1Result, context));

  if (method === anonymizeMethod.BALANCED) {
    return {
      text: stage2Result,
      stages: {
        distinctiveContentRemoved: stage1Result,
        structureNormalized: stage2Result,
      },
    };
  }

  // Stage 3: Suppress stylistic patterns
  const stage3Result = await runStage(stage3Prompt(stage2Result, context));

  return {
    text: stage3Result,
    stages: {
      distinctiveContentRemoved: stage1Result,
      structureNormalized: stage2Result,
      patternsSuppressed: stage3Result,
    },
  };
};
|
|
119
|
+
|
|
120
|
+
export { anonymize };
|
|
121
|
+
export default anonymize;
|
|
@@ -0,0 +1,78 @@
|
|
|
1
|
+
import { describe, expect, it, vi } from 'vitest';
|
|
2
|
+
import { anonymize, anonymizeMethod } from './index.js';
|
|
3
|
+
|
|
4
|
+
// Stub the LLM transport, not the module under test. Mocking './index.js'
// itself meant every assertion ran against the mock and never touched the real
// validation or stage logic.
vi.mock('../../lib/chatgpt/index.js', () => {
  const run = vi.fn(async () => 'anonymized');
  return { run, default: run };
});

describe('anonymize', () => {
  it('should return an object with text and stages properties', async () => {
    // STRICT is the only method that reports all three stages.
    const input = {
      text: 'Test input',
      method: anonymizeMethod.STRICT,
    };

    const result = await anonymize(input);

    expect(result).to.have.property('text');
    expect(result).to.have.property('stages');
    expect(result.stages).to.have.property('distinctiveContentRemoved');
    expect(result.stages).to.have.property('structureNormalized');
    expect(result.stages).to.have.property('patternsSuppressed');
  });

  it('should only report the first stage for the light method', async () => {
    const input = {
      text: 'Test input',
      method: anonymizeMethod.LIGHT,
    };

    const result = await anonymize(input);

    expect(result.text).toBe('anonymized');
    expect(Object.keys(result.stages)).toEqual(['distinctiveContentRemoved']);
  });

  it('should throw an error if method is not provided', async () => {
    const input = {
      text: 'Test input',
    };

    await expect(anonymize(input)).rejects.toThrow('Method must be one of');
  });

  it('should throw an error if method is invalid', async () => {
    const input = {
      text: 'Test input',
      method: 'INVALID',
    };

    await expect(anonymize(input)).rejects.toThrow('Method must be one of');
  });

  it('should throw an error if text is not provided', async () => {
    const input = {
      method: anonymizeMethod.LIGHT,
    };

    await expect(anonymize(input)).rejects.toThrow('Input must include a text string');
  });

  it('should throw an error if text is empty', async () => {
    const input = {
      text: '',
      method: anonymizeMethod.LIGHT,
    };

    await expect(anonymize(input)).rejects.toThrow('Input must include a text string');
  });
});
|
|
@@ -0,0 +1,138 @@
|
|
|
1
|
+
import { describe, expect, it } from 'vitest';
|
|
2
|
+
import { longTestTimeout } from '../../constants/common.js';
|
|
3
|
+
import { expect as aiExpect } from '../expect/index.js';
|
|
4
|
+
import bulkCentralTendency from './index.js';
|
|
5
|
+
|
|
6
|
+
// Example-style tests for the bulk central tendency chain. Most cases call the
// chain for real and then ask `aiExpect` for a loose sanity verdict
// (NOTE(review): assumes `aiExpect` resolves to an array whose first element
// is the boolean verdict - confirm against ../expect).
describe('Bulk Central Tendency Chain', () => {
  // Shape predicates shared by the examples below.
  const hasNumericScore = (entry) => entry && typeof entry.score === 'number';
  const hasScoreInUnitRange = (entry) => entry && entry.score >= 0 && entry.score <= 1;
  const hasStringReason = (entry) => entry && typeof entry.reason === 'string';
  const hasNumericConfidence = (entry) => entry && typeof entry.confidence === 'number';

  it(
    'processes multiple fruit items with consistent results',
    async () => {
      const candidates = ['apple', 'orange', 'durian', 'jackfruit', 'banana'];
      const seeds = ['apple', 'orange', 'banana', 'grape', 'strawberry'];

      const scored = await bulkCentralTendency(candidates, seeds, {
        context: 'Common fruits found in grocery stores',
      });

      expect(scored).toHaveLength(5);
      expect(scored.every(hasNumericScore)).toBe(true);
      expect(scored.every(hasScoreInUnitRange)).toBe(true);
      expect(scored.every(hasStringReason)).toBe(true);
      expect(scored.every(hasNumericConfidence)).toBe(true);

      // Use expect-chain for loose verification
      const question =
        'Do these centrality scores make sense? Common fruits like apple, orange, banana should have higher scores than exotic fruits like durian and jackfruit.';
      const [verdict] = await aiExpect(scored, undefined, question);
      expect(verdict).toBe(true);
    },
    longTestTimeout
  );

  it(
    'handles tool centrality with core features',
    async () => {
      const candidates = ['hammer', 'screwdriver', 'wrench', 'pliers', 'chainsaw'];
      const seeds = ['hammer', 'screwdriver', 'wrench', 'saw', 'drill'];

      const scored = await bulkCentralTendency(candidates, seeds, {
        context: 'Hand tools for construction and repair',
        coreFeatures: ['handheld', 'mechanical', 'durable'],
      });

      expect(scored).toHaveLength(5);
      expect(scored.every(hasNumericScore)).toBe(true);
      expect(scored.every(hasScoreInUnitRange)).toBe(true);

      // Use expect-chain for loose verification
      const question =
        'Do these tool centrality scores make sense? Basic hand tools like hammer, screwdriver, wrench should have high scores, while chainsaw (power tool) should have a lower score.';
      const [verdict] = await aiExpect(scored, undefined, question);
      expect(verdict).toBe(true);
    },
    longTestTimeout
  );

  it(
    'demonstrates context effects on centrality',
    async () => {
      const candidates = ['robin', 'eagle', 'penguin', 'ostrich'];
      const seeds = ['robin', 'sparrow', 'cardinal', 'blue jay'];

      const scored = await bulkCentralTendency(candidates, seeds, {
        context: 'Small songbirds commonly seen in backyards',
      });

      expect(scored).toHaveLength(4);
      expect(scored.every(hasNumericScore)).toBe(true);

      // Use expect-chain for loose verification
      const question =
        'Given the context of "small songbirds commonly seen in backyards", does robin have the highest centrality score, while penguin and ostrich have much lower scores?';
      const [verdict] = await aiExpect(scored, undefined, question);
      expect(verdict).toBe(true);
    },
    longTestTimeout
  );

  it(
    'manages retry logic for failed items',
    async () => {
      const candidates = ['cat', 'dog', 'elephant'];
      const seeds = ['cat', 'dog', 'rabbit', 'hamster'];

      const scored = await bulkCentralTendency(candidates, seeds, {
        maxAttempts: 2,
      });

      expect(scored).toHaveLength(3);
      expect(scored.every(hasNumericScore)).toBe(true);

      // Use expect-chain for loose verification
      const question =
        'Are these reasonable centrality scores for pets, with cat and dog having higher scores than elephant?';
      const [verdict] = await aiExpect(scored, undefined, question);
      expect(verdict).toBe(true);
    },
    longTestTimeout
  );

  it('handles empty input', async () => {
    const scored = await bulkCentralTendency([], ['apple', 'orange']);
    expect(scored).toEqual([]);
  });

  it('throws error for invalid seed items', async () => {
    // Both an empty array and null must be rejected with the same message.
    const expectedMessage = 'seedItems must be a non-empty array';
    await expect(bulkCentralTendency(['apple'], [])).rejects.toThrow(expectedMessage);
    await expect(bulkCentralTendency(['apple'], null)).rejects.toThrow(expectedMessage);
  });

  it(
    'processes large batches efficiently',
    async () => {
      // item1 .. item15, processed three at a time.
      const candidates = Array.from({ length: 15 }, (_, index) => `item${index + 1}`);
      const seeds = ['item1', 'item2', 'item3', 'item4', 'item5'];

      const scored = await bulkCentralTendency(candidates, seeds, {
        chunkSize: 3,
      });

      expect(scored).toHaveLength(15);
      expect(scored.every(hasNumericScore)).toBe(true);
      expect(scored.every(hasScoreInUnitRange)).toBe(true);
    },
    longTestTimeout
  );
});
|
|
@@ -0,0 +1,91 @@
|
|
|
1
|
+
import { bulkMapRetry } from '../bulk-map/index.js';
|
|
2
|
+
import { CENTRAL_TENDENCY_PROMPT } from '../../verblets/central-tendency/index.js';
|
|
3
|
+
|
|
4
|
+
/**
 * Build the instruction text handed to the bulk mapper for central tendency scoring.
 *
 * The shared CENTRAL_TENDENCY_PROMPT template is filled in by substituting its
 * `{context}`, `{coreFeatures}` and `{outputRequirements}` placeholders (first
 * occurrence of each), then prefixed with a sentence listing the seed items.
 *
 * @param {string[]} seedItems - Category members the items are compared against
 * @param {Object} [config={}] - Configuration options
 * @param {string} [config.context=''] - Rendered as `Context: ...` when non-empty
 * @param {string[]} [config.coreFeatures=[]] - Rendered as `Core Features: ...` when non-empty
 * @returns {string} Instructions for the bulk mapper
 */
function buildBulkCentralTendencyInstructions(seedItems, { context = '', coreFeatures = [] } = {}) {
  const contextLine = context ? `Context: ${context}` : '';
  const coreFeaturesLine =
    coreFeatures.length > 0 ? `Core Features: ${coreFeatures.join(', ')}` : '';
  const outputRequirementsLine = `OUTPUT FORMAT: Return exactly one compact JSON object per line (no line breaks within the JSON):
{"score": <number>, "reason": "<brief explanation>", "confidence": <number>}`;

  // Replacer functions (rather than replacement strings) keep `$`-sequences in
  // caller-supplied text ($&, $', $`, $$) from being expanded as replacement patterns.
  const corePrompt = CENTRAL_TENDENCY_PROMPT.replace('{context}', () => contextLine)
    .replace('{coreFeatures}', () => coreFeaturesLine)
    .replace('{outputRequirements}', () => outputRequirementsLine);

  return `For each item, evaluate its centrality among these category members: ${seedItems.join(
    ', '
  )}

${corePrompt}`;
}
|
|
28
|
+
|
|
29
|
+
/**
 * Convert one raw mapper output into a central tendency result.
 *
 * @param {string|undefined} raw - JSON text produced for a single item
 * @returns {Object|undefined} The parsed `{ score, reason, confidence }` object, or
 *   undefined when the item is missing, is not valid JSON, or has the wrong field types
 */
function parseCentralTendencyResult(raw) {
  if (raw === undefined) {
    return undefined;
  }

  let parsed;
  try {
    parsed = JSON.parse(raw);
  } catch {
    // Unparseable output is reported as "no result" for that item rather than
    // failing the whole batch.
    return undefined;
  }

  const isWellFormed =
    parsed &&
    typeof parsed.score === 'number' &&
    typeof parsed.reason === 'string' &&
    typeof parsed.confidence === 'number';

  return isWellFormed ? parsed : undefined;
}

/**
 * Process multiple items for central tendency evaluation in bulk with retry support.
 * Delegates batching and retries to bulkMapRetry, then parses each line of output.
 *
 * @param {string[]} items - Array of items to evaluate
 * @param {string[]} seedItems - Array of seed items for comparison
 * @param {Object} [config={}] - Configuration options
 * @param {string} [config.context=''] - Context description for evaluation
 * @param {string[]} [config.coreFeatures=[]] - Known core/definitional features
 * @param {string|Object} [config.llm] - LLM model to use; forwarded to bulkMapRetry
 *   only when provided (no default is applied in this function)
 * @param {number} [config.chunkSize=5] - Batch size for processing
 * @param {number} [config.maxAttempts=3] - Max retry attempts for failed items
 * @returns {Promise<Array>} One entry per mapper result: the parsed object, or
 *   undefined where the output was missing or malformed
 * @throws {Error} If `items` is not an array, or `seedItems` is not a non-empty array
 */
export default async function bulkCentralTendency(items, seedItems, config = {}) {
  if (!Array.isArray(items)) {
    throw new Error('Items must be an array');
  }

  // Empty input short-circuits before seedItems is validated.
  if (items.length === 0) {
    return [];
  }

  if (!Array.isArray(seedItems) || seedItems.length === 0) {
    throw new Error('seedItems must be a non-empty array');
  }

  const { chunkSize = 5, maxAttempts = 3, llm, ...promptConfig } = config;

  const instructions = buildBulkCentralTendencyInstructions(seedItems, promptConfig);

  // Previously `llm` was accepted and documented but never reached the mapper.
  const mapperConfig = { chunkSize, maxAttempts };
  if (llm !== undefined) {
    mapperConfig.llm = llm;
  }

  const results = await bulkMapRetry(items, instructions, mapperConfig);

  return results.map((result) => parseCentralTendencyResult(result));
}

// Export the retry version as well for consistency with other bulk processors
export const bulkCentralTendencyRetry = bulkCentralTendency;
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
# bulk-filter
|
|
2
|
+
|
|
3
|
+
Filter very long lists in manageable chunks using `listFilter`. To retry batches that fail, use the `bulkFilterRetry` named export instead of the default `bulkFilter`.
|
|
4
|
+
|
|
5
|
+
```javascript
|
|
6
|
+
import bulkFilter from './index.js';
|
|
7
|
+
|
|
8
|
+
const diary = [
|
|
9
|
+
'Walked the dog and bought milk.',
|
|
10
|
+
'One day I hope to sail across the Atlantic.',
|
|
11
|
+
'Cleaned out the garage.',
|
|
12
|
+
"Maybe I'll start that bakery I keep dreaming about.",
|
|
13
|
+
];
|
|
14
|
+
|
|
15
|
+
const aspirations = await bulkFilter(
|
|
16
|
+
diary,
|
|
17
|
+
'Keep only lines about hopes or big dreams',
|
|
18
|
+
{ chunkSize: 2 }
|
|
19
|
+
);
|
|
20
|
+
// => ['One day I hope to sail across the Atlantic.', "Maybe I'll start that bakery I keep dreaming about."]
|
|
21
|
+
```
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
import { describe, expect, it } from 'vitest';
|
|
2
|
+
import bulkFilter from './index.js';
|
|
3
|
+
import { longTestTimeout } from '../../constants/common.js';
|
|
4
|
+
|
|
5
|
+
describe('bulk-filter examples', () => {
  // Example run of bulkFilter over four notes with a chunk size of 2; the only
  // assertion is that at least one note survives the filter.
  const filtersNotes = async () => {
    const notes = [
      'Saw a dolphin while surfing',
      'Finished laundry',
      'Dream of traveling to Iceland',
      'Paid the electricity bill',
    ];
    const options = { chunkSize: 2 };

    const dreams = await bulkFilter(
      notes,
      'keep only lines about aspirations or dreams',
      options
    );

    expect(dreams.length).toBeGreaterThan(0);
  };

  it('filters with listFilter', filtersNotes, longTestTimeout);
});
|
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
import listFilter from '../../verblets/list-filter/index.js';
|
|
2
|
+
|
|
3
|
+
/**
 * Build a keep/drop mask for `list` by running `listFilter` over consecutive batches.
 *
 * Each mask entry is `true` (item was returned by the filter), `false` (item was
 * not returned) or `undefined` (the batch could not be decided: `listFilter`
 * threw, or it returned something that is not in the batch).
 *
 * @param {Array} list - Items to evaluate
 * @param {string} instructions - Filter instructions passed through to `listFilter`
 * @param {number} chunkSize - Batch size; floored, and must be at least 1
 * @param {Object} [config={}] - Extra options passed through to `listFilter`
 * @returns {Promise<Array<boolean|undefined>>} Mask aligned index-for-index with `list`
 * @throws {RangeError} If `chunkSize` is not a number of at least 1
 */
const buildMask = async (list, instructions, chunkSize, config = {}) => {
  // A step of 0 never advances the loop, and a fractional step writes to
  // non-index keys, so normalise and validate before iterating.
  const step = Math.floor(chunkSize);
  if (!(step >= 1)) {
    throw new RangeError('chunkSize must be a number greater than or equal to 1');
  }

  // Filled with explicit undefined (not left sparse) so that forEach-based
  // consumers still visit undecided entries.
  const mask = new Array(list.length).fill(undefined);

  for (let start = 0; start < list.length; start += step) {
    const batch = list.slice(start, start + step);
    try {
      // eslint-disable-next-line no-await-in-loop
      const result = await listFilter(batch, instructions, config);

      const batchItems = new Set(batch);
      const isValid = result.every((item) => batchItems.has(item));
      if (isValid) {
        const kept = new Set(result);
        batch.forEach((item, offset) => {
          mask[start + offset] = kept.has(item);
        });
      }
      // An invalid response leaves the batch undefined so it can be retried.
    } catch {
      // Best effort: a failed batch stays undefined so callers may retry it.
    }
  }

  return mask;
};
|
|
28
|
+
|
|
29
|
+
/**
 * Filter `list` in batches, re-running only the items whose mask entry is still
 * undefined after the previous pass.
 *
 * @param {Array} list - Items to filter
 * @param {string} instructions - Filter instructions
 * @param {Object} [config={}] - Options
 * @param {number} [config.chunkSize=10] - Batch size handed to buildMask
 * @param {number} [config.maxAttempts=3] - Total passes, counting the initial one
 * @param {*} [config.llm] - Passed through in the config given to buildMask
 * @returns {Promise<Array>} Items whose final mask entry is truthy; items still
 *   undecided after the last pass are left out
 */
export const bulkFilterRetry = async (list, instructions, config = {}) => {
  const { chunkSize = 10, maxAttempts = 3, llm, ...options } = config;
  const filterConfig = { llm, ...options };

  const mask = await buildMask(list, instructions, chunkSize, filterConfig);

  let attempt = 1;
  while (attempt < maxAttempts) {
    // Positions in `list` that still have no decision.
    const pendingIdx = [];
    mask.forEach((decision, position) => {
      if (decision === undefined) {
        pendingIdx.push(position);
      }
    });

    if (pendingIdx.length === 0) break;

    const pendingItems = pendingIdx.map((position) => list[position]);
    // eslint-disable-next-line no-await-in-loop
    const retryMask = await buildMask(pendingItems, instructions, chunkSize, filterConfig);

    // Copy over only the entries the retry managed to decide.
    for (let k = 0; k < retryMask.length; k += 1) {
      const decision = retryMask[k];
      if (decision !== undefined) {
        mask[pendingIdx[k]] = decision;
      }
    }

    attempt += 1;
  }

  return list.filter((_, idx) => mask[idx]);
};
|
|
53
|
+
|
|
54
|
+
/**
 * Filter `list` in batches with a single pass (no retries).
 *
 * @param {Array} list - Items to filter
 * @param {string} instructions - Filter instructions
 * @param {Object} [config={}] - Options
 * @param {number} [config.chunkSize=10] - Batch size handed to buildMask
 * @param {*} [config.llm] - Passed through in the config given to buildMask
 * @returns {Promise<Array>} Items whose mask entry is truthy; items whose batch
 *   produced no decision are left out
 */
export default async function bulkFilter(list, instructions, config = {}) {
  const { chunkSize = 10, llm, ...options } = config;
  const filterConfig = { llm, ...options };

  const keepFlags = await buildMask(list, instructions, chunkSize, filterConfig);

  return list.filter((_item, position) => Boolean(keepFlags[position]));
}
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
import { beforeEach, describe, expect, it, vi } from 'vitest';
|
|
2
|
+
import bulkFilter, { bulkFilterRetry } from './index.js';
|
|
3
|
+
import listFilter from '../../verblets/list-filter/index.js';
|
|
4
|
+
|
|
5
|
+
// Stand-in for listFilter: a batch containing 'FAIL' throws, otherwise the items
// that contain the instruction text are returned.
vi.mock('../../verblets/list-filter/index.js', () => {
  const fakeListFilter = vi.fn(async (items, instructions) => {
    if (items.includes('FAIL')) throw new Error('fail');
    return items.filter((l) => l.includes(instructions));
  });
  return { default: fakeListFilter };
});

beforeEach(() => {
  vi.clearAllMocks();
});

describe('bulk-filter', () => {
  it('filters items in batches', async () => {
    const input = ['a', 'b', 'c'];

    const kept = await bulkFilter(input, 'a', { chunkSize: 2 });

    expect(kept).toStrictEqual(['a']);
    // Three items with chunkSize 2 -> two listFilter calls.
    expect(listFilter).toHaveBeenCalledTimes(2);
  });

  it('retries failed batches', async () => {
    // Only the very first listFilter call fails; every later call succeeds.
    let isFirstCall = true;
    listFilter.mockImplementation(async (items) => {
      if (isFirstCall) {
        isFirstCall = false;
        throw new Error('fail');
      }
      return items.filter((l) => l.includes('a'));
    });

    const retryOptions = { chunkSize: 2, maxAttempts: 2 };
    const kept = await bulkFilterRetry(['FAIL', 'a', 'b'], 'a', retryOptions);

    expect(kept).toStrictEqual(['a']);
    // Two calls in the first pass plus one retry of the failed batch.
    expect(listFilter).toHaveBeenCalledTimes(3);
  });
});
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
# bulk-find
|
|
2
|
+
|
|
3
|
+
Scan long lists in manageable batches to locate the item that best matches your instructions.
|
|
4
|
+
|
|
5
|
+
```javascript
|
|
6
|
+
import bulkFind from './index.js';
|
|
7
|
+
|
|
8
|
+
const emails = [
|
|
9
|
+
'update from accounting',
|
|
10
|
+
'party invitation',
|
|
11
|
+
'weekly newsletter',
|
|
12
|
+
// ... potentially thousands more
|
|
13
|
+
];
|
|
14
|
+
const best = await bulkFind(emails, 'Which email is most urgent?');
|
|
15
|
+
// => 'update from accounting'
|
|
16
|
+
```
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
import { describe, expect, it } from 'vitest';
|
|
2
|
+
import bulkFind from './index.js';
|
|
3
|
+
import { longTestTimeout } from '../../constants/common.js';
|
|
4
|
+
|
|
5
|
+
describe('bulk-find examples', () => {
  // Example run of bulkFind over four titles with a chunk size of 2; the only
  // assertion is that some result is returned.
  const findsAcrossBatches = async () => {
    const titles = [
      'ancient mystery',
      'space odyssey',
      'underwater adventure',
      'future tech thriller',
    ];
    const question = 'Which title feels most futuristic?';

    const result = await bulkFind(titles, question, { chunkSize: 2 });

    expect(result).toBeDefined();
  };

  it('finds the best match across batches', findsAcrossBatches, longTestTimeout);
});
|