mdi-llmkit 0.1.0 → 1.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (36)
  1. package/README.md +116 -34
  2. package/dist/src/comparison/compareLists.d.ts +97 -0
  3. package/dist/src/comparison/compareLists.js +375 -0
  4. package/dist/src/comparison/index.d.ts +1 -0
  5. package/dist/src/comparison/index.js +1 -0
  6. package/dist/src/gptApi/functions.d.ts +21 -0
  7. package/dist/src/gptApi/functions.js +154 -0
  8. package/dist/src/gptApi/gptConversation.d.ts +43 -0
  9. package/dist/src/gptApi/gptConversation.js +146 -0
  10. package/dist/src/gptApi/index.d.ts +3 -0
  11. package/dist/src/gptApi/index.js +3 -0
  12. package/dist/src/gptApi/jsonSchemaFormat.d.ts +14 -0
  13. package/dist/src/gptApi/jsonSchemaFormat.js +198 -0
  14. package/dist/src/index.d.ts +3 -0
  15. package/dist/src/index.js +3 -0
  16. package/dist/src/jsonSurgery/jsonSurgery.d.ts +81 -0
  17. package/dist/src/jsonSurgery/jsonSurgery.js +776 -0
  18. package/dist/src/jsonSurgery/placemarkedJSON.d.ts +57 -0
  19. package/dist/src/jsonSurgery/placemarkedJSON.js +151 -0
  20. package/dist/tests/comparison/compareLists.test.d.ts +1 -0
  21. package/dist/tests/comparison/compareLists.test.js +434 -0
  22. package/dist/tests/gptApi/gptConversation.test.d.ts +1 -0
  23. package/dist/tests/gptApi/gptConversation.test.js +157 -0
  24. package/dist/tests/gptApi/gptSubmit.test.d.ts +1 -0
  25. package/dist/tests/gptApi/gptSubmit.test.js +161 -0
  26. package/dist/tests/gptApi/jsonSchemaFormat.test.d.ts +1 -0
  27. package/dist/tests/gptApi/jsonSchemaFormat.test.js +372 -0
  28. package/dist/tests/jsonSurgery/jsonSurgery.test.d.ts +1 -0
  29. package/dist/tests/jsonSurgery/jsonSurgery.test.js +729 -0
  30. package/dist/tests/jsonSurgery/placemarkedJSON.test.d.ts +1 -0
  31. package/dist/tests/jsonSurgery/placemarkedJSON.test.js +209 -0
  32. package/dist/tests/setupEnv.d.ts +1 -0
  33. package/dist/tests/setupEnv.js +4 -0
  34. package/dist/tests/subpathExports.test.d.ts +1 -0
  35. package/dist/tests/subpathExports.test.js +47 -0
  36. package/package.json +18 -5
package/README.md CHANGED
@@ -13,14 +13,14 @@ npm install mdi-llmkit openai
13
13
  ### `gptSubmit`
14
14
 
15
15
  ```ts
16
- import OpenAI from "openai";
17
- import { gptSubmit } from "mdi-llmkit";
16
+ import OpenAI from 'openai';
17
+ import { gptSubmit } from 'mdi-llmkit';
18
18
 
19
19
  const client = new OpenAI({ apiKey: process.env.OPENAI_API_KEY });
20
20
 
21
21
  const reply = await gptSubmit(
22
- [{ role: "user", content: "Say hello." }],
23
- client,
22
+ [{ role: 'user', content: 'Say hello.' }],
23
+ client
24
24
  );
25
25
 
26
26
  console.log(reply);
@@ -29,52 +29,132 @@ console.log(reply);
29
29
  ### `GptConversation`
30
30
 
31
31
  ```ts
32
- import OpenAI from "openai";
33
- import { GptConversation } from "mdi-llmkit";
32
+ import OpenAI from 'openai';
33
+ import { GptConversation } from 'mdi-llmkit';
34
34
 
35
35
  const client = new OpenAI({ apiKey: process.env.OPENAI_API_KEY });
36
36
  const conversation = new GptConversation([], { openaiClient: client });
37
37
 
38
- const reply = await conversation.submitUserMessage("Give me three project name ideas.");
38
+ const reply = await conversation.submitUserMessage(
39
+ 'Give me three project name ideas.'
40
+ );
39
41
  console.log(reply);
40
42
  ```
41
43
 
42
44
  ### `JSONSchemaFormat`
43
45
 
44
46
  ```ts
45
- import { JSONSchemaFormat, JSON_INTEGER, gptSubmit } from "mdi-llmkit";
47
+ import { JSONSchemaFormat, JSON_INTEGER, gptSubmit } from 'mdi-llmkit';
46
48
 
47
49
  const responseFormat = JSONSchemaFormat(
48
- {
49
- answer: "The final answer",
50
- confidence: ["Confidence score", [0, 100], []],
51
- rank: JSON_INTEGER,
52
- },
53
- {
54
- name: "answer_payload",
55
- description: "Structured answer payload",
56
- },
50
+ 'answer_payload',
51
+ {
52
+ answer: 'The final answer',
53
+ confidence: ['Confidence score', [0, 100], []],
54
+ rank: JSON_INTEGER,
55
+ },
56
+ 'Structured answer payload'
57
57
  );
58
58
 
59
59
  const result = await gptSubmit(
60
- [{ role: "user", content: "Return answer as structured JSON." }],
61
- client,
62
- { jsonResponse: responseFormat },
60
+ [{ role: 'user', content: 'Return answer as structured JSON.' }],
61
+ client,
62
+ { jsonResponse: responseFormat }
63
+ );
64
+ ```
65
+
66
+ ## `jsonSurgery`
67
+
68
+ `jsonSurgery` applies iterative, model-guided edits to a JSON-compatible object using
69
+ structured JSON-path operations (`assign`, `append`, `insert`, `delete`, `rename`).
70
+
71
+ ```ts
72
+ import { jsonSurgery } from 'mdi-llmkit/jsonSurgery';
73
+ ```
74
+
75
+ - It deep-copies the input object and returns the modified copy.
76
+ - It supports optional schema guidance and key-skipping for model-visible context.
77
+ - It supports validation/progress callbacks and soft iteration/time limits.
78
+
79
+ ## `compareItemLists` (comparison)
80
+
81
+ `compareItemLists` performs a semantic diff between a "before" list and an "after" list,
82
+ including LLM-assisted rename/add/remove decisions.
83
+
84
+ Types:
85
+
86
+ - `SemanticallyComparableListItem`
87
+ - `string`
88
+ - `{ name: string; description?: string }`
89
+ - `ItemComparisonResult`
90
+ - `Removed | Added | Renamed | Unchanged`
91
+ - `OnComparingItemCallback`
92
+ - `(item, isFromBeforeList, isStarting, result, newName, error, totalProcessedSoFar, totalLeftToProcess) => void`
93
+
94
+ Behavior notes:
95
+
96
+ - Item matching is name-based and case-insensitive.
97
+ - `description` provides extra model context but is not identity.
98
+ - Names are expected to be unique within each list (case-insensitive).
99
+ - Progress callback is fired at item start (`isStarting=true`) and finish (`isStarting=false`).
100
+
101
+ Example:
102
+
103
+ ```ts
104
+ import OpenAI from 'openai';
105
+ import {
106
+ compareItemLists,
107
+ ItemComparisonResult,
108
+ type OnComparingItemCallback,
109
+ } from 'mdi-llmkit/comparison';
110
+
111
+ const client = new OpenAI({ apiKey: process.env.OPENAI_API_KEY });
112
+
113
+ const onComparingItem: OnComparingItemCallback = (
114
+ item,
115
+ isFromBeforeList,
116
+ isStarting,
117
+ result,
118
+ newName,
119
+ error,
120
+ processed,
121
+ left
122
+ ) => {
123
+ if (error) {
124
+ console.warn('Comparison warning:', error);
125
+ }
126
+ if (!isStarting && result === ItemComparisonResult.Renamed) {
127
+ console.log('Renamed:', item, '->', newName);
128
+ }
129
+ console.log({ isFromBeforeList, isStarting, result, processed, left });
130
+ };
131
+
132
+ const comparison = await compareItemLists(
133
+ client,
134
+ [{ name: 'Widget A', description: 'Legacy widget' }, 'Widget B'],
135
+ [
136
+ { name: 'Widget Alpha', description: 'Migrated name for Widget A' },
137
+ 'Widget B',
138
+ ],
139
+ 'Widgets migrated from legacy catalog to new naming standards.',
140
+ onComparingItem
63
141
  );
142
+
143
+ console.log(comparison);
64
144
  ```
65
145
 
66
146
  ## JSON Response Mode
67
147
 
68
148
  ```ts
69
- import OpenAI from "openai";
70
- import { gptSubmit } from "mdi-llmkit";
149
+ import OpenAI from 'openai';
150
+ import { gptSubmit } from 'mdi-llmkit';
71
151
 
72
152
  const client = new OpenAI({ apiKey: process.env.OPENAI_API_KEY });
73
153
 
74
154
  const result = await gptSubmit(
75
- [{ role: "user", content: "Return JSON with keys a and b." }],
76
- client,
77
- { jsonResponse: true },
155
+ [{ role: 'user', content: 'Return JSON with keys a and b.' }],
156
+ client,
157
+ { jsonResponse: true }
78
158
  );
79
159
 
80
160
  console.log(result);
@@ -83,6 +163,8 @@ console.log(result);
83
163
  ## Notes
84
164
 
85
165
  - Current TypeScript parity slices include `gptSubmit`, `GptConversation`, and `JSONSchemaFormat`.
166
+ - You can import GPT API symbols via subpath imports, e.g. `import { GptConversation } from "mdi-llmkit/gptApi"`.
167
+ - Comparison symbols are available via `mdi-llmkit/comparison`.
86
168
  - Integer schemas can be expressed with `JSON_INTEGER`; numeric (float-capable) schemas can use `JSON_NUMBER`.
87
169
 
88
170
  ## Migration from Python
@@ -91,18 +173,18 @@ console.log(result);
91
173
  - Argument style: Python keyword args map to a TypeScript options object.
92
174
  - Conversation submit methods: Python `submit_user_message(...)` maps to `submitUserMessage(...)`.
93
175
  - JSON schema DSL: Python tuple metadata uses TypeScript array metadata.
94
- - Python: `("Age", (0, 120), int)`
95
- - TypeScript: `["Age", [0, 120], JSON_INTEGER]`
176
+ - Python: `("Age", (0, 120), int)`
177
+ - TypeScript: `["Age", [0, 120], JSON_INTEGER]`
96
178
  - JSON schema type markers in TypeScript:
97
- - `JSON_INTEGER` for integer-only values.
98
- - `JSON_NUMBER` for float-capable numeric values.
179
+ - `JSON_INTEGER` for integer-only values.
180
+ - `JSON_NUMBER` for float-capable numeric values.
99
181
 
100
182
  ## CI and Release
101
183
 
102
184
  - CI workflow: `.github/workflows/typescript-ci.yml`
103
- - Runs on push to `main` and on pull requests when TypeScript package files change.
104
- - Executes `npm ci`, `npm test`, and `npm run build` in `packages/typescript-mdi-llmkit`.
185
+ - Runs on push to `main` and on pull requests when TypeScript package files change.
186
+ - Executes `npm ci`, `npm test`, and `npm run build` in `packages/typescript-mdi-llmkit`.
105
187
  - Release workflow: `.github/workflows/typescript-release.yml`
106
- - Runs on tags matching `typescript-v*` (for example: `typescript-v0.1.0`).
107
- - Requires repository secret `NPM_TOKEN` with publish permission to npm.
108
- - Executes tests/build before `npm publish --access public --provenance`.
188
+ - Runs on tags matching `typescript-v*` (for example: `typescript-v1.0.1`).
189
+ - Requires repository secret `NPM_TOKEN` with publish permission to npm.
190
+ - Executes tests/build before `npm publish --access public --provenance`.
@@ -0,0 +1,97 @@
1
+ /**
2
+ * Utilities for semantic comparison of two item lists using deterministic pre-processing
3
+ * plus LLM-assisted decisions for ambiguous cases.
4
+ *
5
+ * High-level flow:
6
+ * 1) Normalize/sort items and remove obvious case-insensitive exact matches.
7
+ * 2) Ask the LLM to classify remaining "before" items as removed or renamed.
8
+ * 3) Ask the LLM whether remaining "after" items should be considered added.
9
+ * 4) Return `removed`, `added`, `renamed`, and `unchanged` buckets.
10
+ *
11
+ * Key assumptions:
12
+ * - Item names are unique within each input list (case-insensitive).
13
+ * - Comparison is name-based (`string` value or object `name` field).
14
+ * - Optional object `description` is context-only and does not affect identity.
15
+ *
16
+ * Progress reporting:
17
+ * - `OnComparingItemCallback` can be provided to receive start/finish events for each item,
18
+ * including source list, result classification, optional rename target, and running counts.
19
+ */
20
+ import { OpenAI } from 'openai';
21
+ /**
22
+ * Item shape accepted by `compareItemLists` for semantic comparison.
23
+ *
24
+ * - A raw string is treated as the item's comparable name.
25
+ * - An object uses `name` as the comparable value and may include optional
26
+ * `description` to provide additional LLM context.
27
+ */
28
+ export type SemanticallyComparableListItem = string | {
29
+ name: string;
30
+ description?: string;
31
+ };
32
+ /**
33
+ * Final classification of an item during comparison.
34
+ */
35
+ export declare enum ItemComparisonResult {
36
+ /** Item existed in "before" and is considered deleted in "after". */
37
+ Removed = "removed",
38
+ /** Item exists in "after" and is considered newly introduced. */
39
+ Added = "added",
40
+ /** Item from "before" was matched to a different name in "after". */
41
+ Renamed = "renamed",
42
+ /** Item is treated as unchanged or unresolved for downstream purposes. */
43
+ Unchanged = "unchanged"
44
+ }
45
+ /**
46
+ * Progress callback for per-item comparison lifecycle.
47
+ *
48
+ * @param item The concrete item currently being evaluated.
49
+ * @param isFromBeforeList `true` when the item comes from `listBefore`, `false` when
50
+ * it comes from `listAfter`.
51
+ * @param isStarting `true` when evaluation for this item begins, `false` when that
52
+ * evaluation completes.
53
+ * @param result Current/final classification for this callback event. For start events,
54
+ * this is a provisional value; for finish events, it is final for that item.
55
+ * @param newName The matched new name when `result` is `Renamed`; otherwise `undefined`.
56
+ * @param error Optional warning/error message for this event; `undefined` when none.
57
+ * @param totalProcessedSoFar Number of items fully processed so far.
58
+ * @param totalLeftToProcess Number of items remaining after this event.
59
+ */
60
+ export type OnComparingItemCallback = (item: SemanticallyComparableListItem, isFromBeforeList: boolean, isStarting: boolean, result: ItemComparisonResult, newName: string | undefined, error: string | undefined, totalProcessedSoFar: number, totalLeftToProcess: number) => void;
61
+ /**
62
+ * Result of comparing two lists of strings.
63
+ */
64
+ export interface StringListComparison {
65
+ removed: string[];
66
+ added: string[];
67
+ renamed: Record<string, string>;
68
+ unchanged: string[];
69
+ }
70
+ /**
71
+ * Compares two lists of strings and identifies differences, including potential renames.
72
+ * The lists normally contain strings. However, in situations where the AI might benefit from
73
+ * additional context, the lists may contain objects with `name` and optional `description`
74
+ * properties; in these situations, it's the `name` property that is compared.
75
+ * The comparison is case insensitive.
76
+ *
77
+ * IMPORTANT: Item names are expected to be unique within each input list (case-insensitive).
78
+ * Duplicate names in either list are not supported and may produce incorrect results.
79
+ * @param listBefore - The list of strings/items before the changes.
80
+ * @param listAfter - The list of strings/items after the changes.
81
+ * @param explanation Optional explanation that provides context for the comparison, e.g.
82
+ * a description of the items or the nature of the changes.
83
+ * @param onComparingItem Optional callback invoked at the start and end of each item
84
+ * evaluation. It receives the current item, whether it is from the "before" list,
85
+ * whether processing is starting (`true`) or finishing (`false`), the
86
+ * current/final classification, renamed target (if applicable), an
87
+ * optional warning/error message, and processed/remaining item counts.
88
+ * `totalProcessedSoFar` increases only when an item
89
+ * finishes; `totalLeftToProcess` is the number of items not yet finished.
90
+ * @returns An object containing removed, added, renamed, and unchanged strings
91
+ */
92
+ export declare const compareItemLists: (openaiClient: OpenAI, listBefore: SemanticallyComparableListItem[], listAfter: SemanticallyComparableListItem[], explanation?: string, onComparingItem?: OnComparingItemCallback) => Promise<{
93
+ removed: string[];
94
+ added: string[];
95
+ renamed: Record<string, string>;
96
+ unchanged: string[];
97
+ }>;
@@ -0,0 +1,375 @@
1
+ import { GptConversation } from '../gptApi/gptConversation.js';
2
+ import { JSONSchemaFormat } from '../gptApi/jsonSchemaFormat.js';
3
+ /**
4
+ * Final classification of an item during comparison.
5
+ */
6
+ export var ItemComparisonResult;
7
+ (function (ItemComparisonResult) {
8
+ /** Item existed in "before" and is considered deleted in "after". */
9
+ ItemComparisonResult["Removed"] = "removed";
10
+ /** Item exists in "after" and is considered newly introduced. */
11
+ ItemComparisonResult["Added"] = "added";
12
+ /** Item from "before" was matched to a different name in "after". */
13
+ ItemComparisonResult["Renamed"] = "renamed";
14
+ /** Item is treated as unchanged or unresolved for downstream purposes. */
15
+ ItemComparisonResult["Unchanged"] = "unchanged";
16
+ })(ItemComparisonResult || (ItemComparisonResult = {}));
17
+ /**
18
+ * Returns the comparable name for a list item.
19
+ */
20
+ const getItemName = (item) => {
21
+ return typeof item === 'string' ? item : item.name;
22
+ };
23
+ /**
24
+ * Ensures a list has no duplicate item names after case-insensitive normalization.
25
+ * Throws an error listing duplicates when the uniqueness precondition is violated.
26
+ */
27
+ const assertUniqueNamesInList = (listToCheck, listName) => {
28
+ const seenNames = new Set();
29
+ const duplicateNames = new Set();
30
+ for (const item of listToCheck) {
31
+ const name = getItemName(item).trim().toLowerCase();
32
+ if (seenNames.has(name)) {
33
+ duplicateNames.add(name);
34
+ }
35
+ else {
36
+ seenNames.add(name);
37
+ }
38
+ }
39
+ if (duplicateNames.size > 0) {
40
+ throw new Error(`compareItemLists: Duplicate item names found in ${listName} list (case-insensitive): ` +
41
+ `${Array.from(duplicateNames)
42
+ .sort()
43
+ .map((name) => JSON.stringify(name))
44
+ .join(', ')}`);
45
+ }
46
+ };
47
+ /**
48
+ * Formats a list item for prompt inclusion, including optional description context.
49
+ */
50
+ const itemToPromptString = (item) => {
51
+ if (typeof item === 'string') {
52
+ return `- ${JSON.stringify(item)}`;
53
+ }
54
+ else {
55
+ let s = `- ${JSON.stringify(item.name)}`;
56
+ if (item.description &&
57
+ item.description.trim().toLowerCase() !== item.name.trim().toLowerCase()) {
58
+ s += ` (details: ${JSON.stringify(item.description)})`;
59
+ }
60
+ return s;
61
+ }
62
+ };
63
+ /**
64
+ * Sort comparator for list items by case-insensitive name.
65
+ */
66
+ const compareItemsByName = (a, b) => {
67
+ const nameA = getItemName(a).toLowerCase();
68
+ const nameB = getItemName(b).toLowerCase();
69
+ return nameA.localeCompare(nameB);
70
+ };
71
+ /**
72
+ * Compares two names case-insensitively while tolerating JSON-escaped variants.
73
+ */
74
+ const areNamesEquivalent = (a, b) => {
75
+ a = a.trim().toLowerCase();
76
+ b = b.trim().toLowerCase();
77
+ if (a === b || a === JSON.stringify(b) || JSON.stringify(a) === b) {
78
+ return true;
79
+ }
80
+ return false;
81
+ };
82
+ /**
83
+ * Removes every item whose name matches the target (case-insensitive, JSON-tolerant).
84
+ */
85
+ const removeItemsByName = (listToModify, itemNameToRemove) => {
86
+ itemNameToRemove = itemNameToRemove.trim().toLowerCase();
87
+ return listToModify.filter((item) => {
88
+ const name = getItemName(item).trim().toLowerCase();
89
+ if (areNamesEquivalent(name, itemNameToRemove)) {
90
+ return false; // Remove this item
91
+ }
92
+ return true; // Keep this item
93
+ });
94
+ };
95
+ /**
96
+ * Compares two lists of strings and identifies differences, including potential renames.
97
+ * The lists normally contain strings. However, in situations where the AI might benefit from
98
+ * additional context, the lists may contain objects with `name` and optional `description`
99
+ * properties; in these situations, it's the `name` property that is compared.
100
+ * The comparison is case insensitive.
101
+ *
102
+ * IMPORTANT: Item names are expected to be unique within each input list (case-insensitive).
103
+ * Duplicate names in either list are not supported and may produce incorrect results.
104
+ * @param listBefore - The list of strings/items before the changes.
105
+ * @param listAfter - The list of strings/items after the changes.
106
+ * @param explanation Optional explanation that provides context for the comparison, e.g.
107
+ * a description of the items or the nature of the changes.
108
+ * @param onComparingItem Optional callback invoked at the start and end of each item
109
+ * evaluation. It receives the current item, whether it is from the "before" list,
110
+ * whether processing is starting (`true`) or finishing (`false`), the
111
+ * current/final classification, renamed target (if applicable), an
112
+ * optional warning/error message, and processed/remaining item counts.
113
+ * `totalProcessedSoFar` increases only when an item
114
+ * finishes; `totalLeftToProcess` is the number of items not yet finished.
115
+ * @returns An object containing removed, added, renamed, and unchanged strings
116
+ */
117
+ export const compareItemLists = async (openaiClient, listBefore, listAfter, explanation, onComparingItem) => {
118
+ // Make sure we don't modify the original lists.
119
+ listBefore = JSON.parse(JSON.stringify(listBefore));
120
+ listAfter = JSON.parse(JSON.stringify(listAfter));
121
+ const retval = {
122
+ removed: [],
123
+ added: [],
124
+ renamed: {},
125
+ unchanged: [],
126
+ };
127
+ assertUniqueNamesInList(listBefore, 'before');
128
+ assertUniqueNamesInList(listAfter, 'after');
129
+ listBefore.sort(compareItemsByName);
130
+ listAfter.sort(compareItemsByName);
131
+ const setStringsBefore = new Set(listBefore.map((item) => getItemName(item)));
132
+ const setStringsAfter = new Set(listAfter.map((item) => getItemName(item)));
133
+ // Determine which strings are common to both lists.
134
+ // We can't just do a simple set intersection, because we want the comparison
135
+ // to be case insensitive. So we have to do it manually.
136
+ // We'll just perform an n^2 comparison since the lists are expected to be small.
137
+ const setStringsCommon = new Set();
138
+ for (const strBefore of setStringsBefore) {
139
+ for (const strAfter of setStringsAfter) {
140
+ if (strBefore.toLowerCase() === strAfter.toLowerCase()) {
141
+ setStringsCommon.add(strBefore);
142
+ break;
143
+ }
144
+ }
145
+ }
146
+ // This already gives us the unchanged items.
147
+ retval.unchanged = Array.from(setStringsCommon).sort();
148
+ // Remove the unchanged items from both lists, leaving only items that might have been
149
+ // removed, added, or renamed.
150
+ // Remember that we can't just do set subtraction because of case insensitivity, and
151
+ // because the original lists may contain objects rather than just strings.
152
+ for (const strCommon of setStringsCommon) {
153
+ listBefore = removeItemsByName(listBefore, strCommon);
154
+ listAfter = removeItemsByName(listAfter, strCommon);
155
+ }
156
+ // Now the two lists contain only items with different names.
157
+ // However, some of these items may be renames rather than pure additions/removals.
158
+ // The only way to tell is with AI.
159
+ const convo = new GptConversation([], { openaiClient });
160
+ convo.addSystemMessage(`
161
+ You are a data analyst who has been hired to try to preserve the integrity of a list of
162
+ data items that have recently undergone migration from one data system to another.
163
+
164
+ You will be given two lists of items: a "before" list and an "after" list.
165
+ (The exact nature of the items is not important. They could be names of products from
166
+ receipts or purchase orders, for example.)
167
+
168
+ In the migration from the old data system to the new, some items may have been removed,
169
+ some items may have been added, and some items may have been renamed. We can't tell
170
+ just by performing string comparisons on the two lists, because the renames may be subtle.
171
+
172
+ We're going to go through the items in the "before" list, one by one. For each one,
173
+ you will look for the best matching item in the "after" list. If you find a good match,
174
+ you will consider that item to be a rename of the original item. If you don't find a
175
+ good match, you will consider that item to have been removed.
176
+ `);
177
+ if (explanation) {
178
+ convo.addSystemMessage(`
179
+ Here is some additional context that may help you make better decisions about which items
180
+ have been renamed versus removed/added:
181
+
182
+ ${explanation}
183
+ `);
184
+ }
185
+ convo.addUserMessage(`
186
+ "BEFORE" LIST:
187
+
188
+ ${listBefore.map(itemToPromptString).join('\n')}
189
+ `);
190
+ // Counts used for onComparingItem telemetry across both loops.
191
+ let totalProcessedItems = 0;
192
+ // First, go through each item in the "before" list, and submit it to the LLM
193
+ // for presentation.
194
+ for (let iItem = 0; iItem < listBefore.length; iItem++) {
195
+ const itemBefore = listBefore[iItem];
196
+ onComparingItem?.(itemBefore, true, true, ItemComparisonResult.Unchanged, undefined, undefined, totalProcessedItems, listBefore.length - iItem + listAfter.length);
197
+ try {
198
+ const convoIter = convo.clone();
199
+ // We rebuild the "after" list each time, since items may get removed from it
200
+ // as they get matched.
201
+ convoIter.addUserMessage(`
202
+ "AFTER" LIST:
203
+
204
+ ${listAfter.map(itemToPromptString).join('\n')}
205
+ `);
206
+ convoIter.addUserMessage(`
207
+ For the moment, let's focus on this item from the "before" list:
208
+
209
+ ${itemToPromptString(itemBefore)}
210
+
211
+ Look through the entire "after" list and try to find an item that might be a rename
212
+ or alternative version of this item.
213
+
214
+ Feel free to think aloud, brainstorm, and reason through the possibilities. Later on,
215
+ I'll ask you to formalize your decision in JSON format; but for now, just explore the options.
216
+
217
+ If you find an item that seems like a good match, tell us what it is.
218
+ !IMPORTANT: You may only pick *one* item from the "after" list as a potential rename of this item.
219
+
220
+ If you don't find any good match, simply say that no good match was found. In this situation,
221
+ we'll consider this item as having been removed/deleted.
222
+
223
+ Naturally, if you have any higher-level instructions or context that apply to this item,
224
+ please take them into account as you reason through the possibilities.
225
+ `);
226
+ await convoIter.submit();
227
+ await convoIter.submit(undefined, undefined, {
228
+ jsonResponse: JSONSchemaFormat('list_comparison_item_rename_exploration', {
229
+ is_renamed: [
230
+ Boolean,
231
+ 'Whether the item from the "before" list has been renamed in the "after" list.',
232
+ ],
233
+ new_name: [
234
+ String,
235
+ 'The new name of the item in the "after" list, if it has been renamed. ' +
236
+ 'This needs to be an *exact character-for-character match* of the name of ' +
237
+ 'exactly *one* item in the "after" list, written *exactly* as it appears ' +
238
+ 'in the "after" list. If the item was not renamed, this should be an empty string.',
239
+ ],
240
+ is_deleted: [
241
+ Boolean,
242
+ 'Whether the item from the "before" list has been deleted/removed in the ' +
243
+ '"after" list. Presumably, if is_renamed is true, this should be false, ' +
244
+ 'and vice versa.',
245
+ ],
246
+ }),
247
+ });
248
+ const isItemDeleted = convoIter.getLastReplyDictField('is_deleted');
249
+ const isItemRenamed = convoIter.getLastReplyDictField('is_renamed');
250
+ if (!isItemDeleted && !isItemRenamed) {
251
+ // Item is unchanged - shouldn't happen since we already filtered those out,
252
+ // but just in case, we handle it.
253
+ const warningMessage = `LLM indicated item is neither renamed nor deleted, which should not happen. ` +
254
+ `Marking as unchanged: ${getItemName(itemBefore)}`;
255
+ retval.unchanged.push(getItemName(itemBefore));
256
+ totalProcessedItems++;
257
+ onComparingItem?.(itemBefore, true, false, ItemComparisonResult.Unchanged, undefined, warningMessage, totalProcessedItems, listBefore.length - (iItem + 1) + listAfter.length);
258
+ continue;
259
+ }
260
+ if (isItemDeleted) {
261
+ // This is the easy case - item was deleted.
262
+ retval.removed.push(getItemName(itemBefore));
263
+ totalProcessedItems++;
264
+ onComparingItem?.(itemBefore, true, false, ItemComparisonResult.Removed, undefined, undefined, totalProcessedItems, listBefore.length - (iItem + 1) + listAfter.length);
265
+ continue;
266
+ }
267
+ if (isItemRenamed) {
268
+ const newNameAccordingToLLM = `${convoIter.getLastReplyDictField('new_name', '')}`.trim();
269
+ if (!newNameAccordingToLLM) {
270
+ // Invalid response - no new name provided.
271
+ // Do not mark the item as removed. Mark it as unchanged.
272
+ const warningMessage = `LLM indicated item was renamed but did not provide a new name. ` +
273
+ `Skipping rename for item: ${getItemName(itemBefore)}`;
274
+ retval.unchanged.push(getItemName(itemBefore));
275
+ totalProcessedItems++;
276
+ onComparingItem?.(itemBefore, true, false, ItemComparisonResult.Unchanged, undefined, warningMessage, totalProcessedItems, listBefore.length - (iItem + 1) + listAfter.length);
277
+ continue;
278
+ }
279
+ // Find the actual item in listAfter that matches this name.
280
+ // We do this because the LLM might return a name that is slightly different
281
+ // from the actual name in the list (e.g. different casing, or with/without
282
+ // quotes, etc.)
283
+ let nameOfMatchedItem = null;
284
+ for (const itemAfter of listAfter) {
285
+ const nameAfter = getItemName(itemAfter);
286
+ if (areNamesEquivalent(nameAfter, newNameAccordingToLLM)) {
287
+ nameOfMatchedItem = nameAfter;
288
+ break;
289
+ }
290
+ }
291
+ if (!nameOfMatchedItem) {
292
+ // Couldn't find a matching item in listAfter.
293
+ // Do not mark the item as removed. Mark it as unchanged.
294
+ const warningMessage = `LLM indicated item was renamed to "${newNameAccordingToLLM}", ` +
295
+ `but no matching item was found in the "after" list. ` +
296
+ `Skipping rename for item: ${getItemName(itemBefore)}`;
297
+ retval.unchanged.push(getItemName(itemBefore));
298
+ totalProcessedItems++;
299
+ onComparingItem?.(itemBefore, true, false, ItemComparisonResult.Unchanged, undefined, warningMessage, totalProcessedItems, listBefore.length - (iItem + 1) + listAfter.length);
300
+ continue;
301
+ }
302
+ // Valid rename.
303
+ retval.renamed[getItemName(itemBefore)] = nameOfMatchedItem;
304
+ // Remove the matched item from listAfter so it can't be matched again.
305
+ listAfter = removeItemsByName(listAfter, nameOfMatchedItem);
306
+ totalProcessedItems++;
307
+ onComparingItem?.(itemBefore, true, false, ItemComparisonResult.Renamed, nameOfMatchedItem, undefined, totalProcessedItems, listBefore.length - (iItem + 1) + listAfter.length);
308
+ }
309
+ }
310
+ catch (error) {
311
+ const warningMessage = `LLM processing failed for "before" item ${JSON.stringify(getItemName(itemBefore))}; marking as unchanged.`;
312
+ retval.unchanged.push(getItemName(itemBefore));
313
+ totalProcessedItems++;
314
+ onComparingItem?.(itemBefore, true, false, ItemComparisonResult.Unchanged, undefined, warningMessage, totalProcessedItems, listBefore.length - (iItem + 1) + listAfter.length);
315
+ continue;
316
+ }
317
+ }
318
+ // At this point, any remaining items in listAfter are probably added.
319
+ // However, there could be additional instructions that indicate otherwise.
320
+ for (let iItem = 0; iItem < listAfter.length; iItem++) {
321
+ const itemAfter = listAfter[iItem];
322
+ onComparingItem?.(itemAfter, false, true, ItemComparisonResult.Unchanged, undefined, undefined, totalProcessedItems, listAfter.length - iItem);
323
+ try {
324
+ const convoIter = convo.clone();
325
+ convoIter.addUserMessage(`
326
+ At the moment, let's focus on this item from the "after" list:
327
+
328
+ ${itemToPromptString(itemAfter)}
329
+
330
+ We think that this item was newly added, because we can't find any matching item
331
+ from the "before" list. However, it's possible that we have instructions or context
332
+ that indicate otherwise.
333
+
334
+ At this point, we don't have the option of matching this item to any item from the "before"
335
+ list, since we've already processed all those items. However, we still have the option
336
+ of rejecting this item from addition -- in which case, it will be considered as not having
337
+ been added at all (or, in other words, it will be ignored in downstream processing).
338
+
339
+ What do you think? Should we consider this item as truly added, or should we reject / ignore
340
+ this item?
341
+ `);
342
+ await convoIter.submit();
343
+ await convoIter.submit(undefined, undefined, {
344
+ jsonResponse: JSONSchemaFormat('list_comparison_item_addition_decision', {
345
+ is_added: [
346
+ Boolean,
347
+ `Whether this item from the "after" list should be considered as truly added. ` +
348
+ `If false, the item will be ignored in downstream processing.`,
349
+ ],
350
+ }),
351
+ });
352
+ const isItemAdded = convoIter.getLastReplyDictField('is_added');
353
+ if (isItemAdded) {
354
+ retval.added.push(getItemName(itemAfter));
355
+ totalProcessedItems++;
356
+ onComparingItem?.(itemAfter, false, false, ItemComparisonResult.Added, undefined, undefined, totalProcessedItems, listAfter.length - (iItem + 1));
357
+ continue;
358
+ }
359
+ totalProcessedItems++;
360
+ onComparingItem?.(itemAfter, false, false, ItemComparisonResult.Unchanged, undefined, undefined, totalProcessedItems, listAfter.length - (iItem + 1));
361
+ }
362
+ catch (error) {
363
+ const warningMessage = `LLM processing failed for "after" item ${JSON.stringify(getItemName(itemAfter))}; skipping add classification for this item.`;
364
+ totalProcessedItems++;
365
+ onComparingItem?.(itemAfter, false, false, ItemComparisonResult.Unchanged, undefined, warningMessage, totalProcessedItems, listAfter.length - (iItem + 1));
366
+ continue;
367
+ }
368
+ }
369
+ return {
370
+ removed: [...new Set(retval.removed)].sort(),
371
+ added: [...new Set(retval.added)].sort(),
372
+ renamed: retval.renamed,
373
+ unchanged: [...new Set(retval.unchanged)].sort(),
374
+ };
375
+ };