mdi-llmkit 1.1.1 → 1.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/src/semanticMatch/compareLists.d.ts +27 -76
- package/dist/src/semanticMatch/compareLists.js +68 -356
- package/dist/src/semanticMatch/find.d.ts +38 -0
- package/dist/src/semanticMatch/find.js +134 -0
- package/dist/src/semanticMatch/index.d.ts +1 -0
- package/dist/src/semanticMatch/index.js +1 -0
- package/dist/src/semanticMatch/semanticItem.d.ts +78 -0
- package/dist/src/semanticMatch/semanticItem.js +110 -0
- package/dist/tests/semanticMatch/compareLists.test.js +130 -387
- package/dist/tests/semanticMatch/find.test.d.ts +1 -0
- package/dist/tests/semanticMatch/find.test.js +234 -0
- package/dist/tests/semanticMatch/semanticItem.test.d.ts +1 -0
- package/dist/tests/semanticMatch/semanticItem.test.js +159 -0
- package/dist/tests/subpathExports.test.js +4 -0
- package/package.json +1 -1
|
@@ -1,38 +1,22 @@
|
|
|
1
1
|
/**
|
|
2
|
-
*
|
|
3
|
-
* plus LLM-assisted decisions for ambiguous cases.
|
|
2
|
+
* Semantic comparison for before/after item lists.
|
|
4
3
|
*
|
|
5
|
-
*
|
|
6
|
-
*
|
|
7
|
-
*
|
|
8
|
-
*
|
|
9
|
-
*
|
|
4
|
+
* This module compares two lists that represent the same domain at different points
|
|
5
|
+
* in time (for example, before and after a migration) and classifies items as:
|
|
6
|
+
* - unchanged,
|
|
7
|
+
* - renamed,
|
|
8
|
+
* - removed, or
|
|
9
|
+
* - added.
|
|
10
10
|
*
|
|
11
|
-
*
|
|
12
|
-
*
|
|
13
|
-
* - Comparison is name-based (`string` value or object `name` field).
|
|
14
|
-
* - Optional object `description` is context-only and does not affect identity.
|
|
15
|
-
*
|
|
16
|
-
* Progress reporting:
|
|
17
|
-
* - `OnComparingItemCallback` can be provided to receive start/finish events for each item,
|
|
18
|
-
* including source list, result classification, optional rename target, and running counts.
|
|
11
|
+
* It is designed for cases where exact string comparison is not sufficient because
|
|
12
|
+
* names may change while meaning stays the same.
|
|
19
13
|
*/
|
|
20
14
|
import { OpenAI } from 'openai';
|
|
21
|
-
|
|
22
|
-
* Item shape accepted by `compareItemLists` for semantic comparison.
|
|
23
|
-
*
|
|
24
|
-
* - A raw string is treated as the item's comparable name.
|
|
25
|
-
* - An object uses `name` as the comparable value and may include optional
|
|
26
|
-
* `description` to provide additional LLM context.
|
|
27
|
-
*/
|
|
28
|
-
export type SemanticallyComparableListItem = string | {
|
|
29
|
-
name: string;
|
|
30
|
-
description?: string;
|
|
31
|
-
};
|
|
15
|
+
import { SemanticItem } from './semanticItem.js';
|
|
32
16
|
/**
|
|
33
17
|
* Final classification of an item during comparison.
|
|
34
18
|
*/
|
|
35
|
-
export declare enum ItemComparisonResult {
|
|
19
|
+
export declare enum ItemComparisonClassification {
|
|
36
20
|
/** Item existed in "before" and is considered deleted in "after". */
|
|
37
21
|
Removed = "removed",
|
|
38
22
|
/** Item exists in "after" and is considered newly introduced. */
|
|
@@ -42,56 +26,23 @@ export declare enum ItemComparisonResult {
|
|
|
42
26
|
/** Item is treated as unchanged or unresolved for downstream purposes. */
|
|
43
27
|
Unchanged = "unchanged"
|
|
44
28
|
}
|
|
29
|
+
export declare const ItemComparisonResult: typeof ItemComparisonClassification;
|
|
30
|
+
export type ItemComparisonResult = {
|
|
31
|
+
item: SemanticItem;
|
|
32
|
+
classification: ItemComparisonClassification;
|
|
33
|
+
newName: string | undefined;
|
|
34
|
+
};
|
|
45
35
|
/**
|
|
46
|
-
*
|
|
47
|
-
*
|
|
48
|
-
*
|
|
49
|
-
*
|
|
50
|
-
*
|
|
51
|
-
* @param
|
|
52
|
-
* evaluation completes.
|
|
53
|
-
* @param result Current/final classification for this callback event. For start events,
|
|
54
|
-
* this is a provisional value; for finish events, it is final for that item.
|
|
55
|
-
* @param newName The matched new name when `result` is `Renamed`; otherwise `undefined`.
|
|
56
|
-
* @param error Optional warning/error message for this event; `undefined` when none.
|
|
57
|
-
* @param totalProcessedSoFar Number of items fully processed so far.
|
|
58
|
-
* @param totalLeftToProcess Number of items remaining after this event.
|
|
59
|
-
*/
|
|
60
|
-
export type OnComparingItemCallback = (item: SemanticallyComparableListItem, isFromBeforeList: boolean, isStarting: boolean, result: ItemComparisonResult, newName: string | undefined, error: string | undefined, totalProcessedSoFar: number, totalLeftToProcess: number) => void;
|
|
61
|
-
/**
|
|
62
|
-
* Result of comparing two lists of strings.
|
|
63
|
-
*/
|
|
64
|
-
export interface StringListComparison {
|
|
65
|
-
removed: string[];
|
|
66
|
-
added: string[];
|
|
67
|
-
renamed: Record<string, string>;
|
|
68
|
-
unchanged: string[];
|
|
69
|
-
}
|
|
70
|
-
/**
|
|
71
|
-
* Compares two lists of strings and identifies differences, including potential renames.
|
|
72
|
-
* The lists presumably use strings. However, in situations where the AI might benefit from
|
|
73
|
-
* additional context, the lists may contain objects with `name` and optional `description`
|
|
74
|
-
* properties; in these situations, it's the `name` property that is compared.
|
|
75
|
-
* The comparison is case insensitive.
|
|
76
|
-
*
|
|
77
|
-
* IMPORTANT: Item names are expected to be unique within each input list (case-insensitive).
|
|
78
|
-
* Duplicate names in either list are not supported and may produce incorrect results.
|
|
79
|
-
* @param before - The list of strings/items before the changes.
|
|
80
|
-
* @param after - The list of strings/items after the changes.
|
|
36
|
+
* Compares two lists of items and classifies each item from the "before" list as removed,
|
|
37
|
+
* renamed, or unchanged based on whether it has a semantic match in the "after" list.
|
|
38
|
+
* Any items in the "after" list that don't match to an item in the "before" list are
|
|
39
|
+
* classified as added.
|
|
40
|
+
* @param listBefore - The list of items before the changes.
|
|
41
|
+
* @param listAfter - The list of items after the changes.
|
|
81
42
|
* @param explanation Optional explanation that provides context for the comparison, e.g.
|
|
82
43
|
* a description of the items or the nature of the changes.
|
|
83
|
-
* @
|
|
84
|
-
*
|
|
85
|
-
*
|
|
86
|
-
* current/final classification, renamed target (if applicable), and
|
|
87
|
-
* optional warning/error message, and processed/remaining item counts.
|
|
88
|
-
* `totalProcessedSoFar` increases only when an item
|
|
89
|
-
* finishes; `totalLeftToProcess` is the number of items not yet finished.
|
|
90
|
-
* @returns An object containing removed, added, renamed, and unchanged strings
|
|
44
|
+
* @returns An array of item comparison results. This includes all items from the "before"
|
|
45
|
+
* list with their classification (removed/renamed/unchanged), and any unmatched items from
|
|
46
|
+
* the "after" list classified as added.
|
|
91
47
|
*/
|
|
92
|
-
export declare const compareItemLists: (openaiClient: OpenAI, listBefore:
|
|
93
|
-
removed: string[];
|
|
94
|
-
added: string[];
|
|
95
|
-
renamed: Record<string, string>;
|
|
96
|
-
unchanged: string[];
|
|
97
|
-
}>;
|
|
48
|
+
export declare const compareItemLists: (openaiClient: OpenAI, listBefore: SemanticItem[], listAfter: SemanticItem[], explanation?: string) => Promise<ItemComparisonResult[]>;
|
|
@@ -1,375 +1,87 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
1
|
+
/**
|
|
2
|
+
* Semantic comparison for before/after item lists.
|
|
3
|
+
*
|
|
4
|
+
* This module compares two lists that represent the same domain at different points
|
|
5
|
+
* in time (for example, before and after a migration) and classifies items as:
|
|
6
|
+
* - unchanged,
|
|
7
|
+
* - renamed,
|
|
8
|
+
* - removed, or
|
|
9
|
+
* - added.
|
|
10
|
+
*
|
|
11
|
+
* It is designed for cases where exact string comparison is not sufficient because
|
|
12
|
+
* names may change while meaning stays the same.
|
|
13
|
+
*/
|
|
14
|
+
import { areItemsEqual, getItemName } from './semanticItem.js';
|
|
15
|
+
import { findSemanticMatch } from './find.js';
|
|
3
16
|
/**
|
|
4
17
|
* Final classification of an item during comparison.
|
|
5
18
|
*/
|
|
6
|
-
export var ItemComparisonResult;
|
|
7
|
-
(function (ItemComparisonResult) {
|
|
19
|
+
export var ItemComparisonClassification;
|
|
20
|
+
(function (ItemComparisonClassification) {
|
|
8
21
|
/** Item existed in "before" and is considered deleted in "after". */
|
|
9
|
-
|
|
22
|
+
ItemComparisonClassification["Removed"] = "removed";
|
|
10
23
|
/** Item exists in "after" and is considered newly introduced. */
|
|
11
|
-
|
|
24
|
+
ItemComparisonClassification["Added"] = "added";
|
|
12
25
|
/** Item from "before" was matched to a different name in "after". */
|
|
13
|
-
|
|
26
|
+
ItemComparisonClassification["Renamed"] = "renamed";
|
|
14
27
|
/** Item is treated as unchanged or unresolved for downstream purposes. */
|
|
15
|
-
|
|
16
|
-
})(ItemComparisonResult || (ItemComparisonResult = {}));
|
|
17
|
-
|
|
18
|
-
* Returns the comparable name for a list item.
|
|
19
|
-
*/
|
|
20
|
-
const getItemName = (item) => {
|
|
21
|
-
return typeof item === 'string' ? item : item.name;
|
|
22
|
-
};
|
|
23
|
-
/**
|
|
24
|
-
* Ensures a list has no duplicate item names after case-insensitive normalization.
|
|
25
|
-
* Throws an error listing duplicates when the uniqueness precondition is violated.
|
|
26
|
-
*/
|
|
27
|
-
const assertUniqueNamesInList = (listToCheck, listName) => {
|
|
28
|
-
const seenNames = new Set();
|
|
29
|
-
const duplicateNames = new Set();
|
|
30
|
-
for (const item of listToCheck) {
|
|
31
|
-
const name = getItemName(item).trim().toLowerCase();
|
|
32
|
-
if (seenNames.has(name)) {
|
|
33
|
-
duplicateNames.add(name);
|
|
34
|
-
}
|
|
35
|
-
else {
|
|
36
|
-
seenNames.add(name);
|
|
37
|
-
}
|
|
38
|
-
}
|
|
39
|
-
if (duplicateNames.size > 0) {
|
|
40
|
-
throw new Error(`compareItemLists: Duplicate item names found in ${listName} list (case-insensitive): ` +
|
|
41
|
-
`${Array.from(duplicateNames)
|
|
42
|
-
.sort()
|
|
43
|
-
.map((name) => JSON.stringify(name))
|
|
44
|
-
.join(', ')}`);
|
|
45
|
-
}
|
|
46
|
-
};
|
|
28
|
+
ItemComparisonClassification["Unchanged"] = "unchanged";
|
|
29
|
+
})(ItemComparisonClassification || (ItemComparisonClassification = {}));
|
|
30
|
+
export const ItemComparisonResult = ItemComparisonClassification;
|
|
47
31
|
/**
|
|
48
|
-
*
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
else {
|
|
55
|
-
let s = `- ${JSON.stringify(item.name)}`;
|
|
56
|
-
if (item.description &&
|
|
57
|
-
item.description.trim().toLowerCase() !== item.name.trim().toLowerCase()) {
|
|
58
|
-
s += ` (details: ${JSON.stringify(item.description)})`;
|
|
59
|
-
}
|
|
60
|
-
return s;
|
|
61
|
-
}
|
|
62
|
-
};
|
|
63
|
-
/**
|
|
64
|
-
* Sort comparator for list items by case-insensitive name.
|
|
65
|
-
*/
|
|
66
|
-
const compareItemsByName = (a, b) => {
|
|
67
|
-
const nameA = getItemName(a).toLowerCase();
|
|
68
|
-
const nameB = getItemName(b).toLowerCase();
|
|
69
|
-
return nameA.localeCompare(nameB);
|
|
70
|
-
};
|
|
71
|
-
/**
|
|
72
|
-
* Compares two names case-insensitively while tolerating JSON-escaped variants.
|
|
73
|
-
*/
|
|
74
|
-
const areNamesEquivalent = (a, b) => {
|
|
75
|
-
a = a.trim().toLowerCase();
|
|
76
|
-
b = b.trim().toLowerCase();
|
|
77
|
-
if (a === b || a === JSON.stringify(b) || JSON.stringify(a) === b) {
|
|
78
|
-
return true;
|
|
79
|
-
}
|
|
80
|
-
return false;
|
|
81
|
-
};
|
|
82
|
-
/**
|
|
83
|
-
* Removes every item whose name matches the target (case-insensitive, JSON-tolerant).
|
|
84
|
-
*/
|
|
85
|
-
const removeItemsByName = (listToModify, itemNameToRemove) => {
|
|
86
|
-
itemNameToRemove = itemNameToRemove.trim().toLowerCase();
|
|
87
|
-
return listToModify.filter((item) => {
|
|
88
|
-
const name = getItemName(item).trim().toLowerCase();
|
|
89
|
-
if (areNamesEquivalent(name, itemNameToRemove)) {
|
|
90
|
-
return false; // Remove this item
|
|
91
|
-
}
|
|
92
|
-
return true; // Keep this item
|
|
93
|
-
});
|
|
94
|
-
};
|
|
95
|
-
/**
|
|
96
|
-
* Compares two lists of strings and identifies differences, including potential renames.
|
|
97
|
-
* The lists presumably use strings. However, in situations where the AI might benefit from
|
|
98
|
-
* additional context, the lists may contain objects with `name` and optional `description`
|
|
99
|
-
* properties; in these situations, it's the `name` property that is compared.
|
|
100
|
-
* The comparison is case insensitive.
|
|
101
|
-
*
|
|
102
|
-
* IMPORTANT: Item names are expected to be unique within each input list (case-insensitive).
|
|
103
|
-
* Duplicate names in either list are not supported and may produce incorrect results.
|
|
104
|
-
* @param before - The list of strings/items before the changes.
|
|
105
|
-
* @param after - The list of strings/items after the changes.
|
|
32
|
+
* Compares two lists of items and classifies each item from the "before" list as removed,
|
|
33
|
+
* renamed, or unchanged based on whether it has a semantic match in the "after" list.
|
|
34
|
+
* Any items in the "after" list that don't match to an item in the "before" list are
|
|
35
|
+
* classified as added.
|
|
36
|
+
* @param listBefore - The list of items before the changes.
|
|
37
|
+
* @param listAfter - The list of items after the changes.
|
|
106
38
|
* @param explanation Optional explanation that provides context for the comparison, e.g.
|
|
107
39
|
* a description of the items or the nature of the changes.
|
|
108
|
-
* @
|
|
109
|
-
*
|
|
110
|
-
*
|
|
111
|
-
* current/final classification, renamed target (if applicable), and
|
|
112
|
-
* optional warning/error message, and processed/remaining item counts.
|
|
113
|
-
* `totalProcessedSoFar` increases only when an item
|
|
114
|
-
* finishes; `totalLeftToProcess` is the number of items not yet finished.
|
|
115
|
-
* @returns An object containing removed, added, renamed, and unchanged strings
|
|
40
|
+
* @returns An array of item comparison results. This includes all items from the "before"
|
|
41
|
+
* list with their classification (removed/renamed/unchanged), and any unmatched items from
|
|
42
|
+
* the "after" list classified as added.
|
|
116
43
|
*/
|
|
117
|
-
export const compareItemLists = async (openaiClient, listBefore, listAfter, explanation
|
|
118
|
-
//
|
|
119
|
-
|
|
120
|
-
listAfter =
|
|
121
|
-
const retval =
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
listAfter.sort(compareItemsByName);
|
|
131
|
-
const setStringsBefore = new Set(listBefore.map((item) => getItemName(item)));
|
|
132
|
-
const setStringsAfter = new Set(listAfter.map((item) => getItemName(item)));
|
|
133
|
-
// Determine which strings are common to both lists.
|
|
134
|
-
// We can't just do a simple set intersection, because we want the comparison
|
|
135
|
-
// to be case insensitive. So we have to do it manually.
|
|
136
|
-
// We'll just perform an n^2 comparison since the lists are expected to be small.
|
|
137
|
-
const setStringsCommon = new Set();
|
|
138
|
-
for (const strBefore of setStringsBefore) {
|
|
139
|
-
for (const strAfter of setStringsAfter) {
|
|
140
|
-
if (strBefore.toLowerCase() === strAfter.toLowerCase()) {
|
|
141
|
-
setStringsCommon.add(strBefore);
|
|
142
|
-
break;
|
|
143
|
-
}
|
|
144
|
-
}
|
|
145
|
-
}
|
|
146
|
-
// This already gives us the unchanged items.
|
|
147
|
-
retval.unchanged = Array.from(setStringsCommon).sort();
|
|
148
|
-
// Remove the unchanged items from both lists, leaving only items that might have been
|
|
149
|
-
// removed, added, or renamed.
|
|
150
|
-
// Remember that we can't just do set subtraction because of case insensitivity, and
|
|
151
|
-
// because the original lists may contain objects rather than just strings.
|
|
152
|
-
for (const strCommon of setStringsCommon) {
|
|
153
|
-
listBefore = removeItemsByName(listBefore, strCommon);
|
|
154
|
-
listAfter = removeItemsByName(listAfter, strCommon);
|
|
155
|
-
}
|
|
156
|
-
// Now the two lists contain only items with different names.
|
|
157
|
-
// However, some of these items may be renames rather than pure additions/removals.
|
|
158
|
-
// The only way to tell is with AI.
|
|
159
|
-
const convo = new GptConversation([], { openaiClient });
|
|
160
|
-
convo.addSystemMessage(`
|
|
161
|
-
You are a data analyst who has been hired to try to preserve the integrity of a list of
|
|
162
|
-
data items that have recently undergone migration from one data system to another.
|
|
163
|
-
|
|
164
|
-
You will be given two lists of items: a "before" list and an "after" list.
|
|
165
|
-
(The exact nature of the items is not important. They could be names of products from
|
|
166
|
-
receipts or purchase orders, for example.)
|
|
167
|
-
|
|
168
|
-
In the migration from the old data system to the new, some items may have been removed,
|
|
169
|
-
some items may have been added, and some items may have been renamed. We can't tell
|
|
170
|
-
just by performing string comparisons on the two lists, because the renames may be subtle.
|
|
171
|
-
|
|
172
|
-
We're going to go through the items in the "before" list, one by one. For each one,
|
|
173
|
-
you will look for the best matching item in the "after" list. If you find a good match,
|
|
174
|
-
you will consider that item to be a rename of the original item. If you don't find a
|
|
175
|
-
good match, you will consider that item to have been removed.
|
|
176
|
-
`);
|
|
177
|
-
if (explanation) {
|
|
178
|
-
convo.addSystemMessage(`
|
|
179
|
-
Here is some additional context that may help you make better decisions about which items
|
|
180
|
-
have been renamed versus removed/added:
|
|
181
|
-
|
|
182
|
-
${explanation}
|
|
183
|
-
`);
|
|
184
|
-
}
|
|
185
|
-
convo.addUserMessage(`
|
|
186
|
-
"BEFORE" LIST:
|
|
187
|
-
|
|
188
|
-
${listBefore.map(itemToPromptString).join('\n')}
|
|
189
|
-
`);
|
|
190
|
-
// Counts used for onComparingItem telemetry across both loops.
|
|
191
|
-
let totalProcessedItems = 0;
|
|
192
|
-
// First, go through each item in the "before" list, and submit it to the LLM
|
|
193
|
-
// for presentation.
|
|
194
|
-
for (let iItem = 0; iItem < listBefore.length; iItem++) {
|
|
195
|
-
const itemBefore = listBefore[iItem];
|
|
196
|
-
onComparingItem?.(itemBefore, true, true, ItemComparisonResult.Unchanged, undefined, undefined, totalProcessedItems, listBefore.length - iItem + listAfter.length);
|
|
197
|
-
try {
|
|
198
|
-
const convoIter = convo.clone();
|
|
199
|
-
// We rebuild the "after" list each time, since items may get removed from it
|
|
200
|
-
// as they get matched.
|
|
201
|
-
convoIter.addUserMessage(`
|
|
202
|
-
"AFTER" LIST:
|
|
203
|
-
|
|
204
|
-
${listAfter.map(itemToPromptString).join('\n')}
|
|
205
|
-
`);
|
|
206
|
-
convoIter.addUserMessage(`
|
|
207
|
-
For the moment, let's focus on this item from the "before" list:
|
|
208
|
-
|
|
209
|
-
${itemToPromptString(itemBefore)}
|
|
210
|
-
|
|
211
|
-
Look through the entire "after" list and try to find an item that might be a rename
|
|
212
|
-
or alternative version of this item.
|
|
213
|
-
|
|
214
|
-
Feel free to think aloud, brainstorm, and reason through the possibilities. Later on,
|
|
215
|
-
I'll ask you to formalize your decision in JSON format; but for now, just explore the options.
|
|
216
|
-
|
|
217
|
-
If you find an item that seems like a good match, tell us what it is.
|
|
218
|
-
!IMPORTANT: You may only pick *one* item from the "after" list as a potential rename of this item.
|
|
219
|
-
|
|
220
|
-
If you don't find any good match, simply say that no good match was found. In this situation,
|
|
221
|
-
we'll consider this item as having been removed/deleted.
|
|
222
|
-
|
|
223
|
-
Naturally, if you have any higher-level instructions or context that apply to this item,
|
|
224
|
-
please take them into account as you reason through the possibilities.
|
|
225
|
-
`);
|
|
226
|
-
await convoIter.submit();
|
|
227
|
-
await convoIter.submit(undefined, undefined, {
|
|
228
|
-
jsonResponse: JSONSchemaFormat('list_comparison_item_rename_exploration', {
|
|
229
|
-
is_renamed: [
|
|
230
|
-
Boolean,
|
|
231
|
-
'Whether the item from the "before" list has been renamed in the "after" list.',
|
|
232
|
-
],
|
|
233
|
-
new_name: [
|
|
234
|
-
String,
|
|
235
|
-
'The new name of the item in the "after" list, if it has been renamed. ' +
|
|
236
|
-
'This needs to be an *exact character-for-character match* of the name of ' +
|
|
237
|
-
'exactly *one* item in the "after" list, written *exactly* as it appears ' +
|
|
238
|
-
'in the "after" list. If the item was not renamed, this should be an empty string.',
|
|
239
|
-
],
|
|
240
|
-
is_deleted: [
|
|
241
|
-
Boolean,
|
|
242
|
-
'Whether the item from the "before" list has been deleted/removed in the ' +
|
|
243
|
-
'"after" list. Presumably, if is_renamed is true, this should be false, ' +
|
|
244
|
-
'and vice versa.',
|
|
245
|
-
],
|
|
246
|
-
}),
|
|
44
|
+
export const compareItemLists = async (openaiClient, listBefore, listAfter, explanation) => {
|
|
45
|
+
// We're going to be removing items from the "after" list as we match them,
|
|
46
|
+
// so we make a copy of it to avoid mutating the original array.
|
|
47
|
+
listAfter = [...listAfter];
|
|
48
|
+
const retval = [];
|
|
49
|
+
for (const itemBefore of listBefore) {
|
|
50
|
+
const indexMatchedInAfter = await findSemanticMatch(openaiClient, listAfter, itemBefore, explanation);
|
|
51
|
+
if (indexMatchedInAfter === -1) {
|
|
52
|
+
// No good match found in "after" list, so this item is probably removed.
|
|
53
|
+
retval.push({
|
|
54
|
+
item: itemBefore,
|
|
55
|
+
classification: ItemComparisonClassification.Removed,
|
|
56
|
+
newName: undefined,
|
|
247
57
|
});
|
|
248
|
-
const isItemDeleted = convoIter.getLastReplyDictField('is_deleted');
|
|
249
|
-
const isItemRenamed = convoIter.getLastReplyDictField('is_renamed');
|
|
250
|
-
if (!isItemDeleted && !isItemRenamed) {
|
|
251
|
-
// Item is unchanged - shouldn't happen since we already filtered those out,
|
|
252
|
-
// but just in case, we handle it.
|
|
253
|
-
const warningMessage = `LLM indicated item is neither renamed nor deleted, which should not happen. ` +
|
|
254
|
-
`Marking as unchanged: ${getItemName(itemBefore)}`;
|
|
255
|
-
retval.unchanged.push(getItemName(itemBefore));
|
|
256
|
-
totalProcessedItems++;
|
|
257
|
-
onComparingItem?.(itemBefore, true, false, ItemComparisonResult.Unchanged, undefined, warningMessage, totalProcessedItems, listBefore.length - (iItem + 1) + listAfter.length);
|
|
258
|
-
continue;
|
|
259
|
-
}
|
|
260
|
-
if (isItemDeleted) {
|
|
261
|
-
// This is the easy case - item was deleted.
|
|
262
|
-
retval.removed.push(getItemName(itemBefore));
|
|
263
|
-
totalProcessedItems++;
|
|
264
|
-
onComparingItem?.(itemBefore, true, false, ItemComparisonResult.Removed, undefined, undefined, totalProcessedItems, listBefore.length - (iItem + 1) + listAfter.length);
|
|
265
|
-
continue;
|
|
266
|
-
}
|
|
267
|
-
if (isItemRenamed) {
|
|
268
|
-
const newNameAccordingToLLM = `${convoIter.getLastReplyDictField('new_name', '')}`.trim();
|
|
269
|
-
if (!newNameAccordingToLLM) {
|
|
270
|
-
// Invalid response - no new name provided.
|
|
271
|
-
// Do not mark the item as removed. Mark it as unchanged.
|
|
272
|
-
const warningMessage = `LLM indicated item was renamed but did not provide a new name. ` +
|
|
273
|
-
`Skipping rename for item: ${getItemName(itemBefore)}`;
|
|
274
|
-
retval.unchanged.push(getItemName(itemBefore));
|
|
275
|
-
totalProcessedItems++;
|
|
276
|
-
onComparingItem?.(itemBefore, true, false, ItemComparisonResult.Unchanged, undefined, warningMessage, totalProcessedItems, listBefore.length - (iItem + 1) + listAfter.length);
|
|
277
|
-
continue;
|
|
278
|
-
}
|
|
279
|
-
// Find the actual item in listAfter that matches this name.
|
|
280
|
-
// We do this because the LLM might return a name that is slightly different
|
|
281
|
-
// from the actual name in the list (e.g. different casing, or with/without
|
|
282
|
-
// quotes, etc.)
|
|
283
|
-
let nameOfMatchedItem = null;
|
|
284
|
-
for (const itemAfter of listAfter) {
|
|
285
|
-
const nameAfter = getItemName(itemAfter);
|
|
286
|
-
if (areNamesEquivalent(nameAfter, newNameAccordingToLLM)) {
|
|
287
|
-
nameOfMatchedItem = nameAfter;
|
|
288
|
-
break;
|
|
289
|
-
}
|
|
290
|
-
}
|
|
291
|
-
if (!nameOfMatchedItem) {
|
|
292
|
-
// Couldn't find a matching item in listAfter.
|
|
293
|
-
// Do not mark the item as removed. Mark it as unchanged.
|
|
294
|
-
const warningMessage = `LLM indicated item was renamed to "${newNameAccordingToLLM}", ` +
|
|
295
|
-
`but no matching item was found in the "after" list. ` +
|
|
296
|
-
`Skipping rename for item: ${getItemName(itemBefore)}`;
|
|
297
|
-
retval.unchanged.push(getItemName(itemBefore));
|
|
298
|
-
totalProcessedItems++;
|
|
299
|
-
onComparingItem?.(itemBefore, true, false, ItemComparisonResult.Unchanged, undefined, warningMessage, totalProcessedItems, listBefore.length - (iItem + 1) + listAfter.length);
|
|
300
|
-
continue;
|
|
301
|
-
}
|
|
302
|
-
// Valid rename.
|
|
303
|
-
retval.renamed[getItemName(itemBefore)] = nameOfMatchedItem;
|
|
304
|
-
// Remove the matched item from listAfter so it can't be matched again.
|
|
305
|
-
listAfter = removeItemsByName(listAfter, nameOfMatchedItem);
|
|
306
|
-
totalProcessedItems++;
|
|
307
|
-
onComparingItem?.(itemBefore, true, false, ItemComparisonResult.Renamed, nameOfMatchedItem, undefined, totalProcessedItems, listBefore.length - (iItem + 1) + listAfter.length);
|
|
308
|
-
}
|
|
309
|
-
}
|
|
310
|
-
catch (error) {
|
|
311
|
-
const warningMessage = `LLM processing failed for "before" item ${JSON.stringify(getItemName(itemBefore))}; marking as unchanged.`;
|
|
312
|
-
retval.unchanged.push(getItemName(itemBefore));
|
|
313
|
-
totalProcessedItems++;
|
|
314
|
-
onComparingItem?.(itemBefore, true, false, ItemComparisonResult.Unchanged, undefined, warningMessage, totalProcessedItems, listBefore.length - (iItem + 1) + listAfter.length);
|
|
315
58
|
continue;
|
|
316
59
|
}
|
|
317
|
-
|
|
318
|
-
|
|
319
|
-
|
|
320
|
-
|
|
321
|
-
|
|
322
|
-
|
|
323
|
-
try {
|
|
324
|
-
const convoIter = convo.clone();
|
|
325
|
-
convoIter.addUserMessage(`
|
|
326
|
-
At the moment, let's focus on this item from the "after" list:
|
|
327
|
-
|
|
328
|
-
${itemToPromptString(itemAfter)}
|
|
329
|
-
|
|
330
|
-
We think that this item was newly added, because we can't find any matching item
|
|
331
|
-
from the "before" list. However, it's possible that we have instructions or context
|
|
332
|
-
that indicate otherwise.
|
|
333
|
-
|
|
334
|
-
At this point, we don't have the option of matching this item to any item from the "before"
|
|
335
|
-
list, since we've already processed all those items. However, we still have the option
|
|
336
|
-
of rejecting this item from addition -- in which case, it will be considered as not having
|
|
337
|
-
been added at all (or, in other words, it will be ignored in downstream processing).
|
|
338
|
-
|
|
339
|
-
What do you think? Should we consider this item as truly added, or should we reject / ignore
|
|
340
|
-
this item?
|
|
341
|
-
`);
|
|
342
|
-
await convoIter.submit();
|
|
343
|
-
await convoIter.submit(undefined, undefined, {
|
|
344
|
-
jsonResponse: JSONSchemaFormat('list_comparison_item_addition_decision', {
|
|
345
|
-
is_added: [
|
|
346
|
-
Boolean,
|
|
347
|
-
`Whether this item from the "after" list should be considered as truly added. ` +
|
|
348
|
-
`If false, the item will be ignored in downstream processing.`,
|
|
349
|
-
],
|
|
350
|
-
}),
|
|
60
|
+
const itemAfter = listAfter[indexMatchedInAfter];
|
|
61
|
+
if (areItemsEqual(itemBefore, itemAfter)) {
|
|
62
|
+
retval.push({
|
|
63
|
+
item: itemBefore,
|
|
64
|
+
classification: ItemComparisonClassification.Unchanged,
|
|
65
|
+
newName: undefined,
|
|
351
66
|
});
|
|
352
|
-
const isItemAdded = convoIter.getLastReplyDictField('is_added');
|
|
353
|
-
if (isItemAdded) {
|
|
354
|
-
retval.added.push(getItemName(itemAfter));
|
|
355
|
-
totalProcessedItems++;
|
|
356
|
-
onComparingItem?.(itemAfter, false, false, ItemComparisonResult.Added, undefined, undefined, totalProcessedItems, listAfter.length - (iItem + 1));
|
|
357
|
-
continue;
|
|
358
|
-
}
|
|
359
|
-
totalProcessedItems++;
|
|
360
|
-
onComparingItem?.(itemAfter, false, false, ItemComparisonResult.Unchanged, undefined, undefined, totalProcessedItems, listAfter.length - (iItem + 1));
|
|
361
67
|
}
|
|
362
|
-
|
|
363
|
-
|
|
364
|
-
|
|
365
|
-
|
|
366
|
-
|
|
68
|
+
else {
|
|
69
|
+
retval.push({
|
|
70
|
+
item: itemBefore,
|
|
71
|
+
classification: ItemComparisonClassification.Renamed,
|
|
72
|
+
newName: getItemName(itemAfter),
|
|
73
|
+
});
|
|
367
74
|
}
|
|
75
|
+
// Remove the matched item from the "after" list so it can't be matched again.
|
|
76
|
+
listAfter.splice(indexMatchedInAfter, 1);
|
|
77
|
+
}
|
|
78
|
+
// All of the remaining items in the "after" list are considered added.
|
|
79
|
+
for (const itemAfter of listAfter) {
|
|
80
|
+
retval.push({
|
|
81
|
+
item: itemAfter,
|
|
82
|
+
classification: ItemComparisonClassification.Added,
|
|
83
|
+
newName: undefined,
|
|
84
|
+
});
|
|
368
85
|
}
|
|
369
|
-
return {
|
|
370
|
-
removed: [...new Set(retval.removed)].sort(),
|
|
371
|
-
added: [...new Set(retval.added)].sort(),
|
|
372
|
-
renamed: retval.renamed,
|
|
373
|
-
unchanged: [...new Set(retval.unchanged)].sort(),
|
|
374
|
-
};
|
|
86
|
+
return retval;
|
|
375
87
|
};
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Semantic match finder for migration-style item lists.
|
|
3
|
+
*
|
|
4
|
+
* This module provides a helper that determines whether a "test item" is already present
|
|
5
|
+
* in an existing list, even when names are different. In this context, a semantic match
|
|
6
|
+
* means two labels point to the same underlying concept (for example, renamed fields,
|
|
7
|
+
* wording changes, or synonyms).
|
|
8
|
+
*
|
|
9
|
+
* Matching strategy:
|
|
10
|
+
* 1) Check for exact name equality.
|
|
11
|
+
* 2) If no exact match exists, use an LLM to infer conceptual equivalence.
|
|
12
|
+
*
|
|
13
|
+
* The exported function returns the index of the first matching list item when a match
|
|
14
|
+
* is found, or `-1` when no sufficiently similar item exists.
|
|
15
|
+
*/
|
|
16
|
+
import { OpenAI } from 'openai';
|
|
17
|
+
import { SemanticItem } from './semanticItem.js';
|
|
18
|
+
/**
|
|
19
|
+
* Finds the best semantic match for a test item within a list of items.
|
|
20
|
+
*
|
|
21
|
+
* A semantic match means two items represent the same underlying concept even if their
|
|
22
|
+
* names differ (for example, due to renaming, wording changes, or synonyms).
|
|
23
|
+
*
|
|
24
|
+
* The function first checks for an exact name match and returns its index immediately
|
|
25
|
+
* if found.
|
|
26
|
+
* If no exact match exists, it asks the LLM to decide whether the test item is represented
|
|
27
|
+
* in the list under a different name and returns the index of the first matching list item,
|
|
28
|
+
* or `-1` when no good semantic match is found.
|
|
29
|
+
*
|
|
30
|
+
* @param openaiClient An instance of the OpenAI client to use for LLM interactions.
|
|
31
|
+
* @param itemlist The list of strings/items to compare.
|
|
32
|
+
* @param itemToFind The item for which we want to find a semantic match in the list.
|
|
33
|
+
* @param explanation Optional explanation that provides context for the comparison, e.g.
|
|
34
|
+
* a description of the items or the nature of the changes.
|
|
35
|
+
* @returns The index of the first matching item from the list, or `-1` if no good match
|
|
36
|
+
* is found.
|
|
37
|
+
*/
|
|
38
|
+
export declare const findSemanticMatch: (openaiClient: OpenAI, itemlist: SemanticItem[], itemToFind: SemanticItem, explanation?: string) => Promise<number>;
|