mdi-llmkit 1.1.0 → 1.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -76,7 +76,7 @@ import { jsonSurgery } from 'mdi-llmkit/jsonSurgery';
76
76
  - It supports optional schema guidance and key-skipping for model-visible context.
77
77
  - It supports validation/progress callbacks and soft iteration/time limits.
78
78
 
79
- ## `compareItemLists` (comparison)
79
+ ## `compareItemLists` (semanticMatch)
80
80
 
81
81
  `compareItemLists` performs a semantic diff between a "before" list and an "after" list,
82
82
  including LLM-assisted rename/add/remove decisions.
@@ -106,7 +106,7 @@ import {
106
106
  compareItemLists,
107
107
  ItemComparisonResult,
108
108
  type OnComparingItemCallback,
109
- } from 'mdi-llmkit/comparison';
109
+ } from 'mdi-llmkit/semanticMatch';
110
110
 
111
111
  const client = new OpenAI({ apiKey: process.env.OPENAI_API_KEY });
112
112
 
@@ -0,0 +1,48 @@
1
+ /**
2
+ * Semantic comparison for before/after item lists.
3
+ *
4
+ * This module compares two lists that represent the same domain at different points
5
+ * in time (for example, before and after a migration) and classifies items as:
6
+ * - unchanged,
7
+ * - renamed,
8
+ * - removed, or
9
+ * - added.
10
+ *
11
+ * It is designed for cases where exact string comparison is not sufficient because
12
+ * names may change while meaning stays the same.
13
+ */
14
+ import { OpenAI } from 'openai';
15
+ import { SemanticItem } from './semanticItem.js';
16
+ /**
17
+ * Final classification assigned to an item by `compareItemLists`; each member's value is its lowercase name.
18
+ */
19
+ export declare enum ItemComparisonClassification {
20
+ /** Item existed in "before" and no match for it was found in "after". */
21
+ Removed = "removed",
22
+ /** Item exists in "after" and was not matched by any "before" item. */
23
+ Added = "added",
24
+ /** Item from "before" was matched to an "after" item that does not compare equal to it. */
25
+ Renamed = "renamed",
26
+ /** Item from "before" was matched to an "after" item that compares equal to it (see `areItemsEqual`). */
27
+ Unchanged = "unchanged"
28
+ }
29
+ export declare const ItemComparisonResult: typeof ItemComparisonClassification; // Value-level alias: the same object as the `ItemComparisonClassification` enum.
30
+ export type ItemComparisonResult = { // One entry of the array resolved by `compareItemLists`.
31
+ item: SemanticItem; // The "before" item, or the "after" item when `classification` is Added.
32
+ classification: ItemComparisonClassification; // Outcome assigned to `item`.
33
+ newName: string | undefined; // Name of the matched "after" item; set only when `classification` is Renamed.
34
+ };
35
+ /**
36
+ * Compares two lists of items and classifies each item from the "before" list as removed,
37
+ * renamed, or unchanged based on whether it has a semantic match in the "after" list.
38
+ * Any "after" items that are left unmatched are classified as added.
39
+ * @param openaiClient - OpenAI client forwarded to `findSemanticMatch` for semantic matching.
40
+ * @param listBefore - The list of items before the changes.
41
+ * @param listAfter - The list of items after the changes (copied internally, never mutated).
42
+ * @param explanation Optional explanation that provides context for the comparison, e.g.
43
+ * a description of the items or the nature of the changes.
44
+ * @returns An array of item comparison results. This includes all items from the "before"
45
+ * list with their classification (removed/renamed/unchanged), and any unmatched items from
46
+ * the "after" list classified as added.
47
+ */
48
+ export declare const compareItemLists: (openaiClient: OpenAI, listBefore: SemanticItem[], listAfter: SemanticItem[], explanation?: string) => Promise<ItemComparisonResult[]>;
@@ -0,0 +1,87 @@
1
+ /**
2
+ * Semantic comparison for before/after item lists.
3
+ *
4
+ * This module compares two lists that represent the same domain at different points
5
+ * in time (for example, before and after a migration) and classifies items as:
6
+ * - unchanged,
7
+ * - renamed,
8
+ * - removed, or
9
+ * - added.
10
+ *
11
+ * It is designed for cases where exact string comparison is not sufficient because
12
+ * names may change while meaning stays the same.
13
+ */
14
+ import { areItemsEqual, getItemName } from './semanticItem.js';
15
+ import { findSemanticMatch } from './find.js';
16
/**
 * Classification labels attached to every result entry produced by
 * `compareItemLists`. Each member maps to its lowercase string form.
 */
export var ItemComparisonClassification;
ItemComparisonClassification = Object.assign(ItemComparisonClassification || {}, {
    /** The "before" item is considered deleted in "after". */
    Removed: 'removed',
    /** The "after" item is considered newly introduced. */
    Added: 'added',
    /** The "before" item was matched to a differently named "after" item. */
    Renamed: 'renamed',
    /** The item is treated as unchanged for downstream purposes. */
    Unchanged: 'unchanged',
});
/** Value-level alias that refers to the very same enum object. */
export const ItemComparisonResult = ItemComparisonClassification;
31
+ /**
32
+ * Compares two lists of items and classifies each item from the "before" list as removed,
33
+ * renamed, or unchanged based on whether it has a semantic match in the "after" list.
34
+ * Any items in the "after" list that don't match to an item in the "before" list are
35
+ * classified as added.
36
+ * @param before - The list of items before the changes.
37
+ * @param after - The list of items after the changes.
38
+ * @param explanation Optional explanation that provides context for the comparison, e.g.
39
+ * a description of the items or the nature of the changes.
40
+ * @returns An array of item comparison results. This includes all items from the "before"
41
+ * list with their classification (removed/renamed/unchanged), and any unmatched items from
42
+ * the "after" list classified as added.
43
+ */
44
+ export const compareItemLists = async (openaiClient, listBefore, listAfter, explanation) => {
45
+ // We're going to be removing items from the "after" list as we match them,
46
+ // so we make a copy of it to avoid mutating the original array.
47
+ listAfter = [...listAfter];
48
+ const retval = [];
49
+ for (const itemBefore of listBefore) {
50
+ const indexMatchedInAfter = await findSemanticMatch(openaiClient, listAfter, itemBefore, explanation);
51
+ if (indexMatchedInAfter === -1) {
52
+ // No good match found in "after" list, so this item is probably removed.
53
+ retval.push({
54
+ item: itemBefore,
55
+ classification: ItemComparisonClassification.Removed,
56
+ newName: undefined,
57
+ });
58
+ continue;
59
+ }
60
+ const itemAfter = listAfter[indexMatchedInAfter];
61
+ if (areItemsEqual(itemBefore, itemAfter)) {
62
+ retval.push({
63
+ item: itemBefore,
64
+ classification: ItemComparisonClassification.Unchanged,
65
+ newName: undefined,
66
+ });
67
+ }
68
+ else {
69
+ retval.push({
70
+ item: itemBefore,
71
+ classification: ItemComparisonClassification.Renamed,
72
+ newName: getItemName(itemAfter),
73
+ });
74
+ }
75
+ // Remove the matched item from the "after" list so it can't be matched again.
76
+ listAfter.splice(indexMatchedInAfter, 1);
77
+ }
78
+ // All of the remaining items in the "after" list are considered added.
79
+ for (const itemAfter of listAfter) {
80
+ retval.push({
81
+ item: itemAfter,
82
+ classification: ItemComparisonClassification.Added,
83
+ newName: undefined,
84
+ });
85
+ }
86
+ return retval;
87
+ };
@@ -0,0 +1,38 @@
1
+ /**
2
+ * Semantic match finder for migration-style item lists.
3
+ *
4
+ * This module provides a helper that determines whether a "test item" is already present
5
+ * in an existing list, even when names are different. In this context, a semantic match
6
+ * means two labels point to the same underlying concept (for example, renamed fields,
7
+ * wording changes, or synonyms).
8
+ *
9
+ * Matching strategy:
10
+ * 1) Check for an equal item via `areItemsEqual` (case-insensitive, whitespace-trimmed).
11
+ * 2) If no exact match exists, use an LLM to infer conceptual equivalence.
12
+ *
13
+ * The exported function returns the index of the first matching list item when a match
14
+ * is found, or `-1` when no sufficiently similar item exists.
15
+ */
16
+ import { OpenAI } from 'openai';
17
+ import { SemanticItem } from './semanticItem.js';
18
+ /**
19
+ * Finds the best semantic match for a test item within a list of items.
20
+ *
21
+ * A semantic match means two items represent the same underlying concept even if their
22
+ * names differ (for example, due to renaming, wording changes, or synonyms).
23
+ *
24
+ * The function first looks for a list item that is equal to the test item under
25
+ * `areItemsEqual` (case-insensitive, whitespace-trimmed) and returns its index if found.
26
+ * If no equal item exists, it asks the LLM to decide whether the test item is represented
27
+ * in the list under a different name and returns the index of the item the LLM identified,
28
+ * or `-1` when no good semantic match is found.
29
+ *
30
+ * @param openaiClient An instance of the OpenAI client to use for LLM interactions.
31
+ * @param itemlist The list of strings/items to search.
32
+ * @param itemToFind The item for which we want to find a semantic match in the list.
33
+ * @param explanation Optional explanation that provides context for the comparison, e.g.
34
+ * a description of the items or the nature of the changes.
35
+ * @returns The zero-based index of the matching item in `itemlist`, or `-1` if no good match
36
+ * is found.
37
+ */
38
+ export declare const findSemanticMatch: (openaiClient: OpenAI, itemlist: SemanticItem[], itemToFind: SemanticItem, explanation?: string) => Promise<number>;
@@ -0,0 +1,134 @@
1
+ /**
2
+ * Semantic match finder for migration-style item lists.
3
+ *
4
+ * This module provides a helper that determines whether a "test item" is already present
5
+ * in an existing list, even when names are different. In this context, a semantic match
6
+ * means two labels point to the same underlying concept (for example, renamed fields,
7
+ * wording changes, or synonyms).
8
+ *
9
+ * Matching strategy:
10
+ * 1) Check for an equal item via `areItemsEqual` (case-insensitive, whitespace-trimmed).
11
+ * 2) If no exact match exists, use an LLM to infer conceptual equivalence.
12
+ *
13
+ * The exported function returns the index of the first matching list item when a match
14
+ * is found, or `-1` when no sufficiently similar item exists.
15
+ */
16
+ import { areItemsEqual, itemToPromptString, } from './semanticItem.js';
17
+ import { GptConversation } from '../gptApi/gptConversation.js';
18
+ import { JSONSchemaFormat } from '../gptApi/jsonSchemaFormat.js';
19
+ /**
20
+ * Finds the best semantic match for a test item within a list of items.
21
+ *
22
+ * A semantic match means two items represent the same underlying concept even if their
23
+ * names differ (for example, due to renaming, wording changes, or synonyms).
24
+ *
25
+ * The function first checks for an exact name match and returns its index immediately
26
+ * if found.
27
+ * If no exact match exists, it asks the LLM to decide whether the test item is represented
28
+ * in the list under a different name and returns the index of the first matching list item,
29
+ * or `-1` when no good semantic match is found.
30
+ *
31
+ * @param openaiClient An instance of the OpenAI client to use for LLM interactions.
32
+ * @param itemlist The list of strings/items to compare.
33
+ * @param itemToFind The item for which we want to find a semantic match in the list.
34
+ * @param explanation Optional explanation that provides context for the comparison, e.g.
35
+ * a description of the items or the nature of the changes.
36
+ * @returns The index of the first matching item from the list, or `-1` if no good match
37
+ * is found.
38
+ */
39
+ export const findSemanticMatch = async (openaiClient, itemlist, itemToFind, explanation) => {
40
+ // First check if there's an exact match for the item in the list.
41
+ // If so, we can skip the LLM and just return that.
42
+ for (let i = 0; i < itemlist.length; i++) {
43
+ const item = itemlist[i];
44
+ if (areItemsEqual(item, itemToFind)) {
45
+ return i;
46
+ }
47
+ }
48
+ const convo = new GptConversation([], { openaiClient });
49
+ convo.addSystemMessage(`
50
+ You are a data analyst who has been hired to try to preserve the integrity of a list of
51
+ data items. The user will show you a list of items from a data migration, followed by
52
+ a "test item". Your job is to determine whether the test item is already present in
53
+ the list (but maybe under a different name), or whether it is not present in the list
54
+ at all.
55
+
56
+ We've already determined that the test item does not have an exact name match in the list,
57
+ but it might have been renamed or expressed in a different way.
58
+
59
+ Let me give you a few examples of what I mean by this:
60
+
61
+ - Imagine that the list is ["Customer ID", "Order Date", "Total Amount"], and the test item
62
+ is "Client Identifier". Then you'd return "Customer ID".
63
+
64
+ - Imagine that the list is ["Customer ID", "Order Date", "Total Amount"], and the test item
65
+ is "Date of Order". Then you'd return "Order Date".
66
+
67
+ - Imagine that the list is ["Customer ID", "Order Date", "Total Amount"], and the test item
68
+ is "Product Name". Then you'd return null, because none of the items in the list are
69
+ semantically similar to "Product Name".
70
+
71
+ - Imagine that the list is ["Dragonfly", "Butterfly", "Firefly"], and the test item is
72
+ "Lightning Bug". Then you'd return "Firefly".
73
+
74
+ - Imagine that the list is ["Dragonfly", "Butterfly", "Firefly"], and the test item is
75
+ "Spider". Then you'd return null, because none of the items in the list are
76
+ semantically similar to "Spider".
77
+ `);
78
+ if (explanation) {
79
+ convo.addSystemMessage(`
80
+ Here is some additional context that may help you make better decisions about which items
81
+ have been renamed versus removed/added:
82
+
83
+ ${explanation}
84
+ `);
85
+ }
86
+ let sList = '';
87
+ for (let iItem = 0; iItem < itemlist.length; iItem++) {
88
+ const item = itemlist[iItem];
89
+ sList += `- ITEM #${iItem + 1}. ${itemToPromptString(item)}\n`;
90
+ }
91
+ convo.addUserMessage(`
92
+ Here is the list of items:
93
+
94
+ ${sList}
95
+
96
+ And here is the test item to compare against that list:
97
+
98
+ - ${itemToPromptString(itemToFind)}
99
+ `);
100
+ await convo.submit(undefined, undefined, {
101
+ jsonResponse: JSONSchemaFormat('list_comparison_find_potentially_renamed_item', {
102
+ discussion: [
103
+ String,
104
+ 'Your reasoning process as you compare the test item to the items in the list. ' +
105
+ 'This is for debugging purposes and will not be parsed by any downstream logic, ' +
106
+ 'but please provide as much detail as possible about your thinking here, ' +
107
+ 'as it will help us understand your decision-making process.',
108
+ ],
109
+ is_testitem_in_list: [
110
+ Boolean,
111
+ 'Whether the test item is present in the list.',
112
+ ],
113
+ item_number_in_list: [
114
+ Number,
115
+ `The item number (as indicated by "ITEM #") of the item that you've identified as ` +
116
+ `matching the test item. If you don't think any item in the list matches the ` +
117
+ `test item, then set this to -1.`,
118
+ ],
119
+ }),
120
+ });
121
+ const isTestItemInList = convo.getLastReplyDictField('is_testitem_in_list');
122
+ const itemNumberInList = convo.getLastReplyDictField('item_number_in_list');
123
+ if (!isTestItemInList) {
124
+ return -1;
125
+ }
126
+ if (!Number.isInteger(itemNumberInList)) {
127
+ return -1;
128
+ }
129
+ const index = itemNumberInList - 1;
130
+ if (index < 0 || index >= itemlist.length) {
131
+ return -1;
132
+ }
133
+ return index;
134
+ };
@@ -1 +1,2 @@
1
1
  export * from './compareLists.js';
2
+ export * from './semanticItem.js';
@@ -1 +1,2 @@
1
1
  export * from './compareLists.js';
2
+ export * from './semanticItem.js';
@@ -0,0 +1,78 @@
1
+ /**
2
+ * Shared item primitives and helpers used by semantic list comparison.
3
+ *
4
+ * This module intentionally focuses on item-level behavior:
5
+ * - `SemanticItem` defines the accepted item shape (`string` or `{ name, description? }`).
6
+ * - `getItemName` normalizes an item to its comparable name.
7
+ * - `itemToPromptString` formats an item for prompt text, including optional details.
8
+ * - `compareItems` provides case-insensitive ordering by item name.
9
+ * - `areItemsEqual` provides equality checks across comparable item content.
10
+ *
11
+ * Matching orchestration (removed/added/renamed classification) is implemented in
12
+ * higher-level modules and consumes these utilities.
13
+ */
14
+ /**
15
+ * Item shape accepted by `compareItemLists` and `findSemanticMatch` for semantic comparison.
16
+ *
17
+ * - A raw string is treated as the item's comparable name.
18
+ * - An object uses `name` as the comparable value and may include optional
19
+ * `description` to provide additional LLM context (ignored when it merely repeats the name).
20
+ */
21
+ export type SemanticItem = string | {
22
+ name: string;
23
+ description?: string;
24
+ };
25
+ /**
26
+ * Returns the comparable name for a list item.
27
+ * @param item The item to extract the name from.
28
+ * @returns The string itself for string items, otherwise the object's `name`; the value is returned as-is (no trimming or case folding).
29
+ */
30
+ export declare const getItemName: (item: SemanticItem) => string;
31
+ /**
32
+ * Returns the description of a list item, if available and non-redundant with the name.
33
+ * If the item is a string, or if the description is missing, empty, or equal to the name
34
+ * (ignoring case and surrounding whitespace), this function returns `undefined`.
35
+ * @param item The item to extract the description from.
36
+ * @returns The description exactly as supplied (not trimmed), or `undefined` if not available or redundant.
37
+ */
38
+ export declare const getItemDescription: (item: SemanticItem) => string | undefined;
39
+ /**
40
+ * Formats a list item for prompt inclusion, including optional description context.
41
+ * The output is a string that starts with "- " followed by the item name, and if a
42
+ * description is provided and is not redundant with the name, it appends the description
43
+ * as ` (details: <description>)`. The item name and description are JSON-stringified to prevent formatting
44
+ * issues in the prompt (e.g. with newlines or special characters).
45
+ * @param item The item to format for the prompt.
46
+ * @returns A single-line string such as `- "Name" (details: "Description")`, or `- "Name"` without a description.
47
+ */
48
+ export declare const itemToPromptString: (item: SemanticItem) => string;
49
+ /**
50
+ * Sort comparator for list items; returns a negative number, zero, or a positive number.
51
+ *
52
+ * Ordering behavior:
53
+ * 1) Compare names case-insensitively after trimming leading/trailing whitespace.
54
+ * 2) If names are equal, compare non-redundant descriptions case-insensitively
55
+ * (after trimming) as a tie-breaker. We only compare descriptions when both items have a
56
+ * non-redundant description; if either one lacks it, items with equal names compare as 0.
57
+ */
58
+ export declare const compareItems: (a: SemanticItem, b: SemanticItem) => number;
59
+ /**
60
+ * Equality check for two items.
61
+ *
62
+ * Equality uses the same semantics as `compareItems` (true when it returns 0):
63
+ * - names are compared case-insensitively after trimming;
64
+ * - when names tie and both items have a non-redundant description, the descriptions are
65
+ * compared case-insensitively after trimming; otherwise equal names alone make the items equal.
66
+ * @param a The first item to compare.
67
+ * @param b The second item to compare.
68
+ * @returns `true` if the items are equal under comparator semantics, `false` otherwise.
69
+ */
70
+ export declare const areItemsEqual: (a: SemanticItem, b: SemanticItem) => boolean;
71
+ /**
72
+ * Removes an item from a list based on full item equivalence, without mutating the input list.
73
+ * @param itemList The list of items to remove from.
74
+ * @param itemToRemove The item to remove from the list. Every item equal to this item
75
+ * under `areItemsEqual` semantics will be removed, not just the first one.
76
+ * @returns A new list containing the remaining items in their original order.
77
+ */
78
+ export declare const removeItemFromList: (itemList: SemanticItem[], itemToRemove: SemanticItem) => SemanticItem[];
@@ -0,0 +1,110 @@
1
+ /**
2
+ * Shared item primitives and helpers used by semantic list comparison.
3
+ *
4
+ * This module intentionally focuses on item-level behavior:
5
+ * - `SemanticItem` defines the accepted item shape (`string` or `{ name, description? }`).
6
+ * - `getItemName` normalizes an item to its comparable name.
7
+ * - `itemToPromptString` formats an item for prompt text, including optional details.
8
+ * - `compareItems` provides case-insensitive ordering by item name.
9
+ * - `areItemsEqual` provides equality checks across comparable item content.
10
+ *
11
+ * Matching orchestration (removed/added/renamed classification) is implemented in
12
+ * higher-level modules and consumes these utilities.
13
+ */
14
/**
 * Extracts the comparable name of a list item.
 * @param item A plain string or an object carrying a `name` property.
 * @returns The string itself for string items, otherwise the object's `name`, unmodified.
 */
export const getItemName = (item) => {
    if (typeof item === 'string') {
        return item;
    }
    return item.name;
};
22
/**
 * Returns the description of a list item when it adds information beyond the name.
 *
 * `undefined` is returned when:
 * - the item is a plain string;
 * - the description is missing, empty, or consists only of whitespace;
 * - the description equals the name, ignoring case and surrounding whitespace.
 *
 * @param item The item to extract the description from.
 * @returns The description exactly as supplied (not trimmed), or `undefined` if it is
 * unavailable, blank, or redundant with the name.
 */
export const getItemDescription = (item) => {
    if (typeof item === 'string') {
        return undefined;
    }
    const description = item.description;
    if (!description) {
        return undefined;
    }
    const normalizedDescription = description.trim().toLowerCase();
    // A whitespace-only description carries no information; treat it as absent so
    // prompts do not include blank details.
    if (normalizedDescription === '') {
        return undefined;
    }
    // A description that merely repeats the name adds no context either.
    if (normalizedDescription === item.name.trim().toLowerCase()) {
        return undefined;
    }
    return description;
};
43
+ /**
44
+ * Formats a list item for prompt inclusion, including optional description context.
45
+ * The output is a string that starts with "- " followed by the item name, and if a
46
+ * description is provided and is not redundant with the name, it includes the description
47
+ * in parentheses. The item name and description are JSON-stringified to prevent formatting
48
+ * issues in the prompt (e.g. with newlines or special characters).
49
+ * @param item The item to format for the prompt.
50
+ * @returns A string representation of the item suitable for inclusion in the prompt.
51
+ */
52
+ export const itemToPromptString = (item) => {
53
+ let s = `- ${JSON.stringify(getItemName(item))}`;
54
+ const description = getItemDescription(item);
55
+ if (description) {
56
+ s += ` (details: ${JSON.stringify(description)})`;
57
+ }
58
+ return s;
59
+ };
60
+ /**
61
+ * Sort comparator for list items.
62
+ *
63
+ * Ordering behavior:
64
+ * 1) Compare names case-insensitively after trimming leading/trailing whitespace.
65
+ * 2) If names are equal, compare non-redundant descriptions case-insensitively
66
+ * as a tie-breaker. We only compare descriptions when both items have a
67
+ * non-redundant description.
68
+ */
69
+ export const compareItems = (a, b) => {
70
+ const nameA = getItemName(a).trim().toLowerCase();
71
+ const nameB = getItemName(b).trim().toLowerCase();
72
+ const byName = nameA.localeCompare(nameB);
73
+ if (byName !== 0) {
74
+ return byName;
75
+ }
76
+ const descA = (getItemDescription(a) ?? '').trim().toLowerCase();
77
+ const descB = (getItemDescription(b) ?? '').trim().toLowerCase();
78
+ // In order to compare descriptions, both items should have a description.
79
+ // If only one item has a description, we don't bother comparing the
80
+ // description field.
81
+ if (!descA || !descB) {
82
+ return 0;
83
+ }
84
+ // If we have two descriptions, we can use them as a tie-breaker.
85
+ return descA.localeCompare(descB);
86
+ };
87
+ /**
88
+ * Equality check for two items.
89
+ *
90
+ * Equality uses the same semantics as `compareItems`:
91
+ * - names are compared case-insensitively after trimming;
92
+ * - when names tie, non-redundant descriptions are compared
93
+ * case-insensitively after trimming.
94
+ * @param a The first item to compare.
95
+ * @param b The second item to compare.
96
+ * @returns `true` if the items are equal under comparator semantics, `false` otherwise.
97
+ */
98
+ export const areItemsEqual = (a, b) => {
99
+ return compareItems(a, b) === 0;
100
+ };
101
+ /**
102
+ * Removes an item from a list based on full item equivalence.
103
+ * @param itemList The list of items to remove from.
104
+ * @param itemToRemove The item to remove from the list. Any item equal to this item
105
+ * under `areItemsEqual` semantics will be removed.
106
+ * @returns A new list with the specified item removed.
107
+ */
108
+ export const removeItemFromList = (itemList, itemToRemove) => {
109
+ return itemList.filter(item => !areItemsEqual(item, itemToRemove));
110
+ };