lionagi 0.0.305__py3-none-any.whl → 0.0.307__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lionagi/__init__.py +2 -5
- lionagi/core/__init__.py +7 -4
- lionagi/core/agent/__init__.py +3 -0
- lionagi/core/agent/base_agent.py +46 -0
- lionagi/core/branch/__init__.py +4 -0
- lionagi/core/branch/base/__init__.py +0 -0
- lionagi/core/branch/base_branch.py +100 -78
- lionagi/core/branch/branch.py +22 -34
- lionagi/core/branch/branch_flow_mixin.py +3 -7
- lionagi/core/branch/executable_branch.py +192 -0
- lionagi/core/branch/util.py +77 -162
- lionagi/core/direct/__init__.py +13 -0
- lionagi/core/direct/parallel_predict.py +127 -0
- lionagi/core/direct/parallel_react.py +0 -0
- lionagi/core/direct/parallel_score.py +0 -0
- lionagi/core/direct/parallel_select.py +0 -0
- lionagi/core/direct/parallel_sentiment.py +0 -0
- lionagi/core/direct/predict.py +174 -0
- lionagi/core/direct/react.py +33 -0
- lionagi/core/direct/score.py +163 -0
- lionagi/core/direct/select.py +144 -0
- lionagi/core/direct/sentiment.py +51 -0
- lionagi/core/direct/utils.py +83 -0
- lionagi/core/flow/__init__.py +0 -3
- lionagi/core/flow/monoflow/{mono_react.py → ReAct.py} +52 -9
- lionagi/core/flow/monoflow/__init__.py +9 -0
- lionagi/core/flow/monoflow/{mono_chat.py → chat.py} +11 -11
- lionagi/core/flow/monoflow/{mono_chat_mixin.py → chat_mixin.py} +33 -27
- lionagi/core/flow/monoflow/{mono_followup.py → followup.py} +7 -6
- lionagi/core/flow/polyflow/__init__.py +1 -0
- lionagi/core/flow/polyflow/{polychat.py → chat.py} +15 -3
- lionagi/core/mail/__init__.py +8 -0
- lionagi/core/mail/mail_manager.py +88 -40
- lionagi/core/mail/schema.py +32 -6
- lionagi/core/messages/__init__.py +3 -0
- lionagi/core/messages/schema.py +56 -25
- lionagi/core/prompt/__init__.py +0 -0
- lionagi/core/prompt/prompt_template.py +0 -0
- lionagi/core/schema/__init__.py +7 -5
- lionagi/core/schema/action_node.py +29 -0
- lionagi/core/schema/base_mixin.py +56 -59
- lionagi/core/schema/base_node.py +35 -38
- lionagi/core/schema/condition.py +24 -0
- lionagi/core/schema/data_logger.py +98 -98
- lionagi/core/schema/data_node.py +19 -19
- lionagi/core/schema/prompt_template.py +0 -0
- lionagi/core/schema/structure.py +293 -190
- lionagi/core/session/__init__.py +1 -3
- lionagi/core/session/session.py +196 -214
- lionagi/core/tool/tool_manager.py +95 -103
- lionagi/integrations/__init__.py +1 -3
- lionagi/integrations/bridge/langchain_/documents.py +17 -18
- lionagi/integrations/bridge/langchain_/langchain_bridge.py +14 -14
- lionagi/integrations/bridge/llamaindex_/llama_index_bridge.py +22 -22
- lionagi/integrations/bridge/llamaindex_/node_parser.py +12 -12
- lionagi/integrations/bridge/llamaindex_/reader.py +11 -11
- lionagi/integrations/bridge/llamaindex_/textnode.py +7 -7
- lionagi/integrations/config/openrouter_configs.py +0 -1
- lionagi/integrations/provider/oai.py +26 -26
- lionagi/integrations/provider/services.py +38 -38
- lionagi/libs/__init__.py +34 -1
- lionagi/libs/ln_api.py +211 -221
- lionagi/libs/ln_async.py +53 -60
- lionagi/libs/ln_convert.py +118 -120
- lionagi/libs/ln_dataframe.py +32 -33
- lionagi/libs/ln_func_call.py +334 -342
- lionagi/libs/ln_nested.py +99 -107
- lionagi/libs/ln_parse.py +175 -158
- lionagi/libs/sys_util.py +52 -52
- lionagi/tests/test_core/test_base_branch.py +427 -427
- lionagi/tests/test_core/test_branch.py +292 -292
- lionagi/tests/test_core/test_mail_manager.py +57 -57
- lionagi/tests/test_core/test_session.py +254 -266
- lionagi/tests/test_core/test_session_base_util.py +299 -300
- lionagi/tests/test_core/test_tool_manager.py +70 -74
- lionagi/tests/test_libs/test_nested.py +2 -7
- lionagi/tests/test_libs/test_parse.py +2 -2
- lionagi/version.py +1 -1
- {lionagi-0.0.305.dist-info → lionagi-0.0.307.dist-info}/METADATA +4 -2
- lionagi-0.0.307.dist-info/RECORD +115 -0
- lionagi-0.0.305.dist-info/RECORD +0 -94
- {lionagi-0.0.305.dist-info → lionagi-0.0.307.dist-info}/LICENSE +0 -0
- {lionagi-0.0.305.dist-info → lionagi-0.0.307.dist-info}/WHEEL +0 -0
- {lionagi-0.0.305.dist-info → lionagi-0.0.307.dist-info}/top_level.txt +0 -0
lionagi/libs/ln_parse.py
CHANGED
@@ -1,11 +1,11 @@
|
|
1
1
|
import re
|
2
2
|
import inspect
|
3
|
+
import itertools
|
3
4
|
from collections.abc import Callable
|
4
5
|
from typing import Any
|
5
6
|
import numpy as np
|
6
7
|
import lionagi.libs.ln_convert as convert
|
7
8
|
|
8
|
-
|
9
9
|
md_json_char_map = {"\n": "\\n", "\r": "\\r", "\t": "\\t", '"': '\\"'}
|
10
10
|
|
11
11
|
|
@@ -20,29 +20,35 @@ class ParseUtil:
|
|
20
20
|
the string by appending necessary closing characters before retrying.
|
21
21
|
|
22
22
|
Args:
|
23
|
-
|
24
|
-
|
23
|
+
s (str): The JSON string to parse.
|
24
|
+
strict (bool, optional): If True, enforces strict JSON syntax. Defaults to False.
|
25
25
|
|
26
26
|
Returns:
|
27
|
-
|
27
|
+
The parsed JSON object, typically a dictionary or list.
|
28
28
|
|
29
29
|
Raises:
|
30
|
-
|
30
|
+
ValueError: If parsing fails even after attempting to correct the string.
|
31
31
|
|
32
32
|
Example:
|
33
|
-
|
34
|
-
|
33
|
+
>>> fuzzy_parse_json('{"name": "John", "age": 30, "city": "New York"')
|
34
|
+
{'name': 'John', 'age': 30, 'city': 'New York'}
|
35
35
|
"""
|
36
36
|
try:
|
37
37
|
return convert.to_dict(str_to_parse, strict=strict)
|
38
|
-
except:
|
38
|
+
except Exception:
|
39
39
|
fixed_s = ParseUtil.fix_json_string(str_to_parse)
|
40
40
|
try:
|
41
41
|
return convert.to_dict(fixed_s, strict=strict)
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
42
|
+
|
43
|
+
except Exception:
|
44
|
+
try:
|
45
|
+
fixed_s = fixed_s.replace("'", '"')
|
46
|
+
return convert.to_dict(fixed_s, strict=strict)
|
47
|
+
|
48
|
+
except Exception as e:
|
49
|
+
raise ValueError(
|
50
|
+
f"Failed to parse JSON even after fixing attempts: {e}"
|
51
|
+
) from e
|
46
52
|
|
47
53
|
@staticmethod
|
48
54
|
def fix_json_string(str_to_parse: str) -> str:
|
@@ -70,17 +76,17 @@ class ParseUtil:
|
|
70
76
|
a default mapping is used.
|
71
77
|
|
72
78
|
Args:
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
79
|
+
value: The string to be escaped.
|
80
|
+
char_map: An optional dictionary mapping characters to their escaped versions.
|
81
|
+
If not provided, a default mapping that escapes newlines, carriage returns,
|
82
|
+
tabs, and double quotes is used.
|
77
83
|
|
78
84
|
Returns:
|
79
|
-
|
85
|
+
The escaped JSON string.
|
80
86
|
|
81
87
|
Examples:
|
82
|
-
|
83
|
-
|
88
|
+
>>> escape_chars_in_json('Line 1\nLine 2')
|
89
|
+
'Line 1\\nLine 2'
|
84
90
|
"""
|
85
91
|
|
86
92
|
def replacement(match):
|
@@ -108,22 +114,22 @@ class ParseUtil:
|
|
108
114
|
filtered by language. If a code block is found, it is parsed using the provided parser function.
|
109
115
|
|
110
116
|
Args:
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
+
str_to_parse: The Markdown content to search.
|
118
|
+
language: An optional language specifier for the code block. If provided,
|
119
|
+
only code blocks of this language are considered.
|
120
|
+
regex_pattern: An optional regular expression pattern to use for finding the code block.
|
121
|
+
If provided, it overrides the language parameter.
|
122
|
+
parser: A function to parse the extracted code block string.
|
117
123
|
|
118
124
|
Returns:
|
119
|
-
|
125
|
+
The result of parsing the code block with the provided parser function.
|
120
126
|
|
121
127
|
Raises:
|
122
|
-
|
128
|
+
ValueError: If no code block is found in the Markdown content.
|
123
129
|
|
124
130
|
Examples:
|
125
|
-
|
126
|
-
|
131
|
+
>>> extract_code_block('```python\\nprint("Hello, world!")\\n```', language='python', parser=lambda x: x)
|
132
|
+
'print("Hello, world!")'
|
127
133
|
"""
|
128
134
|
|
129
135
|
if language:
|
@@ -134,7 +140,7 @@ class ParseUtil:
|
|
134
140
|
match = re.search(regex_pattern, str_to_parse, re.DOTALL)
|
135
141
|
code_str = ""
|
136
142
|
if match:
|
137
|
-
code_str = match
|
143
|
+
code_str = match[1].strip()
|
138
144
|
else:
|
139
145
|
raise ValueError(
|
140
146
|
f"No {language or 'specified'} code block found in the Markdown content."
|
@@ -156,29 +162,28 @@ class ParseUtil:
|
|
156
162
|
Markdown string. It then optionally verifies that the parsed JSON object contains all expected keys.
|
157
163
|
|
158
164
|
Args:
|
159
|
-
|
160
|
-
|
161
|
-
|
162
|
-
|
165
|
+
str_to_parse: The Markdown content to parse.
|
166
|
+
expected_keys: An optional list of keys expected to be present in the parsed JSON object.
|
167
|
+
parser: An optional function to parse the extracted code block. If not provided,
|
168
|
+
`fuzzy_parse_json` is used with default settings.
|
163
169
|
|
164
170
|
Returns:
|
165
|
-
|
171
|
+
The parsed JSON object from the Markdown content.
|
166
172
|
|
167
173
|
Raises:
|
168
|
-
|
169
|
-
|
174
|
+
ValueError: If the JSON code block is missing, or if any of the expected keys are missing
|
175
|
+
from the parsed JSON object.
|
170
176
|
|
171
177
|
Examples:
|
172
|
-
|
173
|
-
|
178
|
+
>>> md_to_json('```json\\n{"key": "value"}\\n```', expected_keys=['key'])
|
179
|
+
{'key': 'value'}
|
174
180
|
"""
|
175
181
|
json_obj = ParseUtil.extract_code_block(
|
176
182
|
str_to_parse, language="json", parser=parser or ParseUtil.fuzzy_parse_json
|
177
183
|
)
|
178
184
|
|
179
185
|
if expected_keys:
|
180
|
-
missing_keys
|
181
|
-
if missing_keys:
|
186
|
+
if missing_keys := [key for key in expected_keys if key not in json_obj]:
|
182
187
|
raise ValueError(
|
183
188
|
f"Missing expected keys in JSON object: {', '.join(missing_keys)}"
|
184
189
|
)
|
@@ -192,26 +197,26 @@ class ParseUtil:
|
|
192
197
|
docstring following the Google style format.
|
193
198
|
|
194
199
|
Args:
|
195
|
-
|
200
|
+
func (Callable): The function from which to extract docstring details.
|
196
201
|
|
197
202
|
Returns:
|
198
|
-
|
199
|
-
|
203
|
+
Tuple[str, Dict[str, str]]: A tuple containing the function description
|
204
|
+
and a dictionary with parameter names as keys and their descriptions as values.
|
200
205
|
|
201
206
|
Examples:
|
202
|
-
|
203
|
-
|
204
|
-
|
205
|
-
|
206
|
-
|
207
|
-
|
208
|
-
|
209
|
-
|
210
|
-
|
211
|
-
|
212
|
-
|
213
|
-
|
214
|
-
|
207
|
+
>>> def example_function(param1: int, param2: str):
|
208
|
+
... '''Example function.
|
209
|
+
...
|
210
|
+
... Args:
|
211
|
+
... param1 (int): The first parameter.
|
212
|
+
... param2 (str): The second parameter.
|
213
|
+
... '''
|
214
|
+
... pass
|
215
|
+
>>> description, params = _extract_docstring_details_google(example_function)
|
216
|
+
>>> description
|
217
|
+
'Example function.'
|
218
|
+
>>> params == {'param1': 'The first parameter.', 'param2': 'The second parameter.'}
|
219
|
+
True
|
215
220
|
"""
|
216
221
|
docstring = inspect.getdoc(func)
|
217
222
|
if not docstring:
|
@@ -219,19 +224,21 @@ class ParseUtil:
|
|
219
224
|
lines = docstring.split("\n")
|
220
225
|
func_description = lines[0].strip()
|
221
226
|
|
222
|
-
param_start_pos = 0
|
223
227
|
lines_len = len(lines)
|
224
228
|
|
225
229
|
params_description = {}
|
226
|
-
|
227
|
-
|
228
|
-
|
229
|
-
|
230
|
-
|
231
|
-
|
232
|
-
|
233
|
-
|
234
|
-
|
230
|
+
param_start_pos = next(
|
231
|
+
(
|
232
|
+
i + 1
|
233
|
+
for i in range(1, lines_len)
|
234
|
+
if (
|
235
|
+
lines[i].startswith("Args")
|
236
|
+
or lines[i].startswith("Arguments")
|
237
|
+
or lines[i].startswith("Parameters")
|
238
|
+
)
|
239
|
+
),
|
240
|
+
0,
|
241
|
+
)
|
235
242
|
current_param = None
|
236
243
|
for i in range(param_start_pos, lines_len):
|
237
244
|
if lines[i] == "":
|
@@ -239,7 +246,7 @@ class ParseUtil:
|
|
239
246
|
elif lines[i].startswith(" "):
|
240
247
|
param_desc = lines[i].split(":", 1)
|
241
248
|
if len(param_desc) == 1:
|
242
|
-
params_description[current_param] += "
|
249
|
+
params_description[current_param] += f" {param_desc[0].strip()}"
|
243
250
|
continue
|
244
251
|
param, desc = param_desc
|
245
252
|
param = param.split("(")[0].strip()
|
@@ -256,27 +263,27 @@ class ParseUtil:
|
|
256
263
|
docstring following the reStructuredText (reST) style format.
|
257
264
|
|
258
265
|
Args:
|
259
|
-
|
266
|
+
func (Callable): The function from which to extract docstring details.
|
260
267
|
|
261
268
|
Returns:
|
262
|
-
|
263
|
-
|
269
|
+
Tuple[str, Dict[str, str]]: A tuple containing the function description
|
270
|
+
and a dictionary with parameter names as keys and their descriptions as values.
|
264
271
|
|
265
272
|
Examples:
|
266
|
-
|
267
|
-
|
268
|
-
|
269
|
-
|
270
|
-
|
271
|
-
|
272
|
-
|
273
|
-
|
274
|
-
|
275
|
-
|
276
|
-
|
277
|
-
|
278
|
-
|
279
|
-
|
273
|
+
>>> def example_function(param1: int, param2: str):
|
274
|
+
... '''Example function.
|
275
|
+
...
|
276
|
+
... :param param1: The first parameter.
|
277
|
+
... :type param1: int
|
278
|
+
... :param param2: The second parameter.
|
279
|
+
... :type param2: str
|
280
|
+
... '''
|
281
|
+
... pass
|
282
|
+
>>> description, params = _extract_docstring_details_rest(example_function)
|
283
|
+
>>> description
|
284
|
+
'Example function.'
|
285
|
+
>>> params == {'param1': 'The first parameter.', 'param2': 'The second parameter.'}
|
286
|
+
True
|
280
287
|
"""
|
281
288
|
docstring = inspect.getdoc(func)
|
282
289
|
if not docstring:
|
@@ -295,7 +302,7 @@ class ParseUtil:
|
|
295
302
|
params_description[param] = desc.strip()
|
296
303
|
current_param = param
|
297
304
|
elif line.startswith(" "):
|
298
|
-
params_description[current_param] += " "
|
305
|
+
params_description[current_param] += f" {line}"
|
299
306
|
|
300
307
|
return func_description, params_description
|
301
308
|
|
@@ -307,30 +314,30 @@ class ParseUtil:
|
|
307
314
|
(reST) style format.
|
308
315
|
|
309
316
|
Args:
|
310
|
-
|
311
|
-
|
317
|
+
func (Callable): The function from which to extract docstring details.
|
318
|
+
style (str): The style of docstring to parse ('google' or 'reST').
|
312
319
|
|
313
320
|
Returns:
|
314
|
-
|
315
|
-
|
321
|
+
Tuple[str, Dict[str, str]]: A tuple containing the function description
|
322
|
+
and a dictionary with parameter names as keys and their descriptions as values.
|
316
323
|
|
317
324
|
Raises:
|
318
|
-
|
325
|
+
ValueError: If an unsupported style is provided.
|
319
326
|
|
320
327
|
Examples:
|
321
|
-
|
322
|
-
|
323
|
-
|
324
|
-
|
325
|
-
|
326
|
-
|
327
|
-
|
328
|
-
|
329
|
-
|
330
|
-
|
331
|
-
|
332
|
-
|
333
|
-
|
328
|
+
>>> def example_function(param1: int, param2: str):
|
329
|
+
... '''Example function.
|
330
|
+
...
|
331
|
+
... Args:
|
332
|
+
... param1 (int): The first parameter.
|
333
|
+
... param2 (str): The second parameter.
|
334
|
+
... '''
|
335
|
+
... pass
|
336
|
+
>>> description, params = _extract_docstring_details(example_function, style='google')
|
337
|
+
>>> description
|
338
|
+
'Example function.'
|
339
|
+
>>> params == {'param1': 'The first parameter.', 'param2': 'The second parameter.'}
|
340
|
+
True
|
334
341
|
"""
|
335
342
|
if style == "google":
|
336
343
|
func_description, params_description = (
|
@@ -352,16 +359,16 @@ class ParseUtil:
|
|
352
359
|
Converts a Python type to its JSON type equivalent.
|
353
360
|
|
354
361
|
Args:
|
355
|
-
|
362
|
+
py_type (str): The name of the Python type.
|
356
363
|
|
357
364
|
Returns:
|
358
|
-
|
365
|
+
str: The corresponding JSON type.
|
359
366
|
|
360
367
|
Examples:
|
361
|
-
|
362
|
-
|
363
|
-
|
364
|
-
|
368
|
+
>>> _python_to_json_type('str')
|
369
|
+
'string'
|
370
|
+
>>> _python_to_json_type('int')
|
371
|
+
'number'
|
365
372
|
"""
|
366
373
|
type_mapping = {
|
367
374
|
"str": "string",
|
@@ -381,24 +388,24 @@ class ParseUtil:
|
|
381
388
|
docstrings. The schema includes the function's name, description, and parameters.
|
382
389
|
|
383
390
|
Args:
|
384
|
-
|
385
|
-
|
391
|
+
func (Callable): The function to generate a schema for.
|
392
|
+
style (str): The docstring format ('google' or 'reST').
|
386
393
|
|
387
394
|
Returns:
|
388
|
-
|
395
|
+
Dict[str, Any]: A schema describing the function.
|
389
396
|
|
390
397
|
Examples:
|
391
|
-
|
392
|
-
|
393
|
-
|
394
|
-
|
395
|
-
|
396
|
-
|
397
|
-
|
398
|
-
|
399
|
-
|
400
|
-
|
401
|
-
|
398
|
+
>>> def example_function(param1: int, param2: str) -> bool:
|
399
|
+
... '''Example function.
|
400
|
+
...
|
401
|
+
... Args:
|
402
|
+
... param1 (int): The first parameter.
|
403
|
+
... param2 (str): The second parameter.
|
404
|
+
... '''
|
405
|
+
... return True
|
406
|
+
>>> schema = _func_to_schema(example_function)
|
407
|
+
>>> schema['function']['name']
|
408
|
+
'example_function'
|
402
409
|
"""
|
403
410
|
# Extracting function name and docstring details
|
404
411
|
func_name = func.__name__
|
@@ -432,8 +439,7 @@ class ParseUtil:
|
|
432
439
|
"description": param_description,
|
433
440
|
}
|
434
441
|
|
435
|
-
|
436
|
-
schema = {
|
442
|
+
return {
|
437
443
|
"type": "function",
|
438
444
|
"function": {
|
439
445
|
"name": func_name,
|
@@ -442,8 +448,6 @@ class ParseUtil:
|
|
442
448
|
},
|
443
449
|
}
|
444
450
|
|
445
|
-
return schema
|
446
|
-
|
447
451
|
|
448
452
|
class StringMatch:
|
449
453
|
|
@@ -457,16 +461,16 @@ class StringMatch:
|
|
457
461
|
and 1 is an exact match.
|
458
462
|
|
459
463
|
Args:
|
460
|
-
|
461
|
-
|
464
|
+
s: The first string to compare.
|
465
|
+
t: The second string to compare.
|
462
466
|
|
463
467
|
Returns:
|
464
|
-
|
465
|
-
|
468
|
+
A float representing the Jaro distance between the two strings, ranging from 0 to 1,
|
469
|
+
where 1 means the strings are identical.
|
466
470
|
|
467
471
|
Examples:
|
468
|
-
|
469
|
-
|
472
|
+
>>> jaro_distance("martha", "marhta")
|
473
|
+
0.9444444444444445
|
470
474
|
"""
|
471
475
|
s_len = len(s)
|
472
476
|
t_len = len(t)
|
@@ -521,18 +525,18 @@ class StringMatch:
|
|
521
525
|
person names, and is designed to improve the scoring of strings that have a common prefix.
|
522
526
|
|
523
527
|
Args:
|
524
|
-
|
525
|
-
|
526
|
-
|
527
|
-
|
528
|
+
s: The first string to compare.
|
529
|
+
t: The second string to compare.
|
530
|
+
scaling: The scaling factor for how much the score is adjusted upwards for having common prefixes.
|
531
|
+
The scaling factor should be less than 1, and a typical value is 0.1.
|
528
532
|
|
529
533
|
Returns:
|
530
|
-
|
531
|
-
|
534
|
+
A float representing the Jaro-Winkler similarity between the two strings, ranging from 0 to 1,
|
535
|
+
where 1 means the strings are identical.
|
532
536
|
|
533
537
|
Examples:
|
534
|
-
|
535
|
-
|
538
|
+
>>> jaro_winkler_similarity("dixon", "dicksonx")
|
539
|
+
0.8133333333333332
|
536
540
|
"""
|
537
541
|
jaro_sim = StringMatch.jaro_distance(s, t)
|
538
542
|
prefix_len = 0
|
@@ -555,15 +559,15 @@ class StringMatch:
|
|
555
559
|
required to change one word into the other. Each operation has an equal cost.
|
556
560
|
|
557
561
|
Args:
|
558
|
-
|
559
|
-
|
562
|
+
a: The first string to compare.
|
563
|
+
b: The second string to compare.
|
560
564
|
|
561
565
|
Returns:
|
562
|
-
|
566
|
+
An integer representing the Levenshtein distance between the two strings.
|
563
567
|
|
564
568
|
Examples:
|
565
|
-
|
566
|
-
|
569
|
+
>>> levenshtein_distance("kitten", "sitting")
|
570
|
+
3
|
567
571
|
"""
|
568
572
|
m, n = len(a), len(b)
|
569
573
|
# Initialize 2D array (m+1) x (n+1)
|
@@ -576,17 +580,13 @@ class StringMatch:
|
|
576
580
|
d[0][j] = j
|
577
581
|
|
578
582
|
# Compute the distance
|
579
|
-
for i in range(1, m + 1):
|
580
|
-
|
581
|
-
|
582
|
-
|
583
|
-
|
584
|
-
|
585
|
-
|
586
|
-
d[i - 1][j] + 1, # deletion
|
587
|
-
d[i][j - 1] + 1, # insertion
|
588
|
-
d[i - 1][j - 1] + cost,
|
589
|
-
) # substitution
|
583
|
+
for i, j in itertools.product(range(1, m + 1), range(1, n + 1)):
|
584
|
+
cost = 0 if a[i - 1] == b[j - 1] else 1
|
585
|
+
d[i][j] = min(
|
586
|
+
d[i - 1][j] + 1, # deletion
|
587
|
+
d[i][j - 1] + 1, # insertion
|
588
|
+
d[i - 1][j - 1] + cost,
|
589
|
+
) # substitution
|
590
590
|
return d[m][n]
|
591
591
|
|
592
592
|
@staticmethod
|
@@ -620,3 +620,20 @@ class StringMatch:
|
|
620
620
|
corrected_out[k] = v
|
621
621
|
|
622
622
|
return corrected_out
|
623
|
+
|
624
|
+
@staticmethod
|
625
|
+
def choose_most_similar(word, correct_words_list, score_func=None):
|
626
|
+
|
627
|
+
if score_func is None:
|
628
|
+
score_func = StringMatch.jaro_winkler_similarity
|
629
|
+
|
630
|
+
# Calculate Jaro-Winkler similarity scores for each potential match
|
631
|
+
scores = np.array(
|
632
|
+
[
|
633
|
+
score_func(convert.to_str(word), correct_word)
|
634
|
+
for correct_word in correct_words_list
|
635
|
+
]
|
636
|
+
)
|
637
|
+
# Find the index of the highest score
|
638
|
+
max_score_index = np.argmax(scores)
|
639
|
+
return correct_words_list[max_score_index]
|