kensho-kfinance 3.2.14__py3-none-any.whl → 3.2.16__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of kensho-kfinance might be problematic. Click here for more details.
- {kensho_kfinance-3.2.14.dist-info → kensho_kfinance-3.2.16.dist-info}/METADATA +1 -1
- {kensho_kfinance-3.2.14.dist-info → kensho_kfinance-3.2.16.dist-info}/RECORD +14 -14
- kfinance/CHANGELOG.md +7 -0
- kfinance/client/models/decimal_with_unit.py +14 -2
- kfinance/client/models/tests/test_decimal_with_unit.py +9 -0
- kfinance/domains/competitors/competitor_tools.py +3 -1
- kfinance/domains/line_items/line_item_models.py +329 -12
- kfinance/domains/line_items/line_item_tools.py +83 -1
- kfinance/domains/line_items/tests/test_line_item_tools.py +139 -1
- kfinance/version.py +2 -2
- {kensho_kfinance-3.2.14.dist-info → kensho_kfinance-3.2.16.dist-info}/WHEEL +0 -0
- {kensho_kfinance-3.2.14.dist-info → kensho_kfinance-3.2.16.dist-info}/licenses/AUTHORS.md +0 -0
- {kensho_kfinance-3.2.14.dist-info → kensho_kfinance-3.2.16.dist-info}/licenses/LICENSE +0 -0
- {kensho_kfinance-3.2.14.dist-info → kensho_kfinance-3.2.16.dist-info}/top_level.txt +0 -0
kfinance/domains/line_items/line_item_tools.py
CHANGED
|
@@ -1,14 +1,17 @@
|
|
|
1
|
+
from difflib import SequenceMatcher
|
|
1
2
|
from textwrap import dedent
|
|
2
3
|
from typing import Literal, Type
|
|
3
4
|
|
|
4
|
-
from pydantic import BaseModel, Field
|
|
5
|
+
from pydantic import BaseModel, Field, model_validator
|
|
5
6
|
|
|
6
7
|
from kfinance.client.batch_request_handling import Task, process_tasks_in_thread_pool_executor
|
|
7
8
|
from kfinance.client.models.date_and_period_models import PeriodType
|
|
8
9
|
from kfinance.client.permission_models import Permission
|
|
9
10
|
from kfinance.domains.line_items.line_item_models import (
|
|
10
11
|
LINE_ITEM_NAMES_AND_ALIASES,
|
|
12
|
+
LINE_ITEM_TO_DESCRIPTIONS_MAP,
|
|
11
13
|
LineItemResponse,
|
|
14
|
+
LineItemScore,
|
|
12
15
|
)
|
|
13
16
|
from kfinance.integrations.tool_calling.tool_calling_models import (
|
|
14
17
|
KfinanceTool,
|
|
@@ -18,6 +21,75 @@ from kfinance.integrations.tool_calling.tool_calling_models import (
|
|
|
18
21
|
)
|
|
19
22
|
|
|
20
23
|
|
|
24
|
+
def _find_similar_line_items(
|
|
25
|
+
invalid_item: str, descriptors: dict[str, str], max_suggestions: int = 8
|
|
26
|
+
) -> list[LineItemScore]:
|
|
27
|
+
"""Find similar line items using keyword matching and string similarity.
|
|
28
|
+
|
|
29
|
+
Args:
|
|
30
|
+
invalid_item: The invalid line item provided by the user
|
|
31
|
+
descriptors: Dictionary mapping line item names to descriptions
|
|
32
|
+
max_suggestions: Maximum number of suggestions to return
|
|
33
|
+
|
|
34
|
+
Returns:
|
|
35
|
+
List of LineItemScore objects for the best matches
|
|
36
|
+
"""
|
|
37
|
+
if not descriptors:
|
|
38
|
+
return []
|
|
39
|
+
|
|
40
|
+
invalid_lower = invalid_item.lower()
|
|
41
|
+
scores: list[LineItemScore] = []
|
|
42
|
+
|
|
43
|
+
for line_item, description in descriptors.items():
|
|
44
|
+
# Calculate similarity scores
|
|
45
|
+
name_similarity = SequenceMatcher(None, invalid_lower, line_item.lower()).ratio()
|
|
46
|
+
|
|
47
|
+
# Check for keyword matches in the line item name
|
|
48
|
+
invalid_words = set(invalid_lower.replace("_", " ").split())
|
|
49
|
+
item_words = set(line_item.lower().replace("_", " ").split())
|
|
50
|
+
keyword_match_score = len(invalid_words.intersection(item_words)) / max(
|
|
51
|
+
len(invalid_words), 1
|
|
52
|
+
)
|
|
53
|
+
|
|
54
|
+
# Check for keyword matches in description
|
|
55
|
+
description_words = set(description.lower().split())
|
|
56
|
+
description_match_score = len(invalid_words.intersection(description_words)) / max(
|
|
57
|
+
len(invalid_words), 1
|
|
58
|
+
)
|
|
59
|
+
|
|
60
|
+
# Combined score (weighted)
|
|
61
|
+
total_score = (
|
|
62
|
+
name_similarity * 0.5 # Direct name similarity
|
|
63
|
+
+ keyword_match_score * 0.3 # Keyword matches in name
|
|
64
|
+
+ description_match_score * 0.2 # Keyword matches in description
|
|
65
|
+
)
|
|
66
|
+
|
|
67
|
+
scores.append(LineItemScore(name=line_item, description=description, score=total_score))
|
|
68
|
+
|
|
69
|
+
# Sort by score (descending) and return top matches
|
|
70
|
+
scores.sort(reverse=True, key=lambda x: x.score)
|
|
71
|
+
return [item for item in scores[:max_suggestions] if item.score > 0.1]
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
def _smart_line_item_validator(v: str) -> str:
|
|
75
|
+
"""Custom validator that provides intelligent suggestions for invalid line items."""
|
|
76
|
+
if v not in LINE_ITEM_NAMES_AND_ALIASES:
|
|
77
|
+
# Find similar items using pre-computed descriptors
|
|
78
|
+
suggestions = _find_similar_line_items(v, LINE_ITEM_TO_DESCRIPTIONS_MAP)
|
|
79
|
+
|
|
80
|
+
if suggestions:
|
|
81
|
+
suggestion_text = "\n\nDid you mean one of these?\n"
|
|
82
|
+
for item in suggestions:
|
|
83
|
+
suggestion_text += f" • '{item.name}': {item.description}\n"
|
|
84
|
+
|
|
85
|
+
error_msg = f"Invalid line_item '{v}'.{suggestion_text}"
|
|
86
|
+
else:
|
|
87
|
+
error_msg = f"Invalid line_item '{v}'. Please refer to the tool documentation for valid options."
|
|
88
|
+
|
|
89
|
+
raise ValueError(error_msg)
|
|
90
|
+
return v
|
|
91
|
+
|
|
92
|
+
|
|
21
93
|
class GetFinancialLineItemFromIdentifiersArgs(ToolArgsWithIdentifiers):
|
|
22
94
|
# Note: mypy will not enforce this literal because of the type: ignore.
|
|
23
95
|
# But pydantic still uses the literal to check for allowed values and only includes
|
|
@@ -31,6 +103,16 @@ class GetFinancialLineItemFromIdentifiersArgs(ToolArgsWithIdentifiers):
|
|
|
31
103
|
start_quarter: ValidQuarter | None = Field(default=None, description="Starting quarter")
|
|
32
104
|
end_quarter: ValidQuarter | None = Field(default=None, description="Ending quarter")
|
|
33
105
|
|
|
106
|
+
@model_validator(mode="before")
|
|
107
|
+
@classmethod
|
|
108
|
+
def validate_line_item_with_suggestions(cls, values: dict) -> dict:
|
|
109
|
+
"""Custom validator that provides intelligent suggestions for invalid line items."""
|
|
110
|
+
if isinstance(values, dict) and "line_item" in values:
|
|
111
|
+
line_item = values["line_item"]
|
|
112
|
+
# Use the helper function to validate and provide suggestions
|
|
113
|
+
_smart_line_item_validator(line_item)
|
|
114
|
+
return values
|
|
115
|
+
|
|
34
116
|
|
|
35
117
|
class GetFinancialLineItemFromIdentifiersResp(ToolRespWithErrors):
|
|
36
118
|
results: dict[str, LineItemResponse]
|
kfinance/domains/line_items/tests/test_line_item_tools.py
CHANGED
|
@@ -6,11 +6,12 @@ from requests_mock import Mocker
|
|
|
6
6
|
from kfinance.client.kfinance import Client
|
|
7
7
|
from kfinance.conftest import SPGI_COMPANY_ID
|
|
8
8
|
from kfinance.domains.companies.company_models import COMPANY_ID_PREFIX
|
|
9
|
-
from kfinance.domains.line_items.line_item_models import LineItemResponse
|
|
9
|
+
from kfinance.domains.line_items.line_item_models import LineItemResponse, LineItemScore
|
|
10
10
|
from kfinance.domains.line_items.line_item_tools import (
|
|
11
11
|
GetFinancialLineItemFromIdentifiers,
|
|
12
12
|
GetFinancialLineItemFromIdentifiersArgs,
|
|
13
13
|
GetFinancialLineItemFromIdentifiersResp,
|
|
14
|
+
_find_similar_line_items,
|
|
14
15
|
)
|
|
15
16
|
|
|
16
17
|
|
|
@@ -131,3 +132,140 @@ class TestGetFinancialLineItemFromCompanyIds:
|
|
|
131
132
|
assert "revenue" in line_items
|
|
132
133
|
# normal_revenue is an alias for revenue
|
|
133
134
|
assert "normal_revenue" in line_items
|
|
135
|
+
|
|
136
|
+
|
|
137
|
+
class TestFindSimilarLineItems:
|
|
138
|
+
"""Tests for the _find_similar_line_items function."""
|
|
139
|
+
|
|
140
|
+
# Preset test descriptors to ensure consistent results
|
|
141
|
+
TEST_DESCRIPTORS = {
|
|
142
|
+
"revenue": "Revenue recognized from primary business activities (excludes non-operating income).",
|
|
143
|
+
"total_revenue": "Sum of operating and non-operating revenue streams for the period.",
|
|
144
|
+
"cost_of_goods_sold": "Direct costs attributable to producing goods sold during the period.",
|
|
145
|
+
"cogs": "Direct costs attributable to producing goods sold during the period.",
|
|
146
|
+
"gross_profit": "Revenue minus cost_of_goods_sold or cost_of_revenue for the reported period.",
|
|
147
|
+
"operating_income": "Operating profit after subtracting operating expenses from operating revenue.",
|
|
148
|
+
"net_income": "Bottom-line profit attributable to common shareholders.",
|
|
149
|
+
"research_and_development_expense": "Expenses incurred for research and development activities.",
|
|
150
|
+
"r_and_d_expense": "Expenses incurred for research and development activities.",
|
|
151
|
+
"depreciation_and_amortization": "Combined depreciation and amortization expense for the period.",
|
|
152
|
+
"ebitda": "Earnings before interest, taxes, depreciation, and amortization.",
|
|
153
|
+
}
|
|
154
|
+
|
|
155
|
+
def test_exact_keyword_match(self):
|
|
156
|
+
"""
|
|
157
|
+
GIVEN a preset descriptors dictionary
|
|
158
|
+
WHEN searching for 'revenues' (similar to 'revenue')
|
|
159
|
+
THEN 'revenue' should be in the top suggestions
|
|
160
|
+
"""
|
|
161
|
+
results = _find_similar_line_items("revenues", self.TEST_DESCRIPTORS, max_suggestions=5)
|
|
162
|
+
|
|
163
|
+
assert len(results) > 0
|
|
164
|
+
assert isinstance(results[0], LineItemScore)
|
|
165
|
+
# Check that revenue or total_revenue is in top results
|
|
166
|
+
result_names = [item.name for item in results]
|
|
167
|
+
assert "revenue" in result_names or "total_revenue" in result_names
|
|
168
|
+
|
|
169
|
+
def test_acronym_matching(self):
|
|
170
|
+
"""
|
|
171
|
+
GIVEN a preset descriptors dictionary
|
|
172
|
+
WHEN searching for 'R&D' (abbreviation)
|
|
173
|
+
THEN research and development related items should appear
|
|
174
|
+
"""
|
|
175
|
+
results = _find_similar_line_items("R&D", self.TEST_DESCRIPTORS, max_suggestions=5)
|
|
176
|
+
|
|
177
|
+
result_names = [item.name for item in results]
|
|
178
|
+
# Should find r_and_d_expense or research_and_development_expense
|
|
179
|
+
assert any("research" in name or "r_and_d" in name for name in result_names)
|
|
180
|
+
|
|
181
|
+
def test_multiple_word_matching(self):
|
|
182
|
+
"""
|
|
183
|
+
GIVEN a preset descriptors dictionary
|
|
184
|
+
WHEN searching for 'cost goods'
|
|
185
|
+
THEN 'cost_of_goods_sold' should be suggested
|
|
186
|
+
"""
|
|
187
|
+
results = _find_similar_line_items("cost goods", self.TEST_DESCRIPTORS, max_suggestions=5)
|
|
188
|
+
|
|
189
|
+
result_names = [item.name for item in results]
|
|
190
|
+
assert "cost_of_goods_sold" in result_names or "cogs" in result_names
|
|
191
|
+
|
|
192
|
+
def test_description_matching(self):
|
|
193
|
+
"""
|
|
194
|
+
GIVEN a preset descriptors dictionary
|
|
195
|
+
WHEN searching for 'profit'
|
|
196
|
+
THEN items with 'profit' in description should appear
|
|
197
|
+
"""
|
|
198
|
+
results = _find_similar_line_items("profit", self.TEST_DESCRIPTORS, max_suggestions=5)
|
|
199
|
+
|
|
200
|
+
assert len(results) > 0
|
|
201
|
+
# Should find items like gross_profit, operating_income (operating profit), or net_income
|
|
202
|
+
result_names = [item.name for item in results]
|
|
203
|
+
assert any("profit" in name or "income" in name for name in result_names)
|
|
204
|
+
|
|
205
|
+
def test_empty_descriptors(self):
|
|
206
|
+
"""
|
|
207
|
+
GIVEN an empty descriptors dictionary
|
|
208
|
+
WHEN searching for any term
|
|
209
|
+
THEN should return empty list
|
|
210
|
+
"""
|
|
211
|
+
results = _find_similar_line_items("revenue", {}, max_suggestions=5)
|
|
212
|
+
assert results == []
|
|
213
|
+
|
|
214
|
+
def test_no_matches(self):
|
|
215
|
+
"""
|
|
216
|
+
GIVEN a preset descriptors dictionary
|
|
217
|
+
WHEN searching for completely unrelated term
|
|
218
|
+
THEN should return empty list or very low scores filtered out
|
|
219
|
+
"""
|
|
220
|
+
results = _find_similar_line_items("xyz123abc", self.TEST_DESCRIPTORS, max_suggestions=5)
|
|
221
|
+
# Should return empty or very few results since threshold is > 0.1
|
|
222
|
+
assert len(results) <= 2 # May have some weak matches but should be minimal
|
|
223
|
+
|
|
224
|
+
def test_max_suggestions_respected(self):
|
|
225
|
+
"""
|
|
226
|
+
GIVEN a preset descriptors dictionary
|
|
227
|
+
WHEN searching with max_suggestions=3
|
|
228
|
+
THEN should return at most 3 results
|
|
229
|
+
"""
|
|
230
|
+
results = _find_similar_line_items("income", self.TEST_DESCRIPTORS, max_suggestions=3)
|
|
231
|
+
assert len(results) <= 3
|
|
232
|
+
|
|
233
|
+
def test_score_ordering(self):
|
|
234
|
+
"""
|
|
235
|
+
GIVEN a preset descriptors dictionary
|
|
236
|
+
WHEN searching for a term
|
|
237
|
+
THEN results should be ordered by descending score
|
|
238
|
+
"""
|
|
239
|
+
results = _find_similar_line_items("revenue", self.TEST_DESCRIPTORS, max_suggestions=5)
|
|
240
|
+
|
|
241
|
+
if len(results) > 1:
|
|
242
|
+
for i in range(len(results) - 1):
|
|
243
|
+
assert results[i].score >= results[i + 1].score
|
|
244
|
+
|
|
245
|
+
def test_score_threshold(self):
|
|
246
|
+
"""
|
|
247
|
+
GIVEN a preset descriptors dictionary
|
|
248
|
+
WHEN searching for a term
|
|
249
|
+
THEN all returned results should have score > 0.1
|
|
250
|
+
"""
|
|
251
|
+
results = _find_similar_line_items("revenue", self.TEST_DESCRIPTORS, max_suggestions=10)
|
|
252
|
+
|
|
253
|
+
for item in results:
|
|
254
|
+
assert item.score > 0.1
|
|
255
|
+
|
|
256
|
+
def test_lineitemscore_structure(self):
|
|
257
|
+
"""
|
|
258
|
+
GIVEN a preset descriptors dictionary
|
|
259
|
+
WHEN searching for a term
|
|
260
|
+
THEN each result should be a LineItemScore with name, description, and score
|
|
261
|
+
"""
|
|
262
|
+
results = _find_similar_line_items("revenue", self.TEST_DESCRIPTORS, max_suggestions=5)
|
|
263
|
+
|
|
264
|
+
assert len(results) > 0
|
|
265
|
+
for item in results:
|
|
266
|
+
assert isinstance(item, LineItemScore)
|
|
267
|
+
assert isinstance(item.name, str)
|
|
268
|
+
assert isinstance(item.description, str)
|
|
269
|
+
assert isinstance(item.score, float)
|
|
270
|
+
assert item.name in self.TEST_DESCRIPTORS
|
|
271
|
+
assert item.description == self.TEST_DESCRIPTORS[item.name]
|
kfinance/version.py
CHANGED
|
@@ -28,7 +28,7 @@ version_tuple: VERSION_TUPLE
|
|
|
28
28
|
commit_id: COMMIT_ID
|
|
29
29
|
__commit_id__: COMMIT_ID
|
|
30
30
|
|
|
31
|
-
__version__ = version = '3.2.14'
|
|
32
|
-
__version_tuple__ = version_tuple = (3, 2, 14)
|
|
31
|
+
__version__ = version = '3.2.16'
|
|
32
|
+
__version_tuple__ = version_tuple = (3, 2, 16)
|
|
33
33
|
|
|
34
34
|
__commit_id__ = commit_id = None
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|