kensho-kfinance 3.2.14__py3-none-any.whl → 3.2.15__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of kensho-kfinance might be problematic. Click here for more details.

@@ -1,14 +1,17 @@
1
+ from difflib import SequenceMatcher
1
2
  from textwrap import dedent
2
3
  from typing import Literal, Type
3
4
 
4
- from pydantic import BaseModel, Field
5
+ from pydantic import BaseModel, Field, model_validator
5
6
 
6
7
  from kfinance.client.batch_request_handling import Task, process_tasks_in_thread_pool_executor
7
8
  from kfinance.client.models.date_and_period_models import PeriodType
8
9
  from kfinance.client.permission_models import Permission
9
10
  from kfinance.domains.line_items.line_item_models import (
10
11
  LINE_ITEM_NAMES_AND_ALIASES,
12
+ LINE_ITEM_TO_DESCRIPTIONS_MAP,
11
13
  LineItemResponse,
14
+ LineItemScore,
12
15
  )
13
16
  from kfinance.integrations.tool_calling.tool_calling_models import (
14
17
  KfinanceTool,
@@ -18,6 +21,75 @@ from kfinance.integrations.tool_calling.tool_calling_models import (
18
21
  )
19
22
 
20
23
 
24
def _find_similar_line_items(
    invalid_item: str, descriptors: dict[str, str], max_suggestions: int = 8
) -> list[LineItemScore]:
    """Find similar line items using keyword matching and string similarity.

    Every candidate receives a weighted score made of three parts:
    50% character-level similarity between the two names, 30% share of the
    input's words that occur in the candidate name, and 20% share of the
    input's words that occur in the candidate description.

    Args:
        invalid_item: The invalid line item provided by the user
        descriptors: Dictionary mapping line item names to descriptions
        max_suggestions: Maximum number of suggestions to return. Values
            below 1 yield no suggestions.

    Returns:
        List of LineItemScore objects for the best matches, ordered by
        descending score. Candidates scoring 0.1 or lower are dropped.
    """
    # A negative limit would otherwise slice off only the tail of the ranking
    # and return nearly every candidate.
    if not descriptors or max_suggestions <= 0:
        return []

    invalid_lower = invalid_item.lower()

    # The user's keywords do not depend on the candidate, so compute them once.
    invalid_words = set(invalid_lower.replace("_", " ").split())
    invalid_word_count = max(len(invalid_words), 1)

    scores: list[LineItemScore] = []

    for line_item, description in descriptors.items():
        item_lower = line_item.lower()

        # Direct character-level similarity between the two names
        name_similarity = SequenceMatcher(None, invalid_lower, item_lower).ratio()

        # Share of the user's keywords that appear in the line item name
        item_words = set(item_lower.replace("_", " ").split())
        keyword_match_score = len(invalid_words & item_words) / invalid_word_count

        # Share of the user's keywords that appear in the description.
        # Descriptions are prose, so also index each token without adjacent
        # punctuation ("period." -> "period"); raw tokens are kept so nothing
        # that matched before stops matching.
        raw_tokens = description.lower().split()
        description_words = set(raw_tokens)
        description_words.update(token.strip(".,;:!?()[]{}\"'") for token in raw_tokens)
        description_match_score = len(invalid_words & description_words) / invalid_word_count

        # Combined score (weighted)
        total_score = (
            name_similarity * 0.5  # Direct name similarity
            + keyword_match_score * 0.3  # Keyword matches in name
            + description_match_score * 0.2  # Keyword matches in description
        )

        scores.append(LineItemScore(name=line_item, description=description, score=total_score))

    # Sort by score (descending) and return top matches
    scores.sort(reverse=True, key=lambda x: x.score)
    return [item for item in scores[:max_suggestions] if item.score > 0.1]
72
+
73
+
74
def _smart_line_item_validator(v: str) -> str:
    """Return ``v`` when it is a known line item, otherwise raise with suggestions.

    Args:
        v: The line item name to check against LINE_ITEM_NAMES_AND_ALIASES.

    Returns:
        The unchanged input when it is a known name or alias.

    Raises:
        ValueError: If ``v`` is unknown. The message lists similar line items
            with their descriptions when any are found, and otherwise points
            to the tool documentation.
    """
    if v in LINE_ITEM_NAMES_AND_ALIASES:
        return v

    # Rank the known line items against the rejected input
    candidates = _find_similar_line_items(v, LINE_ITEM_TO_DESCRIPTIONS_MAP)

    if not candidates:
        raise ValueError(
            f"Invalid line_item '{v}'. Please refer to the tool documentation for valid options."
        )

    bullet_lines = "".join(
        f" • '{candidate.name}': {candidate.description}\n" for candidate in candidates
    )
    hint = "\n\nDid you mean one of these?\n" + bullet_lines
    raise ValueError(f"Invalid line_item '{v}'.{hint}")
91
+
92
+
21
93
  class GetFinancialLineItemFromIdentifiersArgs(ToolArgsWithIdentifiers):
22
94
  # Note: mypy will not enforce this literal because of the type: ignore.
23
95
  # But pydantic still uses the literal to check for allowed values and only includes
@@ -31,6 +103,16 @@ class GetFinancialLineItemFromIdentifiersArgs(ToolArgsWithIdentifiers):
31
103
  start_quarter: ValidQuarter | None = Field(default=None, description="Starting quarter")
32
104
  end_quarter: ValidQuarter | None = Field(default=None, description="Ending quarter")
33
105
 
106
@model_validator(mode="before")
@classmethod
def validate_line_item_with_suggestions(cls, values: dict) -> dict:
    """Reject unknown line items with a message that suggests close matches.

    Runs before field validation, so ``values`` is the raw input and
    ``line_item`` may be missing or of any type.

    Args:
        values: Raw input passed to the model.

    Returns:
        The input, unchanged.

    Raises:
        ValueError: From ``_smart_line_item_validator`` when ``line_item`` is
            a string that is not a known line item name or alias.
    """
    if isinstance(values, dict):
        line_item = values.get("line_item")
        # Only strings can be matched against the known names. Other types
        # (None, numbers, lists, ...) are left to the regular field validation;
        # passing them to the helper would raise AttributeError/TypeError,
        # which pydantic does not turn into a validation error.
        if isinstance(line_item, str):
            _smart_line_item_validator(line_item)
    return values
115
+
34
116
 
35
117
  class GetFinancialLineItemFromIdentifiersResp(ToolRespWithErrors):
36
118
  results: dict[str, LineItemResponse]
@@ -6,11 +6,12 @@ from requests_mock import Mocker
6
6
  from kfinance.client.kfinance import Client
7
7
  from kfinance.conftest import SPGI_COMPANY_ID
8
8
  from kfinance.domains.companies.company_models import COMPANY_ID_PREFIX
9
- from kfinance.domains.line_items.line_item_models import LineItemResponse
9
+ from kfinance.domains.line_items.line_item_models import LineItemResponse, LineItemScore
10
10
  from kfinance.domains.line_items.line_item_tools import (
11
11
  GetFinancialLineItemFromIdentifiers,
12
12
  GetFinancialLineItemFromIdentifiersArgs,
13
13
  GetFinancialLineItemFromIdentifiersResp,
14
+ _find_similar_line_items,
14
15
  )
15
16
 
16
17
 
@@ -131,3 +132,140 @@ class TestGetFinancialLineItemFromCompanyIds:
131
132
  assert "revenue" in line_items
132
133
  # normal_revenue is an alias for revenue
133
134
  assert "normal_revenue" in line_items
135
+
136
+
137
class TestFindSimilarLineItems:
    """Behavioural checks for the _find_similar_line_items helper."""

    # Fixed descriptors so the ranking does not depend on the real line item catalogue
    TEST_DESCRIPTORS = {
        "revenue": "Revenue recognized from primary business activities (excludes non-operating income).",
        "total_revenue": "Sum of operating and non-operating revenue streams for the period.",
        "cost_of_goods_sold": "Direct costs attributable to producing goods sold during the period.",
        "cogs": "Direct costs attributable to producing goods sold during the period.",
        "gross_profit": "Revenue minus cost_of_goods_sold or cost_of_revenue for the reported period.",
        "operating_income": "Operating profit after subtracting operating expenses from operating revenue.",
        "net_income": "Bottom-line profit attributable to common shareholders.",
        "research_and_development_expense": "Expenses incurred for research and development activities.",
        "r_and_d_expense": "Expenses incurred for research and development activities.",
        "depreciation_and_amortization": "Combined depreciation and amortization expense for the period.",
        "ebitda": "Earnings before interest, taxes, depreciation, and amortization.",
    }

    def test_exact_keyword_match(self):
        """
        GIVEN the preset descriptors
        WHEN the query is 'revenues' (a near miss for 'revenue')
        THEN 'revenue' or 'total_revenue' is among the suggestions
        """
        suggestions = _find_similar_line_items(
            "revenues", self.TEST_DESCRIPTORS, max_suggestions=5
        )

        assert suggestions
        assert isinstance(suggestions[0], LineItemScore)
        # At least one of the revenue items must have been suggested
        suggested_names = {suggestion.name for suggestion in suggestions}
        assert suggested_names & {"revenue", "total_revenue"}

    def test_acronym_matching(self):
        """
        GIVEN the preset descriptors
        WHEN the query is the abbreviation 'R&D'
        THEN a research and development item is among the suggestions
        """
        suggestions = _find_similar_line_items("R&D", self.TEST_DESCRIPTORS, max_suggestions=5)

        # Either r_and_d_expense or research_and_development_expense qualifies
        assert any(
            marker in suggestion.name
            for suggestion in suggestions
            for marker in ("research", "r_and_d")
        )

    def test_multiple_word_matching(self):
        """
        GIVEN the preset descriptors
        WHEN the query is the two words 'cost goods'
        THEN 'cost_of_goods_sold' or its alias 'cogs' is suggested
        """
        suggestions = _find_similar_line_items(
            "cost goods", self.TEST_DESCRIPTORS, max_suggestions=5
        )

        suggested_names = {suggestion.name for suggestion in suggestions}
        assert suggested_names & {"cost_of_goods_sold", "cogs"}

    def test_description_matching(self):
        """
        GIVEN the preset descriptors
        WHEN the query is 'profit'
        THEN a profit or income item is among the suggestions
        """
        suggestions = _find_similar_line_items("profit", self.TEST_DESCRIPTORS, max_suggestions=5)

        assert suggestions
        # gross_profit, operating_income and net_income all qualify
        assert any(
            marker in suggestion.name
            for suggestion in suggestions
            for marker in ("profit", "income")
        )

    def test_empty_descriptors(self):
        """
        GIVEN no descriptors at all
        WHEN any query is made
        THEN the result is an empty list
        """
        assert _find_similar_line_items("revenue", {}, max_suggestions=5) == []

    def test_no_matches(self):
        """
        GIVEN the preset descriptors
        WHEN the query is an unrelated string
        THEN at most a couple of weak suggestions survive the score threshold
        """
        suggestions = _find_similar_line_items(
            "xyz123abc", self.TEST_DESCRIPTORS, max_suggestions=5
        )
        # Weak matches may slip past the 0.1 threshold, but only a handful
        assert len(suggestions) <= 2

    def test_max_suggestions_respected(self):
        """
        GIVEN the preset descriptors
        WHEN the query is made with max_suggestions=3
        THEN no more than 3 suggestions are returned
        """
        suggestions = _find_similar_line_items("income", self.TEST_DESCRIPTORS, max_suggestions=3)
        assert len(suggestions) <= 3

    def test_score_ordering(self):
        """
        GIVEN the preset descriptors
        WHEN a query is made
        THEN the suggestions are sorted by descending score
        """
        suggestions = _find_similar_line_items(
            "revenue", self.TEST_DESCRIPTORS, max_suggestions=5
        )

        # Compare every suggestion with its successor; no pairs exist for 0 or 1 results
        for earlier, later in zip(suggestions, suggestions[1:]):
            assert earlier.score >= later.score

    def test_score_threshold(self):
        """
        GIVEN the preset descriptors
        WHEN a query is made
        THEN every suggestion scores above 0.1
        """
        suggestions = _find_similar_line_items(
            "revenue", self.TEST_DESCRIPTORS, max_suggestions=10
        )

        assert all(suggestion.score > 0.1 for suggestion in suggestions)

    def test_lineitemscore_structure(self):
        """
        GIVEN the preset descriptors
        WHEN a query is made
        THEN every suggestion is a LineItemScore carrying name, description and score
        """
        suggestions = _find_similar_line_items(
            "revenue", self.TEST_DESCRIPTORS, max_suggestions=5
        )

        assert suggestions
        for suggestion in suggestions:
            assert isinstance(suggestion, LineItemScore)
            assert isinstance(suggestion.name, str)
            assert isinstance(suggestion.description, str)
            assert isinstance(suggestion.score, float)
            # Name and description must come straight from the descriptors
            assert suggestion.name in self.TEST_DESCRIPTORS
            assert suggestion.description == self.TEST_DESCRIPTORS[suggestion.name]
kfinance/version.py CHANGED
@@ -28,7 +28,7 @@ version_tuple: VERSION_TUPLE
28
28
  commit_id: COMMIT_ID
29
29
  __commit_id__: COMMIT_ID
30
30
 
31
- __version__ = version = '3.2.14'
32
- __version_tuple__ = version_tuple = (3, 2, 14)
31
+ __version__ = version = '3.2.15'
32
+ __version_tuple__ = version_tuple = (3, 2, 15)
33
33
 
34
34
  __commit_id__ = commit_id = None