kodexa 7.0.1a9987865527__py3-none-any.whl → 7.0.1a11915814268__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- kodexa/dataclasses/__init__.py +464 -0
- kodexa/dataclasses/templates/llm_data_class.j2 +15 -0
- kodexa/model/model.py +72 -2
- kodexa/model/objects.py +409 -184
- kodexa/model/persistence.py +169 -6
- kodexa/platform/client.py +165 -13
- kodexa/platform/kodexa.py +23 -14
- kodexa/utils/__init__.py +178 -0
- {kodexa-7.0.1a9987865527.dist-info → kodexa-7.0.1a11915814268.dist-info}/METADATA +4 -4
- {kodexa-7.0.1a9987865527.dist-info → kodexa-7.0.1a11915814268.dist-info}/RECORD +12 -9
- {kodexa-7.0.1a9987865527.dist-info → kodexa-7.0.1a11915814268.dist-info}/LICENSE +0 -0
- {kodexa-7.0.1a9987865527.dist-info → kodexa-7.0.1a11915814268.dist-info}/WHEEL +0 -0
kodexa/utils/__init__.py
ADDED
@@ -0,0 +1,178 @@
|
|
1
|
+
"""
|
2
|
+
This module provides a set of functions to manipulate and convert taxonomy objects for use within a data model.
|
3
|
+
It includes functions to convert taxonomy names to various naming conventions such as property names, class names,
|
4
|
+
and group paths. Additionally, it offers utility functions for string manipulation, like converting snake case strings
|
5
|
+
to camel case or title case, making string names safe for use as attribute names, converting strings to hexadecimal
|
6
|
+
color codes, estimating the token count of a text, and recursively finding all non-abstract subclasses of a given class.
|
7
|
+
"""
|
8
|
+
|
9
|
+
import keyword
|
10
|
+
import logging
|
11
|
+
import re
|
12
|
+
from inspect import isabstract
|
13
|
+
|
14
|
+
from kodexa.model.objects import Taxon
|
15
|
+
|
16
|
+
logger = logging.getLogger(__name__)
|
17
|
+
|
18
|
+
|
19
|
+
def taxon_to_property_name(taxon: Taxon):
    """
    Derive a snake_case property name from a taxon's label.

    The label is sanitised with ``safe_name`` and then normalised with
    ``to_snake``. As a side effect the generated name is also stored on
    ``taxon.external_name``.

    :param taxon: The taxon whose label is converted.
    :return: The generated property name.
    """
    property_name = to_snake(safe_name(taxon.label))
    taxon.external_name = property_name
    return property_name
|
26
|
+
|
27
|
+
|
28
|
+
def taxon_to_class_name(taxon: Taxon):
    """
    Derive a class name from a taxon's label.

    The label is sanitised with ``safe_name`` and then passed through
    ``snake_to_camel``. As a side effect the generated name is also stored on
    ``taxon.external_name``.

    :param taxon: The taxon whose label is converted.
    :return: The generated class name.
    """
    class_name = snake_to_camel(safe_name(taxon.label))
    taxon.external_name = class_name
    return class_name
|
35
|
+
|
36
|
+
|
37
|
+
def taxon_to_group_path(taxon: Taxon):
    """
    Return the group name of a taxon: the part of its path before the first "/".

    :param taxon: The taxon whose ``path`` is inspected.
    :return: The leading path segment (the whole path when it contains no "/").
    """
    group_name, _, _ = taxon.path.partition('/')
    return group_name
|
41
|
+
|
42
|
+
|
43
|
+
def snake_to_camel(snake_str):
    """
    Join the words of a snake_case (or space separated) string, title-casing each.

    Spaces are treated like underscores. Every word, including the first, is
    stripped and passed through ``str.title`` before being concatenated.

    :param snake_str: The string to convert.
    :return: The concatenated, title-cased words.
    """
    words = snake_str.replace(" ", "_").split("_")
    return "".join(word.strip().title() for word in words)
|
49
|
+
|
50
|
+
|
51
|
+
def to_snake(base_str):
    """
    Convert a string to lower-case snake_case.

    Spaces and hyphens are treated as word separators and replaced by
    underscores; every word is stripped and lower-cased.

    :param base_str: The string to convert.
    :return: The snake_case form, prefixed with "n_" when it would otherwise
        start with a digit.
    """
    components = base_str.replace(" ", "_").replace("-", "_").split("_")

    # A name must not begin with a digit. Look at the first character only, so
    # that values such as "1st" are prefixed as well as all-digit ones such as
    # "123" (the same rule safe_name applies).
    if components[0][:1].isdigit():
        components[0] = "n_" + components[0]

    return "_".join(x.strip().lower() for x in components)
|
60
|
+
|
61
|
+
|
62
|
+
def make_safe_attribute_name(name):
    """
    Turn an arbitrary string into a name usable as a Python attribute.

    Every character that is not alphanumeric or an underscore is replaced by
    an underscore. A leading digit is guarded with an underscore prefix, and a
    Python keyword gets an underscore suffix.

    :param name: The raw name.
    :return: The sanitised name; "_" when ``name`` is empty.
    """
    attribute_name = "".join(
        char if char.isalnum() or char == "_" else "_" for char in name
    )

    # An empty name has no first character to inspect; fall back to a bare
    # underscore instead of failing with an IndexError.
    if not attribute_name:
        return "_"

    # If the name starts with a digit, prepend an underscore
    if attribute_name[0].isdigit():
        attribute_name = "_" + attribute_name

    # Append an underscore if the name is a Python keyword
    if keyword.iskeyword(attribute_name):
        attribute_name += "_"

    return attribute_name
|
75
|
+
|
76
|
+
|
77
|
+
def safe_name(string):
    """
    Normalise a free-form string into a lower-case, attribute-safe name.

    The string is trimmed, characters other than word characters, whitespace
    and hyphens are removed, runs of whitespace become a single underscore,
    leading/trailing underscores and hyphens are dropped and the result is
    lower-cased. If it then starts with a digit it is prefixed with "n_".
    Finally it is passed through ``make_safe_attribute_name``, which replaces
    any remaining hyphens and guards against Python keywords.

    :param string: The string to be transformed.
    :return: The transformed string; "_" when nothing usable remains.
    """
    # trim the string
    string = string.strip()

    # Remove invalid characters
    string = re.sub(r"[^\w\s-]", "", string)

    # Replace spaces with underscores
    string = re.sub(r"\s+", "_", string)

    # Remove leading/trailing underscores and hyphens
    string = string.strip("_-")

    # Make it lowercase
    string = string.lower()

    # Input that was empty or consisted only of removed characters leaves
    # nothing to index below; return a placeholder instead of raising IndexError.
    if not string:
        return "_"

    if string[0].isdigit():
        # can't have things starting with a number
        string = "n_" + string

    # make sure we don't collide with a python keyword
    return make_safe_attribute_name(string)
|
107
|
+
|
108
|
+
|
109
|
+
def snake_case_to_title_case(snake_case_string):
    """
    Convert a snake_case string into space separated, capitalised words.

    :param snake_case_string: The snake_case string to convert.
    :return: The words joined by single spaces, each passed through ``str.capitalize``.
    """
    return " ".join(
        part.capitalize() for part in snake_case_string.split("_")
    )
|
113
|
+
|
114
|
+
|
115
|
+
def string_to_hex_color(string):
    """
    Map a string to a stable 24-bit hexadecimal colour code.

    The colour is derived from a SHA-256 digest rather than the built-in
    ``hash()``: string hashes are salted per interpreter process, so
    ``hash()`` would give the same string a different colour on every run.

    :param string: The string to derive the colour from; surrounding
        whitespace is ignored.
    :return: A colour in the form "#rrggbb".
    """
    import hashlib

    # Remove any leading or trailing whitespace from the string
    string = string.strip()

    # The first three bytes (six hex digits) of the digest form the colour
    digest = hashlib.sha256(string.encode("utf-8")).hexdigest()

    return "#" + digest[:6]
|
126
|
+
|
127
|
+
|
128
|
+
def get_is_square_bracket_first(string):
    """
    Tell whether "[" occurs before "{" in a string.

    :param string: The text to inspect.
    :return: True when "[" comes first (or is the only one present), False
        when "{" comes first (or is the only one present), and None when the
        string contains neither.
    """
    square_pos = string.find("[")
    curly_pos = string.find("{")

    # Neither bracket is present
    if square_pos == -1 and curly_pos == -1:
        return None

    # Only one of them is present
    if curly_pos == -1:
        return True
    if square_pos == -1:
        return False

    # Both are present: whichever has the smaller index wins
    return square_pos < curly_pos
|
145
|
+
|
146
|
+
|
147
|
+
def cosine_similarity(v1, v2):
    """
    Compute the cosine similarity between two vectors.

    Note that ``zip`` stops at the shorter input, so for vectors of different
    lengths the dot product only covers the common prefix while the norms
    cover each full vector; callers should pass vectors of equal length.

    :param v1: First vector, a re-iterable sequence of numbers.
    :param v2: Second vector, a re-iterable sequence of numbers.
    :return: The cosine of the angle between the vectors, or 0.0 when either
        vector has zero magnitude (including empty vectors).
    """
    dot_product = sum(a * b for a, b in zip(v1, v2))
    norm_a = sum(a * a for a in v1) ** 0.5
    norm_b = sum(b * b for b in v2) ** 0.5

    # The similarity is undefined for a zero-length vector; report "no
    # similarity" instead of raising ZeroDivisionError.
    denominator = norm_a * norm_b
    if denominator == 0:
        return 0.0

    return dot_product / denominator
|
153
|
+
|
154
|
+
|
155
|
+
def estimate_token_count(text, avg_token_length=1):
    """
    Roughly estimate the number of tokens in a text.

    Space characters are ignored; the remaining character count is divided by
    the average token length and rounded with the built-in ``round``.

    :param text: The text to measure.
    :param avg_token_length: Assumed average number of characters per token.
    :return: The estimated token count as an integer.
    """
    non_space_chars = text.replace(" ", "")
    return round(len(non_space_chars) / avg_token_length)
|
161
|
+
|
162
|
+
|
163
|
+
def get_all_concrete_subclasses(cls):
    """
    Recursively find all non-abstract subclasses of a given class.

    The hierarchy is walked depth-first. A class reachable through more than
    one parent (multiple inheritance) is reported only once, at the position
    where it is first encountered.

    Parameters:
    cls (class): The parent class to find subclasses for.

    Returns:
    list: A list of all non-abstract subclasses of cls, without duplicates.
    """
    concrete_subclasses = []
    visited = set()

    def _collect(parent):
        for subclass in parent.__subclasses__():
            # Skip classes already reached through another parent so that
            # neither they nor their descendants are listed twice.
            if subclass in visited:
                continue
            visited.add(subclass)
            if not isabstract(subclass):
                concrete_subclasses.append(subclass)
            _collect(subclass)

    _collect(cls)
    return concrete_subclasses
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: kodexa
|
3
|
-
Version: 7.0.
|
3
|
+
Version: 7.0.1a11915814268
|
4
4
|
Summary: Python SDK for the Kodexa Platform
|
5
5
|
Author: Austin Redenbaugh
|
6
6
|
Author-email: austin@kodexa.com
|
@@ -18,7 +18,7 @@ Requires-Dist: appdirs (>=1.4.4,<2.0.0)
|
|
18
18
|
Requires-Dist: better-exceptions (>=0.3.3,<0.4.0)
|
19
19
|
Requires-Dist: certifi (>=2024.7.4,<2025.0.0)
|
20
20
|
Requires-Dist: chevron (>=0.14.0,<0.15.0)
|
21
|
-
Requires-Dist: deepdiff (>=
|
21
|
+
Requires-Dist: deepdiff (>=8.0.1,<9.0.0)
|
22
22
|
Requires-Dist: msgpack (>=1.0.6,<2.0.0)
|
23
23
|
Requires-Dist: ply (>=3.11,<4.0)
|
24
24
|
Requires-Dist: pydantic (>=2.5.3,<3.0.0)
|
@@ -28,7 +28,7 @@ Requires-Dist: python-dateutil (>=2.8.2,<3.0.0)
|
|
28
28
|
Requires-Dist: pyyaml (>=6.0,<7.0)
|
29
29
|
Requires-Dist: requests (>=2.28.1,<3.0.0)
|
30
30
|
Requires-Dist: semver (>=3.0.1,<4.0.0)
|
31
|
-
Requires-Dist: simpleeval (>=0.
|
31
|
+
Requires-Dist: simpleeval (>=1.0.0,<2.0.0)
|
32
32
|
Requires-Dist: urllib3 (>=2.0.0,<3.0.0)
|
33
33
|
Description-Content-Type: text/markdown
|
34
34
|
|
@@ -66,7 +66,7 @@ working with a Kodexa platform instance.
|
|
66
66
|
|
67
67
|
## Documentation & Examples
|
68
68
|
|
69
|
-
Documentation is available at the [Kodexa
|
69
|
+
Documentation is available at the [Kodexa Support Portal](https://support.kodexa.ai)
|
70
70
|
|
71
71
|
## Set-up
|
72
72
|
|
@@ -3,21 +3,23 @@ kodexa/assistant/__init__.py,sha256=nlXm_YnV_50hgn0TIT2Fkc2fQ-86OjmctY_j8My9nc4,
|
|
3
3
|
kodexa/assistant/assistant.py,sha256=5KFdbqFSLIZJyDRyZdpcfr448fT-CW4JhYu9A6B9DGY,14663
|
4
4
|
kodexa/connectors/__init__.py,sha256=WF6G_MUeU32TlKSUKkpNoNX7dq8iBPliFMep4E8BmZc,328
|
5
5
|
kodexa/connectors/connectors.py,sha256=FpUZDkSyHld2b9eYRuVOWzaFtuGoaRuPXXicJB7THbc,10413
|
6
|
+
kodexa/dataclasses/__init__.py,sha256=DKLWMALnUWYFDMraEc9We85GI_rvpp-z7Q1fKX-3jQI,18561
|
7
|
+
kodexa/dataclasses/templates/llm_data_class.j2,sha256=YWjStW136chV_59JM3AYis3i-0jdrqDvLXsISUW9zDU,660
|
6
8
|
kodexa/model/__init__.py,sha256=rtLXYJBxB-rnukhslN9rlqoB3--1H3253HyHGbD_Gc8,796
|
7
9
|
kodexa/model/base.py,sha256=CaZK8nMhT1LdCpt4aLhebJGcorjq9qRID1FjnXnP14M,521
|
8
10
|
kodexa/model/entities/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
9
11
|
kodexa/model/entities/check_response.py,sha256=eqBHxO6G2OAziL3p9bHGI-oiPkAG82H6Choc8wyvtM4,3949
|
10
12
|
kodexa/model/entities/product.py,sha256=ZDpHuBE_9FJ-klnkyBvTfPwYOqBkM1wraZMtHqNA8FQ,3526
|
11
13
|
kodexa/model/entities/product_subscription.py,sha256=UcmWR-qgLfdV7VCtJNwzgkanoS8nBSL6ngVuxQUK1M8,3810
|
12
|
-
kodexa/model/model.py,sha256=
|
13
|
-
kodexa/model/objects.py,sha256=
|
14
|
-
kodexa/model/persistence.py,sha256=
|
14
|
+
kodexa/model/model.py,sha256=wY5HnpsAnKlH_aDEHWNf-ZrhdrBg-DtqGFszjkdZtPU,118340
|
15
|
+
kodexa/model/objects.py,sha256=CE76KwQwIT6FdWJuac8aIumX_Ok6-9oq1JXz0K_gdwo,185117
|
16
|
+
kodexa/model/persistence.py,sha256=PTh9jmqYCDuWfiuCssLttFaYWiMA_fCiwjgsYDW4AhE,68281
|
15
17
|
kodexa/pipeline/__init__.py,sha256=sA7f5D6qkdMrpp2xTIeefnrUBI6xxEEWostvxfX_1Cs,236
|
16
18
|
kodexa/pipeline/pipeline.py,sha256=ZYpJAWcwV4YRK589DUhU0vXGQlkNSj4J2TsGbYqTLjo,25221
|
17
19
|
kodexa/platform/__init__.py,sha256=1O3oiWMg292NPL_NacKDnK1T3_R6cMorrPRue_9e-O4,216
|
18
|
-
kodexa/platform/client.py,sha256=
|
20
|
+
kodexa/platform/client.py,sha256=PWn-Xk3vH993Ne5YXDKqJT9rmj7okwVBivuQv9rt2UQ,226591
|
19
21
|
kodexa/platform/interaction.py,sha256=6zpcwXKNZstUGNS6m4JsoRXAqCZPJHWI-ZN3co8nnF0,1055
|
20
|
-
kodexa/platform/kodexa.py,sha256=
|
22
|
+
kodexa/platform/kodexa.py,sha256=3qRbEtLOw4yl7OV5ISZZ85N9gKeb5DA4XtHzieFIdYc,34796
|
21
23
|
kodexa/selectors/__init__.py,sha256=xA9-4vpyaAZWPSk3bh2kvDLkdv6XEmm7PjFbpziiTIk,100
|
22
24
|
kodexa/selectors/ast.py,sha256=gG-1st841IntgBE5V7p3Cq9azaym2jV5lB_AIywQTCI,13269
|
23
25
|
kodexa/selectors/core.py,sha256=kkt02DN20gXeaDGoGubPPeeTV7rCr4sxTyELrI0l1YU,3691
|
@@ -39,7 +41,8 @@ kodexa/testing/test_components.py,sha256=g5lP-GY0nTHuH5cIEw45vIejEeBaWkPKQGHL36j
|
|
39
41
|
kodexa/testing/test_utils.py,sha256=DrLCkHxdb6AbZ-X3WmTMbQmnVIm55VEBL8MjtUK9POs,14021
|
40
42
|
kodexa/training/__init__.py,sha256=xs2L62YpRkIRfslQwtQZ5Yxjhm7sLzX2TrVX6EuBnZQ,52
|
41
43
|
kodexa/training/train_utils.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
42
|
-
kodexa
|
43
|
-
kodexa-7.0.
|
44
|
-
kodexa-7.0.
|
45
|
-
kodexa-7.0.
|
44
|
+
kodexa/utils/__init__.py,sha256=Pnim1o9_db5YEnNvDTxpM7HG-qTlL6n8JwFwOafU9wo,5928
|
45
|
+
kodexa-7.0.1a11915814268.dist-info/LICENSE,sha256=WNHhf_5RCaeuKWyq_K39vmp9F28LxKsB4SpomwSZ2L0,11357
|
46
|
+
kodexa-7.0.1a11915814268.dist-info/METADATA,sha256=84lOTb7ed43y_Zb9Mo1WRZOR16ygEad2lj5MY1hdk_w,3529
|
47
|
+
kodexa-7.0.1a11915814268.dist-info/WHEEL,sha256=Zb28QaM1gQi8f4VCBhsUklF61CTlNYfs9YAZn-TOGFk,88
|
48
|
+
kodexa-7.0.1a11915814268.dist-info/RECORD,,
|
File without changes
|
File without changes
|