kodexa 7.0.1a9987865527__py3-none-any.whl → 7.0.1a11915814268__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,178 @@
1
+ """
2
+ This module provides a set of functions to manipulate and convert taxonomy objects for use within a data model.
3
+ It includes functions to convert taxonomy names to various naming conventions such as property names, class names,
4
+ and group paths. Additionally, it offers utility functions for string manipulation, like converting snake case strings
5
+ to camel case or title case, making string names safe for use as attribute names, converting strings to hexadecimal
6
+ color codes, estimating the token count of a text, and recursively finding all non-abstract subclasses of a given class.
7
+ """
8
+
9
+ import keyword
10
+ import logging
11
+ import re
12
+ from inspect import isabstract
13
+
14
+ from kodexa.model.objects import Taxon
15
+
16
+ logger = logging.getLogger(__name__)
17
+
18
+
19
def taxon_to_property_name(taxon: Taxon):
    """
    Derive a snake_case property name from a taxon's label.

    The label is first sanitised with ``safe_name`` and the result is then
    normalised with ``to_snake``. As a side effect, the generated name is also
    stored on ``taxon.external_name``.

    :param taxon: The taxon whose label is converted.
    :return: The generated property name.
    """
    property_name = to_snake(safe_name(taxon.label))
    # Record the generated name on the taxon itself as well as returning it.
    taxon.external_name = property_name
    return property_name
26
+
27
+
28
def taxon_to_class_name(taxon: Taxon):
    """
    Derive a class name from a taxon's label.

    The label is first sanitised with ``safe_name`` and the result is then
    passed through ``snake_to_camel``. As a side effect, the generated name is
    also stored on ``taxon.external_name``.

    :param taxon: The taxon whose label is converted.
    :return: The generated class name.
    """
    class_name = snake_to_camel(safe_name(taxon.label))
    # Record the generated name on the taxon itself as well as returning it.
    taxon.external_name = class_name
    return class_name
35
+
36
+
37
def taxon_to_group_path(taxon: Taxon):
    """
    Return the group name of a taxon, i.e. the first segment of its path.

    :param taxon: The taxon whose ``path`` is inspected.
    :return: Everything in ``taxon.path`` before the first ``/`` (the whole
        path when it contains no ``/``).
    """
    group_name, _, _ = taxon.path.partition("/")
    return group_name
41
+
42
+
43
def snake_to_camel(snake_str):
    """
    Convert a snake_case (or space separated) string to UpperCamelCase.

    Spaces are treated like underscores. Every word, including the first one,
    is stripped of surrounding whitespace and title-cased, then the words are
    concatenated without a separator.

    :param snake_str: The string to convert.
    :return: The converted string.
    """
    words = snake_str.replace(" ", "_").split("_")
    return "".join(word.strip().title() for word in words)
49
+
50
+
51
def to_snake(base_str):
    """
    Convert a string to a lower-case, underscore separated (snake_case) name.

    Spaces and hyphens are treated as word separators. If the string starts
    with a digit it is prefixed with ``n_`` so the result does not begin with
    a number.

    :param base_str: The string to convert.
    :return: The snake_case version of the string.
    """
    components = base_str.replace(" ", "_").replace("-", "_").split("_")

    # Look at the first character only: a leading component such as "1abc"
    # starts with a digit even though it is not made up entirely of digits.
    if components[0][:1].isdigit():
        components[0] = "n_" + components[0]

    # Lower-case every component and join them back with underscores
    return "_".join(x.strip().lower() for x in components)
60
+
61
+
62
def make_safe_attribute_name(name):
    """
    Turn an arbitrary string into something usable as an attribute name.

    Every character that is not alphanumeric or an underscore is replaced with
    an underscore, a leading digit is guarded with an underscore prefix, and
    Python keywords get a trailing underscore.

    :param name: The raw name.
    :return: The sanitised name; ``"_"`` when ``name`` is empty.
    """
    # Replace invalid characters (anything not a letter, digit, or underscore) with an underscore
    sanitized = "".join(char if char.isalnum() or char == "_" else "_" for char in name)

    # An empty name has no first character to inspect; fall back to a bare
    # underscore instead of failing with an IndexError.
    if not sanitized:
        return "_"

    # If the name starts with a digit, prepend an underscore
    if sanitized[0].isdigit():
        sanitized = "_" + sanitized

    # Append an underscore if the name is a Python keyword
    if keyword.iskeyword(sanitized):
        sanitized += "_"

    return sanitized
75
+
76
+
77
def safe_name(string):
    """
    Removes invalid characters from a string, replaces whitespace with underscores, removes
    leading/trailing underscores and hyphens, and makes the string lowercase. If the resulting
    string starts with a number, it is prefixed with "n_". The result is finally passed through
    ``make_safe_attribute_name``.

    :param string: The string to be transformed.
    :return: The transformed string; ``"_"`` when nothing usable is left after clean-up.
    """
    # trim the string
    string = string.strip()

    # Remove invalid characters (anything that is not a word character, whitespace or hyphen)
    string = re.sub(r"[^\w\s-]", "", string)

    # Replace runs of whitespace with a single underscore
    string = re.sub(r"\s+", "_", string)

    # Remove leading/trailing underscores and hyphens
    string = string.strip("_-")

    # Make it lowercase
    string = string.lower()

    # Inputs such as "", "!!!" or "--" leave nothing behind; return a bare
    # underscore instead of failing with an IndexError on string[0] below.
    if not string:
        return "_"

    if string[0].isdigit():
        # can't have things starting with a number
        string = "n_" + string

    # make sure we don't collide with a python keyword
    return make_safe_attribute_name(string)
107
+
108
+
109
def snake_case_to_title_case(snake_case_string):
    """
    Convert a snake_case string into space separated, capitalised words.

    :param snake_case_string: The underscore separated string.
    :return: The words joined by single spaces, each with its first character
        upper-cased and the remainder lower-cased.
    """
    return " ".join(part.capitalize() for part in snake_case_string.split("_"))
113
+
114
+
115
def string_to_hex_color(string):
    """
    Map a string to a deterministic 24-bit hexadecimal colour code.

    The built-in ``hash()`` is salted per interpreter process for strings, so
    it would produce a different colour for the same text on every run. A
    CRC32 checksum of the UTF-8 encoded text is used instead so that the same
    string always maps to the same colour.

    :param string: The text to derive a colour from; surrounding whitespace is ignored.
    :return: A colour in the form ``#rrggbb`` (lower-case hexadecimal).
    """
    # Imported locally so this function does not depend on the module's import block
    import zlib

    # Remove any leading or trailing whitespace from the string
    string = string.strip()

    # Calculate a process-independent checksum of the string
    hash_value = zlib.crc32(string.encode("utf-8", errors="replace"))

    # Keep the low 24 bits and render them as a hexadecimal colour code
    return "#{:06x}".format(hash_value & 0xFFFFFF)
126
+
127
+
128
def get_is_square_bracket_first(string):
    """
    Report whether a ``[`` appears before any ``{`` in the string.

    :param string: The text to inspect.
    :return: ``True`` when ``[`` comes first (or is the only one present),
        ``False`` when ``{`` comes first (or is the only one present), and
        ``None`` when the string contains neither character.
    """
    square_index = string.find("[")
    curly_index = string.find("{")

    # Neither bracket is present
    if square_index == -1 and curly_index == -1:
        return None

    # Only one of the two is present
    if curly_index == -1:
        return True
    if square_index == -1:
        return False

    # Both are present: whichever has the smaller index appears first
    return square_index < curly_index
145
+
146
+
147
def cosine_similarity(v1, v2):
    """
    Compute the cosine similarity between two vectors.

    Both arguments are iterated more than once, so they must be re-iterable
    sequences rather than one-shot iterators. The dot product is built with
    ``zip`` and therefore only covers the common prefix when the lengths differ.

    :param v1: The first vector, a sequence of numbers.
    :param v2: The second vector, a sequence of numbers.
    :return: The cosine of the angle between the vectors, or ``0.0`` when
        either vector has zero magnitude (including empty vectors).
    """
    dot_product = sum(a * b for a, b in zip(v1, v2))
    norm_a = sum(a * a for a in v1) ** 0.5
    norm_b = sum(b * b for b in v2) ** 0.5

    # The angle is undefined for a zero-length vector; report "no similarity"
    # rather than raising ZeroDivisionError.
    denominator = norm_a * norm_b
    if denominator == 0:
        return 0.0

    return dot_product / denominator
153
+
154
+
155
def estimate_token_count(text, avg_token_length=1):
    """
    Roughly estimate how many tokens a piece of text contains.

    Space characters are ignored; the remaining character count is divided by
    the average token length and rounded to the nearest integer.

    :param text: The text to measure.
    :param avg_token_length: Assumed average number of characters per token.
    :return: The estimated token count.
    """
    # Count every character except plain spaces
    non_space_chars = len(text) - text.count(" ")
    return round(non_space_chars / avg_token_length)
161
+
162
+
163
def get_all_concrete_subclasses(cls):
    """
    Recursively find all non-abstract subclasses of a given class.

    The class hierarchy is walked depth-first. Each subclass is reported at
    most once, even when it is reachable through several parents (diamond
    inheritance), in the order in which it is first encountered.

    Parameters:
    cls (class): The parent class to find subclasses for.

    Returns:
    list: A list of all non-abstract subclasses of cls, without duplicates.
    """
    concrete_subclasses = []
    visited = set()

    def _collect(parent):
        # Depth-first walk over the direct subclasses of ``parent``
        for subclass in parent.__subclasses__():
            if subclass in visited:
                # Already reached through another parent; its own subclasses
                # were handled on that earlier visit.
                continue
            visited.add(subclass)
            if not isabstract(subclass):
                concrete_subclasses.append(subclass)
            _collect(subclass)

    _collect(cls)
    return concrete_subclasses
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: kodexa
3
- Version: 7.0.1a9987865527
3
+ Version: 7.0.1a11915814268
4
4
  Summary: Python SDK for the Kodexa Platform
5
5
  Author: Austin Redenbaugh
6
6
  Author-email: austin@kodexa.com
@@ -18,7 +18,7 @@ Requires-Dist: appdirs (>=1.4.4,<2.0.0)
18
18
  Requires-Dist: better-exceptions (>=0.3.3,<0.4.0)
19
19
  Requires-Dist: certifi (>=2024.7.4,<2025.0.0)
20
20
  Requires-Dist: chevron (>=0.14.0,<0.15.0)
21
- Requires-Dist: deepdiff (>=7.0.1,<8.0.0)
21
+ Requires-Dist: deepdiff (>=8.0.1,<9.0.0)
22
22
  Requires-Dist: msgpack (>=1.0.6,<2.0.0)
23
23
  Requires-Dist: ply (>=3.11,<4.0)
24
24
  Requires-Dist: pydantic (>=2.5.3,<3.0.0)
@@ -28,7 +28,7 @@ Requires-Dist: python-dateutil (>=2.8.2,<3.0.0)
28
28
  Requires-Dist: pyyaml (>=6.0,<7.0)
29
29
  Requires-Dist: requests (>=2.28.1,<3.0.0)
30
30
  Requires-Dist: semver (>=3.0.1,<4.0.0)
31
- Requires-Dist: simpleeval (>=0.9.13,<0.10.0)
31
+ Requires-Dist: simpleeval (>=1.0.0,<2.0.0)
32
32
  Requires-Dist: urllib3 (>=2.0.0,<3.0.0)
33
33
  Description-Content-Type: text/markdown
34
34
 
@@ -66,7 +66,7 @@ working with a Kodexa platform instance.
66
66
 
67
67
  ## Documentation & Examples
68
68
 
69
- Documentation is available at the [Kodexa Documentation Portal](https://docs.kodexa.com)
69
+ Documentation is available at the [Kodexa Support Portal](https://support.kodexa.ai)
70
70
 
71
71
  ## Set-up
72
72
 
@@ -3,21 +3,23 @@ kodexa/assistant/__init__.py,sha256=nlXm_YnV_50hgn0TIT2Fkc2fQ-86OjmctY_j8My9nc4,
3
3
  kodexa/assistant/assistant.py,sha256=5KFdbqFSLIZJyDRyZdpcfr448fT-CW4JhYu9A6B9DGY,14663
4
4
  kodexa/connectors/__init__.py,sha256=WF6G_MUeU32TlKSUKkpNoNX7dq8iBPliFMep4E8BmZc,328
5
5
  kodexa/connectors/connectors.py,sha256=FpUZDkSyHld2b9eYRuVOWzaFtuGoaRuPXXicJB7THbc,10413
6
+ kodexa/dataclasses/__init__.py,sha256=DKLWMALnUWYFDMraEc9We85GI_rvpp-z7Q1fKX-3jQI,18561
7
+ kodexa/dataclasses/templates/llm_data_class.j2,sha256=YWjStW136chV_59JM3AYis3i-0jdrqDvLXsISUW9zDU,660
6
8
  kodexa/model/__init__.py,sha256=rtLXYJBxB-rnukhslN9rlqoB3--1H3253HyHGbD_Gc8,796
7
9
  kodexa/model/base.py,sha256=CaZK8nMhT1LdCpt4aLhebJGcorjq9qRID1FjnXnP14M,521
8
10
  kodexa/model/entities/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
9
11
  kodexa/model/entities/check_response.py,sha256=eqBHxO6G2OAziL3p9bHGI-oiPkAG82H6Choc8wyvtM4,3949
10
12
  kodexa/model/entities/product.py,sha256=ZDpHuBE_9FJ-klnkyBvTfPwYOqBkM1wraZMtHqNA8FQ,3526
11
13
  kodexa/model/entities/product_subscription.py,sha256=UcmWR-qgLfdV7VCtJNwzgkanoS8nBSL6ngVuxQUK1M8,3810
12
- kodexa/model/model.py,sha256=BURhrOEVTTlKkDJho5CEFeLR9Dyq157mztlvbAdL1d4,115769
13
- kodexa/model/objects.py,sha256=EPTqjlSF2yH8YhM0d3mmwJwwEQRZaFXG8ssznEHxU-s,176702
14
- kodexa/model/persistence.py,sha256=sx5FwTSsWMdAZpAs0-6PqyULHkQyNQClApUKJZ-ly8M,62032
14
+ kodexa/model/model.py,sha256=wY5HnpsAnKlH_aDEHWNf-ZrhdrBg-DtqGFszjkdZtPU,118340
15
+ kodexa/model/objects.py,sha256=CE76KwQwIT6FdWJuac8aIumX_Ok6-9oq1JXz0K_gdwo,185117
16
+ kodexa/model/persistence.py,sha256=PTh9jmqYCDuWfiuCssLttFaYWiMA_fCiwjgsYDW4AhE,68281
15
17
  kodexa/pipeline/__init__.py,sha256=sA7f5D6qkdMrpp2xTIeefnrUBI6xxEEWostvxfX_1Cs,236
16
18
  kodexa/pipeline/pipeline.py,sha256=ZYpJAWcwV4YRK589DUhU0vXGQlkNSj4J2TsGbYqTLjo,25221
17
19
  kodexa/platform/__init__.py,sha256=1O3oiWMg292NPL_NacKDnK1T3_R6cMorrPRue_9e-O4,216
18
- kodexa/platform/client.py,sha256=GCOlFN7BgnCDEg4MNiLgDmSCHGglWdMeCrQQH6y2_oM,222182
20
+ kodexa/platform/client.py,sha256=PWn-Xk3vH993Ne5YXDKqJT9rmj7okwVBivuQv9rt2UQ,226591
19
21
  kodexa/platform/interaction.py,sha256=6zpcwXKNZstUGNS6m4JsoRXAqCZPJHWI-ZN3co8nnF0,1055
20
- kodexa/platform/kodexa.py,sha256=Bvf6x43FWsFuAuQ4N8TvjSZq6niEtBTESmFCWVPASeQ,34024
22
+ kodexa/platform/kodexa.py,sha256=3qRbEtLOw4yl7OV5ISZZ85N9gKeb5DA4XtHzieFIdYc,34796
21
23
  kodexa/selectors/__init__.py,sha256=xA9-4vpyaAZWPSk3bh2kvDLkdv6XEmm7PjFbpziiTIk,100
22
24
  kodexa/selectors/ast.py,sha256=gG-1st841IntgBE5V7p3Cq9azaym2jV5lB_AIywQTCI,13269
23
25
  kodexa/selectors/core.py,sha256=kkt02DN20gXeaDGoGubPPeeTV7rCr4sxTyELrI0l1YU,3691
@@ -39,7 +41,8 @@ kodexa/testing/test_components.py,sha256=g5lP-GY0nTHuH5cIEw45vIejEeBaWkPKQGHL36j
39
41
  kodexa/testing/test_utils.py,sha256=DrLCkHxdb6AbZ-X3WmTMbQmnVIm55VEBL8MjtUK9POs,14021
40
42
  kodexa/training/__init__.py,sha256=xs2L62YpRkIRfslQwtQZ5Yxjhm7sLzX2TrVX6EuBnZQ,52
41
43
  kodexa/training/train_utils.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
42
- kodexa-7.0.1a9987865527.dist-info/LICENSE,sha256=WNHhf_5RCaeuKWyq_K39vmp9F28LxKsB4SpomwSZ2L0,11357
43
- kodexa-7.0.1a9987865527.dist-info/METADATA,sha256=jFlJvhkVVyHXGha9WaOdJoSV4SWR-Gzl2YXE58cmUHA,3534
44
- kodexa-7.0.1a9987865527.dist-info/WHEEL,sha256=Zb28QaM1gQi8f4VCBhsUklF61CTlNYfs9YAZn-TOGFk,88
45
- kodexa-7.0.1a9987865527.dist-info/RECORD,,
44
+ kodexa/utils/__init__.py,sha256=Pnim1o9_db5YEnNvDTxpM7HG-qTlL6n8JwFwOafU9wo,5928
45
+ kodexa-7.0.1a11915814268.dist-info/LICENSE,sha256=WNHhf_5RCaeuKWyq_K39vmp9F28LxKsB4SpomwSZ2L0,11357
46
+ kodexa-7.0.1a11915814268.dist-info/METADATA,sha256=84lOTb7ed43y_Zb9Mo1WRZOR16ygEad2lj5MY1hdk_w,3529
47
+ kodexa-7.0.1a11915814268.dist-info/WHEEL,sha256=Zb28QaM1gQi8f4VCBhsUklF61CTlNYfs9YAZn-TOGFk,88
48
+ kodexa-7.0.1a11915814268.dist-info/RECORD,,