dataknobs-structures 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,17 @@
1
+ """Data structures for AI knowledge bases."""
2
+
3
+ from dataknobs_structures.conditional_dict import cdict
4
+ from dataknobs_structures.document import Text, TextMetaData
5
+ from dataknobs_structures.record_store import RecordStore
6
+ from dataknobs_structures.tree import Tree, build_tree_from_string
7
+
8
+ __version__ = "1.0.0"
9
+
10
+ __all__ = [
11
+ "RecordStore",
12
+ "Text",
13
+ "TextMetaData",
14
+ "Tree",
15
+ "build_tree_from_string",
16
+ "cdict",
17
+ ]
@@ -0,0 +1,65 @@
1
+ """Implementation of a conditional associative array (dict) using the strategy
2
+ pattern.
3
+ """
4
+
5
+ from collections.abc import Callable
6
+ from typing import Any, Dict, Optional, Union, Iterable, Tuple
7
+
8
+
9
class cdict(dict):
    """A dictionary that conditionally accepts keys and/or values.

    This implementation uses the strategy pattern: a validation function is
    provided on initialization and consulted whenever an item is set through
    ``__setitem__``, ``setdefault``, ``update`` or the constructor. If the
    function does not accept a key/value pair, the pair is not stored in the
    dict and is recorded in the "rejected" property instead.
    """

    def __init__(self, accept_fn: Callable[[Dict, Any, Any], bool], *args: Any, **kwargs: Any) -> None:
        """Initialize an empty dict and route any initial items through ``update``.

        :param accept_fn: A fn(d, key, value) that returns True to accept
            the key/value into this dict d, or False to reject.
        :param args: At most one mapping or iterable of key/value pairs
            holding initial items.
        :param kwargs: Additional initial items given as keyword arguments.
        :raises TypeError: If more than one positional item source is given.
        """
        super().__init__()
        self._rejected: Dict[Any, Any] = {}
        self.accept_fn = accept_fn
        # Initial items go through update() (and therefore accept_fn) rather
        # than being handed to dict.__init__.
        self.update(*args, **kwargs)

    @property
    def rejected(self) -> Dict:
        """The key/value pairs that accept_fn has rejected so far."""
        return self._rejected

    def __setitem__(self, key: Any, value: Any) -> None:
        """Store key/value if accepted; otherwise record it as rejected."""
        if self.accept_fn(self, key, value):
            super().__setitem__(key, value)
        else:
            self._rejected[key] = value

    def setdefault(self, key: Any, default: Any = None) -> Any:
        """Return the value for key, inserting default if key is absent.

        :param key: The key to look up
        :param default: The value to insert when the key is absent
        :return: The existing value, the newly inserted default, or None
            when the key is absent and the default is rejected.
        """
        if key in self:
            return self[key]
        if self.accept_fn(self, key, default):
            super().__setitem__(key, default)
            return default
        self._rejected[key] = default
        return None

    def update(self, *args: Any, **kwargs: Any) -> None:
        """Add items from a mapping or iterable of pairs and/or keywords.

        Every item is set through ``__setitem__`` so that accept_fn is
        consulted for each one.

        :param args: At most one mapping-like object (anything with a
            ``keys`` method) or iterable of key/value pairs.
        :param kwargs: Items given as keyword arguments.
        :raises TypeError: If more than one positional argument is given.
        """
        # Mirror dict.update: extra positional arguments are an error rather
        # than being silently dropped. Checked first so nothing is modified.
        if len(args) > 1:
            raise TypeError(
                f"update expected at most 1 positional argument, got {len(args)}"
            )
        if args:
            other = args[0]
            if hasattr(other, "keys"):
                # Mapping-like: only keys() and item access are required.
                for key in other.keys():
                    self[key] = other[key]
            else:
                # An iterable of key/value pairs.
                for key, value in other:
                    self[key] = value
        for key, value in kwargs.items():
            self[key] = value
@@ -0,0 +1,83 @@
1
+ from typing import Any
2
+
3
+ # Key text metadata attributes
4
+ TEXT_ID_ATTR = "text_id"
5
+ TEXT_LABEL_ATTR = "text_label"
6
+ TEXT_LABEL = "text"
7
+
8
+
9
+ class MetaData:
10
+ """Container for managing and providing access to meta-data."""
11
+
12
+ def __init__(self, key_data: dict[str, Any], **kwargs: Any) -> None:
13
+ """
14
+ Initialize with the mandatory or "key" data and any additional optional
15
+ values.
16
+ """
17
+ self._data = key_data.copy() if key_data is not None else {}
18
+ if kwargs is not None:
19
+ self._data.update(kwargs)
20
+
21
+ @property
22
+ def data(self) -> dict[str, Any]:
23
+ """The data dictionary."""
24
+ return self._data
25
+
26
+ def get_value(self, attribute: str, missing: str | None = None) -> Any:
27
+ """
28
+ Get the value for the given attribute, or the "missing" value.
29
+
30
+ :param attribute: The meta-data attribute whose value to get
31
+ :param missing: The missing value
32
+ :return: The attribute's value or the missing value.
33
+ """
34
+ return self.data.get(attribute, missing)
35
+
36
+
37
class TextMetaData(MetaData):
    """Container for text meta-data."""

    def __init__(self, text_id: Any, text_label: str = TEXT_LABEL, **kwargs: Any) -> None:
        """Store the text id and label as key data along with any extras.

        :param text_id: The identifier of the text
        :param text_label: The label of the text
        :param kwargs: Additional optional meta-data values
        """
        key_data = {TEXT_ID_ATTR: text_id, TEXT_LABEL_ATTR: text_label}
        super().__init__(key_data, **kwargs)

    @property
    def text_id(self) -> Any:
        """The text id stored in the meta-data."""
        values = self.data
        return values[TEXT_ID_ATTR]

    @property
    def text_label(self) -> str | Any:
        """The text label stored in the meta-data."""
        values = self.data
        return values[TEXT_LABEL_ATTR]
56
+
57
+
58
class Text:
    """Wrapper for a text string for analysis."""

    def __init__(
        self,
        text: str,
        metadata: TextMetaData | None = None,
    ) -> None:
        """Wrap the text together with its meta-data.

        :param text: The text string
        :param metadata: The text's meta-data. When None (the default), a
            TextMetaData with text_id 0 and the default text label is used.
        """
        self._text = text
        if metadata is None:
            # None was always accepted here; it is now also the default so
            # that callers without meta-data can omit the argument.
            metadata = TextMetaData(0, TEXT_LABEL)
        self._metadata = metadata

    @property
    def text(self) -> str:
        """The wrapped text string."""
        return self._text

    @property
    def text_id(self) -> Any:
        """The text id from the meta-data."""
        return self.metadata.text_id

    @property
    def text_label(self) -> str:
        """The text label from the meta-data."""
        return self.metadata.text_label

    @property
    def metadata(self) -> TextMetaData:
        """The text's meta-data."""
        return self._metadata
@@ -0,0 +1,83 @@
1
+ import json
2
+ import os
3
+ from typing import Any, Dict, List, Optional
4
+
5
+ import pandas as pd
6
+
7
+
8
class RecordStore:
    """Wrapper around a sequence of records represented in memory as a list of
    dictionaries and/or as a dataframe and as a tsv file on disk.
    """

    def __init__(
        self,
        tsv_fpath: Optional[str],
        df: Optional[pd.DataFrame] = None,
        sep: str = "\t",
    ):
        """:param tsv_fpath: The path to the tsv file on disk. If None or
            empty, then data will not be persisted.
        :param df: An initial dataframe
        :param sep: The file separator to use (if not a tab)
        """
        self.tsv_fpath = tsv_fpath
        self.init_df = df
        self.sep = sep
        self._df: Optional[pd.DataFrame] = None
        self._recs: List[Dict[str, Any]] = []  # Always a list, never None
        self._init_data(df)

    def _init_data(self, df: Optional[pd.DataFrame] = None) -> None:
        """Initialize store data from the tsv file or, failing that, from df.

        :param df: The dataframe to copy when there is no backing file on disk
        """
        if self.tsv_fpath and os.path.exists(self.tsv_fpath):
            self._df = pd.read_csv(self.tsv_fpath, sep=self.sep)
        else:
            self._df = df.copy() if df is not None else None
        self._recs = self._build_recs_from_df()

    def _build_recs_from_df(self) -> List[Dict[str, Any]]:
        """Build records from the dataframe.

        Records are produced by serializing the dataframe to JSON lines and
        parsing each line back into a dictionary.

        :return: One dictionary per dataframe row; empty when there is no
            dataframe or the dataframe has no rows.
        """
        if self._df is None:
            return []
        json_lines = self._df.to_json(orient="records", lines=True)
        # A dataframe with no rows serializes to blank text; skip blank lines
        # so json.loads is never handed an empty string.
        return [
            json.loads(line)
            for line in json_lines.strip().split("\n")
            if line.strip()
        ]

    @property
    def df(self) -> Optional[pd.DataFrame]:
        """Get the records as a dataframe, (re)building it when needed."""
        if self._df is None and self._recs is not None:
            self._df = pd.DataFrame(self._recs)
        return self._df

    @property
    def records(self) -> List[Dict[str, Any]]:
        """Get the records as a list of dictionaries"""
        return self._recs or []

    def clear(self) -> None:
        """Clear the contents, starting from empty, but don't auto-"save"."""
        self._recs.clear()
        self._df = None

    def add_rec(self, rec: Dict[str, Any]) -> None:
        """Add the record and invalidate the cached dataframe.

        :param rec: The record to append
        """
        self._recs.append(rec)
        self._df = None

    def save(self) -> None:
        """Save the records to disk as a tsv.

        Does nothing when tsv_fpath is None or empty.
        """
        # Truthiness (not just "is not None") so that an empty path means
        # "not persisted", as documented on the constructor.
        if self.tsv_fpath and self.df is not None:
            self.df.to_csv(self.tsv_fpath, sep=self.sep, index=False)

    def restore(self, df: Optional[pd.DataFrame] = None) -> None:
        """Restore records from the version on disk, discarding any changes.
        NOTE: If there is no backing file (e.g., tsv_fpath is None), then
        restore will discard all data and restart with the given df (if not
        None), the init df, or start anew.

        :param df: The dataframe to restart from when there is no backing file
        """
        self._init_data(df if df is not None else self.init_df)
@@ -0,0 +1,462 @@
1
+ """Implementation of a simple tree data structure."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from collections import deque
6
+ from collections.abc import Callable
7
+ from typing import Any, List, Tuple, Union, Optional, Deque
8
+
9
+ import graphviz
10
+ from pyparsing import OneOrMore, nestedExpr
11
+
12
+
13
class Tree:
    """Implementation of a simple tree data structure.

    Where the tree is represented as a node containing:
      * (arbitrary) data
      * a list of (ordered) child nodes
      * a single (optional) parent node

    And each tree node is doubly linked from parent to child(ren) and from
    child to parent for efficient traversal both up (to parent) and down
    (to children) the tree.
    """

    def __init__(
        self,
        data: Any,
        parent: Union[Tree, Any] = None,
        child_pos: Optional[int] = None,
    ):
        """Initialize a tree (node), optionally adding it to the given parent
        at an optional child position.

        :param data: The data to be contained within the node.
        :param parent: The parent node to this node, or data from which a
            new parent node is created.
        :param child_pos: The (optional) position among the parent's children
            at which to insert this node.
        """
        self._data = data
        self._children: Optional[List[Tree]] = None
        self._parent: Optional[Tree] = None
        if parent is not None:
            if not isinstance(parent, Tree):
                parent = Tree(parent)
            parent.add_child(self, child_pos)

    def __repr__(self) -> str:
        """:return: The (multiline) string representation of this tree."""
        return self.as_string(delim=" ", multiline=True)

    @property
    def data(self) -> Any:
        """:return: This node's data."""
        return self._data

    @data.setter
    def data(self, data: Any) -> None:
        """Set this node's data."""
        self._data = data

    @property
    def children(self) -> Optional[List[Tree]]:
        """:return: This node's children -- list of child nodes, or None if
        no child has ever been added.
        """
        return self._children

    @property
    def parent(self) -> Optional[Tree]:
        """:return: This node's parent."""
        return self._parent

    @parent.setter
    def parent(self, parent: Optional[Tree]) -> None:
        """Set this node's parent reference.

        Only the reference is set; the parent's list of children is not
        touched.
        """
        self._parent = parent

    @property
    def root(self) -> Tree:
        """:return: The root of this node's tree."""
        root = self
        while root.parent is not None:
            root = root.parent
        return root

    @property
    def sibnum(self) -> int:
        """:return: This node's sibling number (0-based) among its parent's
        children, or 0 when it has no parent.
        """
        if self._parent is not None and self._parent.children is not None:
            return self._parent.children.index(self)
        return 0

    @property
    def num_siblings(self) -> int:
        """:return: The number of siblings (including self) of this node"""
        return self._parent.num_children if self._parent is not None else 1

    @property
    def next_sibling(self) -> Optional[Tree]:
        """:return: This node's next sibling (or None)"""
        result = None
        if self._parent and self._parent.children:
            sibs = self._parent.children
            nextsib = sibs.index(self) + 1
            if nextsib < len(sibs):
                result = sibs[nextsib]
        return result

    @property
    def prev_sibling(self) -> Optional[Tree]:
        """:return: This node's previous sibling (or None)"""
        result = None
        if self._parent and self._parent.children:
            sibs = self._parent.children
            prevsib = sibs.index(self) - 1
            if prevsib >= 0:
                result = sibs[prevsib]
        return result

    def has_children(self) -> bool:
        """:return: Whether this node has children."""
        return self._children is not None and len(self._children) > 0

    @property
    def num_children(self) -> int:
        """:return: The number of children under this node."""
        return len(self._children) if self._children is not None else 0

    def has_parent(self) -> bool:
        """:return: Whether this node has a parent.

        Note that the "root" of a tree has no parent.
        """
        return self._parent is not None

    @property
    def depth(self) -> int:
        """:return: The depth of this node in its tree.

        Where the depth is measured as the number of "hops" from the root,
        whose depth is 0, to children until this node is reached.
        """
        result = 0
        curp = self.parent
        while curp is not None:
            curp = curp.parent
            result += 1
        return result

    def add_child(self, node_or_data: Union[Tree, Any], child_pos: Optional[int] = None) -> Tree:
        """Add a child node to this node, pruning the child from any other tree.

        :param node_or_data: The node (or data for a new node) to add
        :param child_pos: The (optional) position at which to insert the node.
            A position that is negative or not less than the current number
            of children results in the node being appended.
        :return: the (passed or new) child_node
        :raises ValueError: If the node to add is this node itself or one of
            this node's ancestors, either of which would create a cycle.
        """
        if isinstance(node_or_data, Tree):
            child = node_or_data
            # Reject cycles up front, before anything is modified; a cyclic
            # parent chain would make root/depth/get_path loop forever. Only
            # a node that has children can be an ancestor, so leaf nodes
            # skip the walk up the tree.
            if child is self or (child._children and child.is_ancestor(self)):
                raise ValueError(
                    "Cannot add a node as a child of itself or of one of its descendants"
                )
            if self._children is None:
                self._children = []
            child.prune()
            if child_pos is not None and 0 <= child_pos < len(self._children):
                self._children.insert(child_pos, child)
            else:
                self._children.append(child)
        else:
            if self._children is None:
                self._children = []
            # The constructor adds the new node to this node's children.
            child = Tree(node_or_data, self, child_pos=child_pos)
        child.parent = self
        return child

    def add_edge(
        self,
        parent_node_or_data: Union[Tree, Any],
        child_node_or_data: Union[Tree, Any],
    ) -> Tuple[Tree, Tree]:
        """Add the child to the parent, using an existing (matching) child or parent.
        If the parent and child already exist, but not as parent and child, the
        child node will be moved to be a child of the parent.

        If neither the parent nor child nodes exist in this tree, the parent
        will be added as a child of this (self) node.

        :param parent_node_or_data: The parent node (or its data)
        :param child_node_or_data: The child node (or its data)
        :return: The (parent-node, child-node) tuple
        """
        parent = None
        child = None

        if isinstance(parent_node_or_data, Tree):
            parent = parent_node_or_data
            # if it is not in this tree ...
            in_tree = self.find_nodes(
                lambda node: node == parent, include_self=True, only_first=True
            )
            if not in_tree:
                # ...then add it as a child of self
                self.add_child(parent)
        else:
            # can we find the data in this tree ...
            found = self.find_nodes(
                lambda node: node.data == parent_node_or_data, include_self=True, only_first=True
            )
            if found:
                parent = found[0]
            else:
                parent = self.add_child(parent_node_or_data)

        if isinstance(child_node_or_data, Tree):
            child = parent.add_child(child_node_or_data)
        else:
            # can we find the data in this tree ...
            found = self.find_nodes(
                lambda node: node.data == child_node_or_data, include_self=True, only_first=True
            )
            if found:
                # ...then move the existing node under the parent
                child = parent.add_child(found[0])
            else:
                child = parent.add_child(child_node_or_data)

        return (parent, child)

    def prune(self) -> Optional[Tree]:
        """Prune this node from its tree.
        :return: this node's (former) parent.
        """
        result = self._parent
        if self._parent is not None:
            if self._parent.children is not None:
                self._parent.children.remove(self)
            self._parent = None
        return result

    def find_nodes(
        self,
        accept_node_fn: Callable[[Tree], bool],
        traversal: str = "dfs",
        include_self: bool = True,
        only_first: bool = False,
        highest_only: bool = False,
    ) -> List[Tree]:
        """Find nodes where accept_node_fn(tree) is True,
        using a traversal of:
          'dfs' -- depth first search
          'bfs' -- breadth first search

        :param accept_node_fn: A function returning a boolean for any Tree
            argument; True to select the node or False to skip it
        :param traversal: Either 'dfs' or 'bfs' for depth- or breadth-first.
            With any other value, the children of visited nodes are not
            traversed.
        :param include_self: True to consider this node, False to start with
            its children
        :param only_first: True to stop after finding the first match
        :param highest_only: True to not collect any nodes under a selected node
        :return: The list of matching/accepted nodes
        """
        queue: Deque[Tree] = deque()
        found: List[Tree] = []
        if include_self:
            queue.append(self)
        elif self.children:
            queue.extend(self.children)
        while queue:
            item = queue.popleft()
            if accept_node_fn(item):
                found.append(item)
                if only_first:
                    break
                elif highest_only:
                    continue
            if item.children:
                if traversal == "dfs":
                    # Children go to the front, reversed so that the first
                    # child is visited next.
                    queue.extendleft(reversed(item.children))
                elif traversal == "bfs":
                    queue.extend(item.children)
        return found

    def collect_terminal_nodes(
        self, accept_node_fn: Optional[Callable[[Tree], bool]] = None, _found: Optional[List[Tree]] = None
    ) -> List[Tree]:
        """Collect this tree's terminal nodes.

        :param accept_node_fn: Optional function to select which terminal nodes
            to include in the result
        :param _found: The (optional) list to which to add results
        :return: The list of collected nodes
        """
        if _found is None:
            _found = []
        if not self._children:
            if accept_node_fn is None or accept_node_fn(self):
                _found.append(self)
        else:
            for child in self._children:
                child.collect_terminal_nodes(accept_node_fn=accept_node_fn, _found=_found)
        return _found

    def get_edges(
        self,
        traversal: str = "bfs",
        include_self: bool = True,
        as_data: bool = True,
    ) -> List[Tuple[Union[Tree, Any], Union[Tree, Any]]]:
        """Get the edges of this tree, either as Tree nodes or data.

        :param traversal: Either 'dfs' or 'bfs' for depth- or breadth-first
        :param include_self: True to include the edges from this node to its
            children, False to start with the edges below its children
        :param as_data: If True, then collect node data instead of Tree nodes
        :return: A list of (parent, child) tuples of edge nodes or data
        """
        queue: Deque[Tree] = deque()
        result: List[Tuple[Union[Tree, Any], Union[Tree, Any]]] = []
        if self.children:
            queue.extend(self.children)
        while queue:
            item = queue.popleft()
            if item.parent:
                if item.parent != self or include_self:
                    result.append((item.parent.data, item.data) if as_data else (item.parent, item))
            if item.children:
                if traversal == "dfs":
                    queue.extendleft(reversed(item.children))
                elif traversal == "bfs":
                    queue.extend(item.children)
        return result

    def get_path(self) -> List[Tree]:
        """Get the nodes from the root to this node (inclusive)."""
        path: Deque[Tree] = deque()
        node: Optional[Tree] = self
        while node is not None:
            path.appendleft(node)
            node = node.parent
        return list(path)

    def is_ancestor(self, other: Tree, self_is_ancestor: bool = False) -> bool:
        """Determine whether this node is an ancestor to the other.
        :param other: The potential descendant of this node
        :param self_is_ancestor: True if this node could be considered to
            be its own ancestor
        :return: True if this node is an ancestor of the other
        """
        result = False
        parent = other if self_is_ancestor else other.parent
        while parent is not None:
            if parent == self:
                result = True
                break
            parent = parent.parent
        return result

    def find_deepest_common_ancestor(self, other: Optional[Tree]) -> Optional[Tree]:
        """Find the deepest common ancestor to self and other.
        :param other: The other node whose shared ancestor with self to find
        :return: The deepest common ancestor to self and other, or None
        """
        if other is None:
            return None
        if self == other:
            return self
        result: Optional[Tree] = None
        # Walk both root-to-node paths in lockstep until they diverge.
        for mynode, othernode in zip(self.get_path(), other.get_path()):
            if mynode != othernode:
                break  # diverged
            result = mynode
        return result

    def as_string(self, delim: str = " ", multiline: bool = False) -> str:
        """Get a string representing this tree.
        :param delim: The (indentation) delimiter to use between node data
        :param multiline: True to include newlines in the result
        :return: A string representation of this tree and its descendants
        """
        if not self._children:
            return str(self.data)
        btwn = "\n" if multiline else ""
        parts = ["(" + str(self.data)]
        for child in self._children:
            # Multiline output indents each child by its depth in the tree.
            indent = (child.depth if multiline else 1) * delim
            parts.append(btwn + indent + child.as_string(delim=delim, multiline=multiline))
        parts.append(")")
        return "".join(parts)

    def get_deepest_left(self) -> Tree:
        """:return: The terminal descendent following the left-most branches
        of this node.
        """
        node = self
        while node.has_children() and node.children is not None:
            node = node.children[0]
        return node

    def get_deepest_right(self) -> Tree:
        """:return: The terminal descendent following the right-most branches
        of this node.
        """
        node = self
        while node.has_children() and node.children is not None:
            node = node.children[-1]
        return node

    def build_dot(
        self, node_name_fn: Optional[Callable[[Tree], str]] = None, **kwargs: Any
    ) -> graphviz.graphs.Digraph:
        """Build a graphviz dot file for this tree, passing kwargs to
        graphviz.Digraph.

        :param node_name_fn: A function to build a graph node name string
            from a node. Default is str(node.data).
        :return: The graphviz Digraph

        Example Usage:
            dot = build_dot(name='Name', format='png', node_attr={'shape': 'none'})
            print(dot.source)  # e.g. to a .dot file
            ipath = dot.render('/tmp/test/testimg', format='png')  # to create an image file
            Image(filename=ipath)  # to display the image in jupyter
        """
        if node_name_fn is None:
            node_name_fn = lambda n: str(n.data)
        dot = graphviz.Digraph(**kwargs)
        ids = {}  # ids[node] -> id
        # NOTE(review): graph nodes are collected from the root's whole tree
        # while edges are collected from this node's subtree only -- confirm
        # this is intended when called on a non-root node.
        for idx, node in enumerate(self.root.find_nodes(lambda _n: True, traversal="bfs")):
            ids[node] = idx
            dot.node(f"N_{idx:03}", node_name_fn(node))
        for node1, node2 in self.get_edges(as_data=False):
            idx1 = ids[node1]
            idx2 = ids[node2]
            dot.edge(f"N_{idx1:03}", f"N_{idx2:03}")
        return dot
435
+
436
+
437
def build_tree_from_string(from_string: str) -> Tree:
    """Build a tree object from the given tree string, e.g., output from
    the "Tree.as_string" method.

    A string that does not start (after leading whitespace) with an opening
    parenthesis becomes a single node holding the whole string as its data.

    :param from_string: The tree string
    :return: The built Tree
    """
    is_nested = from_string.strip().startswith("(")
    if is_nested:
        parser = OneOrMore(nestedExpr())
        parsed = parser.parseString(from_string)
        return build_tree_from_list(parsed.as_list())
    return Tree(from_string)
447
+
448
+
449
def build_tree_from_list(data: Union[Any, List]) -> Tree:
    """Auxiliary to build_tree for recursively building nodes from a list of
    lists.

    The first element of a non-empty list becomes the node and the remaining
    elements become its children; anything else becomes a single node with
    that value as its data.

    :param data: The tree data as a list of lists.
    :return: The root tree node
    """
    if not (isinstance(data, list) and len(data) > 0):
        # e.g. a plain string
        return Tree(data)
    head, *rest = data
    node = build_tree_from_list(head)
    for cdata in rest:
        node.add_child(build_tree_from_list(cdata))
    return node
@@ -0,0 +1,49 @@
1
+ Metadata-Version: 2.4
2
+ Name: dataknobs-structures
3
+ Version: 1.0.0
4
+ Summary: Data structures for AI knowledge bases
5
+ Author-email: Spence Koehler <KoehlerSB747@gmail.com>
6
+ Requires-Python: >=3.10
7
+ Requires-Dist: dataknobs-common>=1.0.0
8
+ Requires-Dist: graphviz>=0.20.3
9
+ Requires-Dist: pandas>=2.2.3
10
+ Requires-Dist: pyparsing>=3.0.0
11
+ Description-Content-Type: text/markdown
12
+
13
+ # dataknobs-structures
14
+
15
+ Data structures for AI knowledge bases.
16
+
17
+ ## Installation
18
+
19
+ ```bash
20
+ pip install dataknobs-structures
21
+ ```
22
+
23
+ ## Features
24
+
25
+ - **cdict**: Conditional dictionary that accepts or rejects key/value pairs via a validation function and tracks rejected items
26
+ - **Text / TextMetaData**: Text wrapper with associated metadata
27
+ - **RecordStore**: In-memory records (list of dicts and/or DataFrame) with optional TSV persistence
28
+ - **Tree**: Tree data structure with various traversal methods
29
+
30
+ ## Usage
31
+
32
+ ```python
33
+ from dataknobs_structures import Text, TextMetaData, Tree
34
+
35
+ # Create a tree structure
36
+ tree = Tree("root")
37
+ child = tree.add_child("child1")
38
+ print(tree.as_string())  # (root child1)
39
+
40
+ # Create a text with metadata
41
+ doc = Text(
42
+     "Sample document content",
43
+     TextMetaData(text_id=1, author="John Doe", date="2024-01-01"),
44
+ )
45
+ ```
46
+
47
+ ## License
48
+
49
+ See LICENSE file in the root repository.
@@ -0,0 +1,8 @@
1
+ dataknobs_structures/__init__.py,sha256=4QTb5e2YVRQBIP_e-8Vq3eLJpZt3-QpEil_6s80iF-g,433
2
+ dataknobs_structures/conditional_dict.py,sha256=h9qnhyDkTZynpAmMZz7_1XE235I9gZ0BS9eh_h14CtU,2294
3
+ dataknobs_structures/document.py,sha256=LiQf0tyEYl0VdIGJ9vZlG2rkGWgJ867XbmrC4JshBTk,2182
4
+ dataknobs_structures/record_store.py,sha256=uPEmusrGPaHtiZF3fWCG_LAIUYoQeKYWAOG87rLmSmw,2931
5
+ dataknobs_structures/tree.py,sha256=13bJvCPn13YJhFcRHoYMuG217o4o3KbCdkYu-S4y4lw,17051
6
+ dataknobs_structures-1.0.0.dist-info/METADATA,sha256=wetQ-K-I8Ldp-6CHA0zbCdXeWP5OKJjkWk5vspo2Ygo,1134
7
+ dataknobs_structures-1.0.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
8
+ dataknobs_structures-1.0.0.dist-info/RECORD,,
@@ -0,0 +1,4 @@
1
+ Wheel-Version: 1.0
2
+ Generator: hatchling 1.27.0
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any