dataknobs-structures 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dataknobs_structures/__init__.py +17 -0
- dataknobs_structures/conditional_dict.py +65 -0
- dataknobs_structures/document.py +83 -0
- dataknobs_structures/record_store.py +83 -0
- dataknobs_structures/tree.py +462 -0
- dataknobs_structures-1.0.0.dist-info/METADATA +49 -0
- dataknobs_structures-1.0.0.dist-info/RECORD +8 -0
- dataknobs_structures-1.0.0.dist-info/WHEEL +4 -0
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
"""Data structures for AI knowledge bases."""
|
|
2
|
+
|
|
3
|
+
from dataknobs_structures.conditional_dict import cdict
|
|
4
|
+
from dataknobs_structures.document import Text, TextMetaData
|
|
5
|
+
from dataknobs_structures.record_store import RecordStore
|
|
6
|
+
from dataknobs_structures.tree import Tree, build_tree_from_string
|
|
7
|
+
|
|
8
|
+
__version__ = "1.0.0"
|
|
9
|
+
|
|
10
|
+
__all__ = [
|
|
11
|
+
"RecordStore",
|
|
12
|
+
"Text",
|
|
13
|
+
"TextMetaData",
|
|
14
|
+
"Tree",
|
|
15
|
+
"build_tree_from_string",
|
|
16
|
+
"cdict",
|
|
17
|
+
]
|
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
"""Implementation of a conditional associative array (dict) using the strategy
|
|
2
|
+
pattern.
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
from collections.abc import Callable
|
|
6
|
+
from typing import Any, Dict, Optional, Union, Iterable, Tuple
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class cdict(dict):
    """A dictionary that conditionally accepts keys and/or values.

    This implementation uses the strategy pattern: a function supplied at
    construction time validates every item that is set. If a key/value pair
    is not accepted, it is not stored in the dictionary; instead it is
    recorded in the "rejected" property.

    All of the mutating entry points defined here (item assignment,
    ``setdefault``, ``update``, in-place union ``|=`` and the constructor)
    are routed through the acceptance function.
    """

    def __init__(self, accept_fn: Callable[[Dict, Any, Any], bool], *args: Any, **kwargs: Any) -> None:
        """Initialize with the acceptance strategy and optional initial items.

        :param accept_fn: A fn(d, key, value) that returns True to accept
            the key/value into this dict d, or False to reject.
        :param args: Optional mapping or iterable of key/value pairs holding
            initial items (each is validated).
        :param kwargs: Optional initial items given as keywords (each is
            validated).
        """
        # Start empty so that every initial item passes through accept_fn
        # (via update) instead of being stored directly by dict.__init__.
        super().__init__()
        self._rejected: Dict[Any, Any] = {}
        self.accept_fn = accept_fn
        self.update(*args, **kwargs)

    @property
    def rejected(self) -> Dict:
        """The rejected items, mapping each rejected key to its latest rejected value."""
        return self._rejected

    def __setitem__(self, key: Any, value: Any) -> None:
        """Store the item if accepted; otherwise record it as rejected."""
        if self.accept_fn(self, key, value):
            super().__setitem__(key, value)
        else:
            self._rejected[key] = value

    def setdefault(self, key: Any, default: Any = None) -> Any:
        """Get the value for key, first setting it to default if absent and accepted.

        :param key: The key to look up
        :param default: The value to store if the key is absent
        :return: The existing value, the newly stored default, or None when
            the key is absent and the default is rejected.
        """
        if key in self:
            return self[key]
        if self.accept_fn(self, key, default):
            super().__setitem__(key, default)
            return default
        self._rejected[key] = default
        return None

    def update(self, *args: Any, **kwargs: Any) -> None:
        """Add items from a mapping or iterable of pairs, and/or keywords.

        Each item is validated individually, so accepted items are stored
        and rejected items are recorded in "rejected".
        """
        # Only the first positional argument is consulted.
        if args:
            other = args[0]
            if hasattr(other, "keys"):
                # It's a mapping-like object
                for key in other.keys():
                    self[key] = other[key]
            else:
                # It's an iterable of key-value pairs
                for key, value in other:
                    self[key] = value
        for key, value in kwargs.items():
            self[key] = value

    def __ior__(self, other: Any) -> "cdict":
        """In-place union (``d |= other``) with per-item validation.

        Without this override, dict's own in-place union would store the
        items directly and skip the acceptance function.
        """
        self.update(other)
        return self
|
|
@@ -0,0 +1,83 @@
|
|
|
1
|
+
from typing import Any
|
|
2
|
+
|
|
3
|
+
# Key text metadata attributes
|
|
4
|
+
TEXT_ID_ATTR = "text_id"
|
|
5
|
+
TEXT_LABEL_ATTR = "text_label"
|
|
6
|
+
TEXT_LABEL = "text"
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class MetaData:
|
|
10
|
+
"""Container for managing and providing access to meta-data."""
|
|
11
|
+
|
|
12
|
+
def __init__(self, key_data: dict[str, Any], **kwargs: Any) -> None:
|
|
13
|
+
"""
|
|
14
|
+
Initialize with the mandatory or "key" data and any additional optional
|
|
15
|
+
values.
|
|
16
|
+
"""
|
|
17
|
+
self._data = key_data.copy() if key_data is not None else {}
|
|
18
|
+
if kwargs is not None:
|
|
19
|
+
self._data.update(kwargs)
|
|
20
|
+
|
|
21
|
+
@property
|
|
22
|
+
def data(self) -> dict[str, Any]:
|
|
23
|
+
"""The data dictionary."""
|
|
24
|
+
return self._data
|
|
25
|
+
|
|
26
|
+
def get_value(self, attribute: str, missing: str | None = None) -> Any:
|
|
27
|
+
"""
|
|
28
|
+
Get the value for the given attribute, or the "missing" value.
|
|
29
|
+
|
|
30
|
+
:param attribute: The meta-data attribute whose value to get
|
|
31
|
+
:param missing: The missing value
|
|
32
|
+
:return: The attribute's value or the missing value.
|
|
33
|
+
"""
|
|
34
|
+
return self.data.get(attribute, missing)
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
class TextMetaData(MetaData):
    """Meta-data for a text: its id and label plus any optional entries."""

    def __init__(self, text_id: Any, text_label: str = TEXT_LABEL, **kwargs: Any) -> None:
        """
        :param text_id: The identifier for the text
        :param text_label: The label for the text
        :param kwargs: Any additional meta-data entries
        """
        key_data = {TEXT_ID_ATTR: text_id, TEXT_LABEL_ATTR: text_label}
        super().__init__(key_data, **kwargs)

    @property
    def text_id(self) -> Any:
        """The text's identifier."""
        values = self.data
        return values[TEXT_ID_ATTR]

    @property
    def text_label(self) -> str | Any:
        """The text's label."""
        values = self.data
        return values[TEXT_LABEL_ATTR]
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
class Text:
    """Wrapper for a text string for analysis."""

    def __init__(
        self,
        text: str,
        metadata: TextMetaData | None = None,
    ) -> None:
        """
        :param text: The text string to wrap
        :param metadata: The text's meta-data. If None (the default), a
            TextMetaData with text_id 0 and the default text label is used.
        """
        self._text = text
        self._metadata = metadata if metadata is not None else TextMetaData(0, TEXT_LABEL)

    @property
    def text(self) -> str:
        """The wrapped text string."""
        return self._text

    @property
    def text_id(self) -> Any:
        """The text's identifier, taken from its meta-data."""
        return self.metadata.text_id

    @property
    def text_label(self) -> str:
        """The text's label, taken from its meta-data."""
        return self.metadata.text_label

    @property
    def metadata(self) -> TextMetaData:
        """The text's meta-data."""
        return self._metadata
|
|
@@ -0,0 +1,83 @@
|
|
|
1
|
+
import json
|
|
2
|
+
import os
|
|
3
|
+
from typing import Any, Dict, List, Optional
|
|
4
|
+
|
|
5
|
+
import pandas as pd
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class RecordStore:
    """Wrapper around a sequence of records represented in memory as a list of
    dictionaries and/or as a dataframe and as a tsv file on disk.
    """

    def __init__(
        self,
        tsv_fpath: Optional[str],
        df: Optional[pd.DataFrame] = None,
        sep: str = "\t",
    ):
        """:param tsv_fpath: The path to the tsv file on disk. If None or
            empty, then data will not be persisted.
        :param df: An initial dataframe
        :param sep: The file separator to use (if not a tab)
        """
        self.tsv_fpath = tsv_fpath
        self.init_df = df
        self.sep = sep
        self._df: Optional[pd.DataFrame] = None
        self._recs: List[Dict[str, Any]] = []
        self._init_data(df)

    def _init_data(self, df: Optional[pd.DataFrame] = None) -> None:
        """Initialize store data from the tsv file if it exists, else from df.

        :param df: The dataframe to (copy and) use when there is no backing
            file on disk.
        """
        # A None or empty path means "no backing file".
        if self.tsv_fpath and os.path.exists(self.tsv_fpath):
            try:
                self._df = pd.read_csv(self.tsv_fpath, sep=self.sep)
            except pd.errors.EmptyDataError:
                # The file has no parseable columns (e.g. it was saved from
                # a cleared store); treat it as holding no records.
                self._df = None
        else:
            self._df = df.copy() if df is not None else None
        self._recs = self._build_recs_from_df()

    def _build_recs_from_df(self) -> List[Dict[str, Any]]:
        """Build records (one dictionary per row) from the dataframe.

        Values are round-tripped through the dataframe's JSON serialization.
        A missing or zero-row dataframe yields an empty list.
        """
        if self._df is None:
            return []
        # Parse the whole JSON array at once: a zero-row frame serializes to
        # "[]" and yields no records instead of an unparseable blank line.
        return json.loads(self._df.to_json(orient="records"))

    @property
    def df(self) -> Optional[pd.DataFrame]:
        """Get the records as a dataframe (rebuilt from the records if needed)."""
        if self._df is None and self._recs is not None:
            self._df = pd.DataFrame(self._recs)
        return self._df

    @property
    def records(self) -> List[Dict[str, Any]]:
        """Get the records as a list of dictionaries"""
        return self._recs or []

    def clear(self) -> None:
        """Clear the contents, starting from empty, but don't auto-"save"."""
        self._recs.clear()
        self._df = None

    def add_rec(self, rec: Dict[str, Any]) -> None:
        """Add the record

        :param rec: The record to append
        """
        self._recs.append(rec)
        # Invalidate the cached dataframe; it is rebuilt lazily by "df".
        self._df = None

    def save(self) -> None:
        """Save the records to disk as a tsv (no-op without a backing file path)"""
        if self.tsv_fpath and self.df is not None:
            self.df.to_csv(self.tsv_fpath, sep=self.sep, index=False)

    def restore(self, df: Optional[pd.DataFrame] = None) -> None:
        """Restore records from the version on disk, discarding any changes.
        NOTE: If there is no backing file (e.g., tsv_fpath is None), then
        restore will discard all data and restart with the given df (if not
        None), the init df, or start anew.

        :param df: The dataframe to restart from when there is no backing file
        """
        self._init_data(df if df is not None else self.init_df)
|
|
@@ -0,0 +1,462 @@
|
|
|
1
|
+
"""Implementation of a simple tree data structure."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from collections import deque
|
|
6
|
+
from collections.abc import Callable
|
|
7
|
+
from typing import Any, List, Tuple, Union, Optional, Deque
|
|
8
|
+
|
|
9
|
+
import graphviz
|
|
10
|
+
from pyparsing import OneOrMore, nestedExpr
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class Tree:
    """Implementation of a simple tree data structure.

    Where the tree is represented as a node containing:
      * (arbitrary) data
      * a list of (ordered) child nodes
      * a single (optional) parent node

    And each tree node is doubly linked from parent to child(ren) and from
    child to parent for efficient traversal both up (to parent) and down
    (to children) the tree.
    """

    def __init__(
        self,
        data: Any,
        parent: Union[Tree, Any] = None,
        child_pos: Optional[int] = None,
    ):
        """Initialize a tree (node), optionally adding it to the given parent
        at an optional child position.

        :param data: The data to be contained within the node.
        :param parent: The parent node to this node. If not a Tree, it is
            treated as data and wrapped in a new parent node.
        :param child_pos: The (optional) position among the parent's
            children at which to insert this node.
        """
        self._data = data
        self._children: Optional[List[Tree]] = None
        self._parent: Optional[Tree] = None
        if parent is not None:
            if not isinstance(parent, Tree):
                parent = Tree(parent)
            parent.add_child(self, child_pos)

    def __repr__(self) -> str:
        """:return: The string representation of this tree."""
        return self.as_string(delim=" ", multiline=True)

    @property
    def data(self) -> Any:
        """:return: This node's data."""
        return self._data

    @data.setter
    def data(self, data: Any) -> None:
        """Set this node's data."""
        self._data = data

    @property
    def children(self) -> Optional[List[Tree]]:
        """:return: This node's children -- list of child nodes (or None)."""
        return self._children

    @property
    def parent(self) -> Optional[Tree]:
        """:return: This node's parent."""
        return self._parent

    @parent.setter
    def parent(self, parent: Optional[Tree]) -> None:
        """Set this node's parent reference.

        Note that this only sets the upward link; it does not add this node
        to the parent's children (see add_child for that).
        """
        self._parent = parent

    @property
    def root(self) -> Tree:
        """:return: The root of this node's tree."""
        root = self
        while root.parent is not None:
            root = root.parent
        return root

    @property
    def sibnum(self) -> int:
        """:return: This node's sibling number (0-based) among its parent's children"""
        if self._parent is not None and self._parent.children is not None:
            return self._parent.children.index(self)
        return 0

    @property
    def num_siblings(self) -> int:
        """:return: Get the number of siblings (including self) of this node"""
        return self._parent.num_children if self._parent is not None else 1

    @property
    def next_sibling(self) -> Optional[Tree]:
        """:return: This node's next sibling (or None)"""
        result = None
        if self._parent and self._parent.children:
            sibs = self._parent.children
            nextsib = sibs.index(self) + 1
            if nextsib < len(sibs):
                result = sibs[nextsib]
        return result

    @property
    def prev_sibling(self) -> Optional[Tree]:
        """:return: This node's previous sibling (or None)"""
        result = None
        if self._parent and self._parent.children:
            sibs = self._parent.children
            prevsib = sibs.index(self) - 1
            if prevsib >= 0:
                result = sibs[prevsib]
        return result

    def has_children(self) -> bool:
        """:return: Whether this node has children."""
        return self._children is not None and len(self._children) > 0

    @property
    def num_children(self) -> int:
        """:return: The number of children under this node."""
        return len(self._children) if self._children is not None else 0

    def has_parent(self) -> bool:
        """:return: Whether this node has a parent.

        Note that the "root" of a tree has no parent.
        """
        return self._parent is not None

    @property
    def depth(self) -> int:
        """:return: The depth of this node in its tree.

        Where the depth is measured as the number of "hops" from the root,
        whose depth is 0, to children until this node is reached.
        """
        result = 0
        curp = self.parent
        while curp is not None:
            curp = curp.parent
            result += 1
        return result

    def add_child(self, node_or_data: Union[Tree, Any], child_pos: Optional[int] = None) -> Tree:
        """Add a child node to this node, pruning the child from any other tree.

        :param node_or_data: The node (or data for a new node) to add
        :param child_pos: The (optional) position at which to insert the node.
            If None or out of range, the child is appended.
        :return: the (passed or new) child_node
        :raises ValueError: If the node to add is this node or one of this
            node's ancestors (which would create a cycle).
        """
        if isinstance(node_or_data, Tree):
            child = node_or_data
            # Refuse to attach a node beneath itself: doing so would create
            # a parent cycle and make upward walks (root, depth, get_path)
            # loop forever.
            ancestor: Optional[Tree] = self
            while ancestor is not None:
                if ancestor is child:
                    raise ValueError(
                        "Cannot add a node as a child of itself or of one of its descendants"
                    )
                ancestor = ancestor._parent
            if self._children is None:
                self._children = []
            # Detach from the current parent first (this may shift positions
            # when the child is already one of this node's children).
            child.prune()
            if child_pos is not None and 0 <= child_pos < len(self._children):
                self._children.insert(child_pos, child)
            else:
                self._children.append(child)
        else:
            # The constructor attaches the new node to this node.
            child = Tree(node_or_data, self, child_pos=child_pos)
        child.parent = self
        return child

    def add_edge(
        self,
        parent_node_or_data: Union[Tree, Any],
        child_node_or_data: Union[Tree, Any],
    ) -> Tuple[Tree, Tree]:
        """Add the child to the parent, using an existing (matching) child or parent.
        If the parent and child already exist, but not as parent and child, the
        child node will be moved to be a child of the parent.

        If neither the parent nor child nodes exist in this tree, the parent
        will be added as a child of this (self) node.

        :param parent_node_or_data: The parent node (or its data)
        :param child_node_or_data: The child node (or its data)
        :return: The (parent-node, child-node) tuple
        """
        parent = None
        child = None

        if isinstance(parent_node_or_data, Tree):
            parent = parent_node_or_data
            # if it is not in this tree ...
            if (
                len(
                    self.find_nodes(lambda node: node == parent, include_self=True, only_first=True)
                )
                == 0
            ):
                # ...then add it as a child of self
                self.add_child(parent)
        else:
            # can we find the data in this tree ...
            found = self.find_nodes(
                lambda node: node.data == parent_node_or_data, include_self=True, only_first=True
            )
            if len(found) > 0:
                parent = found[0]
            else:
                parent = self.add_child(parent_node_or_data)

        if isinstance(child_node_or_data, Tree):
            child = parent.add_child(child_node_or_data)
        else:
            # can we find the data in this tree ...
            found = self.find_nodes(
                lambda node: node.data == child_node_or_data, include_self=True, only_first=True
            )
            if len(found) > 0:
                child = parent.add_child(found[0])
            else:
                child = parent.add_child(child_node_or_data)

        return (parent, child)

    def prune(self) -> Optional[Tree]:
        """Prune this node from its tree.
        :return: this node's (former) parent.
        """
        result = self._parent
        if self._parent is not None:
            if self._parent.children is not None:
                self._parent.children.remove(self)
            self._parent = None
        return result

    def find_nodes(
        self,
        accept_node_fn: Callable[[Tree], bool],
        traversal: str = "dfs",
        include_self: bool = True,
        only_first: bool = False,
        highest_only: bool = False,
    ) -> List[Tree]:
        """Find nodes where accept_node_fn(tree) is True,
        using a traversal of:
          'dfs' -- depth first search
          'bfs' -- breadth first search

        :param accept_node_fn: A function returning a boolean for any Tree
            argument; True to select the node or False to skip it
        :param traversal: Either 'dfs' or 'bfs' for depth- or breadth-first.
            Any other value results in no descent below the starting nodes.
        :param include_self: True to consider this node, False to start with
            its children
        :param only_first: True to stop after finding the first match
        :param highest_only: True to not collect any nodes under a selected node
        :return: The list of matching/accepted nodes
        """
        queue: Deque[Tree] = deque()
        found: List[Tree] = []
        if include_self:
            queue.append(self)
        elif self.children:
            queue.extend(self.children)
        while bool(queue):  # true while length(queue) > 0
            item = queue.popleft()
            if accept_node_fn(item):
                found.append(item)
                if only_first:
                    break
                elif highest_only:
                    continue
            if item.children:
                if traversal == "dfs":
                    # Push children on the front, preserving their order
                    queue.extendleft(reversed(item.children))
                elif traversal == "bfs":
                    queue.extend(item.children)
        return found

    def collect_terminal_nodes(
        self, accept_node_fn: Optional[Callable[[Tree], bool]] = None, _found: Optional[List[Tree]] = None
    ) -> List[Tree]:
        """Collect this tree's terminal nodes.

        :param accept_node_fn: Optional function to select which terminal nodes
            to include in the result
        :param _found: The (optional) list to which to add results
        :return: The list of collected nodes
        """
        if _found is None:
            _found = list()
        if not self._children:
            if accept_node_fn is None or accept_node_fn(self):
                _found.append(self)
        else:
            for child in self._children:
                child.collect_terminal_nodes(accept_node_fn=accept_node_fn, _found=_found)
        return _found

    def get_edges(
        self,
        traversal: str = "bfs",
        include_self: bool = True,
        as_data: bool = True,
    ) -> List[Tuple[Union[Tree, Any], Union[Tree, Any]]]:
        """Get the edges of this tree, either as Tree nodes or data.

        :param traversal: Either 'dfs' or 'bfs' for depth- or breadth-first
        :param include_self: True to include this node, False to start with
            its children
        :param as_data: If True, then collect node data instead of Tree nodes
        :return: A list of (parent, child) tuples of edge nodes or data
        """
        queue: Deque[Tree] = deque()
        result: List[Tuple[Union[Tree, Any], Union[Tree, Any]]] = []
        if self.children:
            queue.extend(self.children)
        while bool(queue):  # true while length(queue) > 0
            item = queue.popleft()
            if item.parent:
                # Edges out of this (self) node are skipped unless include_self
                if item.parent != self or include_self:
                    result.append((item.parent.data, item.data) if as_data else (item.parent, item))
            if item.children:
                if traversal == "dfs":
                    queue.extendleft(reversed(item.children))
                elif traversal == "bfs":
                    queue.extend(item.children)
        return result

    def get_path(self) -> List[Tree]:
        """Get the nodes from the root to this node (inclusive)."""
        path: Deque[Tree] = deque()
        node: Optional[Tree] = self
        while node is not None:
            path.appendleft(node)
            node = node.parent
        return list(path)

    def is_ancestor(self, other: Tree, self_is_ancestor: bool = False) -> bool:
        """Determine whether this node is an ancestor to the other.
        :param other: The potential descendant of this node
        :param self_is_ancestor: True if this node could be considered to
            be its own ancestor
        :return: True if this node is an ancestor of the other
        """
        result = False
        parent = other if self_is_ancestor else other.parent
        while parent is not None:
            if parent == self:
                result = True
                break
            parent = parent.parent
        return result

    def find_deepest_common_ancestor(self, other: Optional[Tree]) -> Optional[Tree]:
        """Find the deepest common ancestor to self and other.
        :param other: The other node whose shared ancestor with self to find
        :return: The deepest common ancestor to self and other, or None
        """
        if other is None:
            return None
        if self == other:
            return self
        result: Optional[Tree] = None
        mypath, otherpath = self.get_path(), other.get_path()
        mypathlen, otherpathlen = len(mypath), len(otherpath)
        mypathidx, otherpathidx = 0, 0
        # Walk both root-to-node paths in lockstep until they diverge
        while mypathidx < mypathlen and otherpathidx < otherpathlen:
            mynode, othernode = mypath[mypathidx], otherpath[otherpathidx]
            mypathidx += 1
            otherpathidx += 1
            if mynode != othernode:
                break  # diverged
            else:
                result = mynode
        return result

    def as_string(self, delim: str = " ", multiline: bool = False) -> str:
        """Get a string representing this tree.
        :param delim: The (indentation) delimiter to use between node data
        :param multiline: True to include newlines in the result
        :return: A string representation of this tree and its descendants
        """
        result = ""
        if self._children:
            btwn = "\n" if multiline else ""
            result = "(" + str(self.data)
            for child in self._children:
                # In multiline mode, indent each child by its depth in the tree
                d = (child.depth if multiline else 1) * delim
                result += btwn + d + child.as_string(delim=delim, multiline=multiline)
            result += ")"
        else:
            result = str(self.data)
        return result

    def get_deepest_left(self) -> Tree:
        """:return: The terminal descendent following the left-most branches
        of this node.
        """
        node = self
        while node.has_children() and node.children is not None:
            node = node.children[0]
        return node

    def get_deepest_right(self) -> Tree:
        """:return: The terminal descendent following the right-most branches
        of this node.
        """
        node = self
        while node.has_children() and node.children is not None:
            node = node.children[-1]
        return node

    def build_dot(
        self, node_name_fn: Optional[Callable[[Tree], str]] = None, **kwargs: Any
    ) -> graphviz.graphs.Digraph:
        """Build a graphviz dot file for this tree, passing kwargs to
        graphviz.Digraph.

        :param node_name_fn: A function to build a graph node name string
            from a node. Default is str(node.data).
        :return: The graphviz Digraph

        Example Usage:
            dot = build_dot(name='Name', format='png', node_attr={'shape': 'none'})
            print(dot.source)  # e.g. to a .dot file
            ipath = dot.render('/tmp/test/testimg', format='png')  # to create an image file
            Image(filename=ipath)  # to display the image in jupyter
        """
        if node_name_fn is None:
            node_name_fn = lambda n: str(n.data)
        dot = graphviz.Digraph(**kwargs)
        ids = dict()  # ids[node] -> id
        for idx, node in enumerate(self.root.find_nodes(lambda _n: True, traversal="bfs")):
            ids[node] = idx
            dot.node(f"N_{idx:03}", node_name_fn(node))
        for node1, node2 in self.get_edges(as_data=False):
            idx1 = ids[node1]
            idx2 = ids[node2]
            dot.edge(f"N_{idx1:03}", f"N_{idx2:03}")
        return dot
|
|
435
|
+
|
|
436
|
+
|
|
437
|
+
def build_tree_from_string(from_string: str) -> Tree:
    """Construct a Tree from a tree string such as the output of the
    "Tree.as_string" method.

    A string that does not start (after leading whitespace) with an opening
    parenthesis becomes a single node holding the string as given.

    :param from_string: The tree string
    :return: The built Tree
    """
    if from_string.strip().startswith("("):
        # Parse the parenthesized expression(s) into nested lists
        parsed = OneOrMore(nestedExpr()).parseString(from_string)
        nested = parsed.as_list()
        return build_tree_from_list(nested)
    return Tree(from_string)
|
|
447
|
+
|
|
448
|
+
|
|
449
|
+
def build_tree_from_list(data: Union[Any, List]) -> Tree:
    """Recursive helper that builds tree nodes from a list of lists.

    For a non-empty list, the first element builds the node and each
    remaining element builds one of its children. Anything else (e.g., a
    string, or an empty list) becomes the data of a single node.

    :param data: The tree data as a list of lists.
    :return: The root tree node
    """
    if not isinstance(data, list) or len(data) == 0:
        # e.g. a plain string
        return Tree(data)
    head, *rest = data
    root = build_tree_from_list(head)
    for item in rest:
        root.add_child(build_tree_from_list(item))
    return root
|
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: dataknobs-structures
|
|
3
|
+
Version: 1.0.0
|
|
4
|
+
Summary: Data structures for AI knowledge bases
|
|
5
|
+
Author-email: Spence Koehler <KoehlerSB747@gmail.com>
|
|
6
|
+
Requires-Python: >=3.10
|
|
7
|
+
Requires-Dist: dataknobs-common>=1.0.0
|
|
8
|
+
Requires-Dist: graphviz>=0.20.3
|
|
9
|
+
Requires-Dist: pandas>=2.2.3
|
|
10
|
+
Requires-Dist: pyparsing>=3.0.0
|
|
11
|
+
Description-Content-Type: text/markdown
|
|
12
|
+
|
|
13
|
+
# dataknobs-structures
|
|
14
|
+
|
|
15
|
+
Data structures for AI knowledge bases.
|
|
16
|
+
|
|
17
|
+
## Installation
|
|
18
|
+
|
|
19
|
+
```bash
|
|
20
|
+
pip install dataknobs-structures
|
|
21
|
+
```
|
|
22
|
+
|
|
23
|
+
## Features
|
|
24
|
+
|
|
25
|
+
- **cdict** (conditional dict): Dictionary that accepts or rejects key/value pairs via a validation function
|
|
26
|
+
- **Text** / **TextMetaData**: Text wrapper with associated metadata
|
|
27
|
+
- **RecordStore**: Efficient record storage and retrieval
|
|
28
|
+
- **Tree**: Tree data structure with various traversal methods
|
|
29
|
+
|
|
30
|
+
## Usage
|
|
31
|
+
|
|
32
|
+
```python
from dataknobs_structures import Tree, Text, TextMetaData

# Create a tree structure
tree = Tree("root")
child1 = tree.add_child("child1")
child1.add_child("grandchild")

# Create a text with metadata
doc = Text(
    "Sample document content",
    TextMetaData(text_id=1, author="John Doe", date="2024-01-01"),
)
```
|
|
46
|
+
|
|
47
|
+
## License
|
|
48
|
+
|
|
49
|
+
See LICENSE file in the root repository.
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
dataknobs_structures/__init__.py,sha256=4QTb5e2YVRQBIP_e-8Vq3eLJpZt3-QpEil_6s80iF-g,433
|
|
2
|
+
dataknobs_structures/conditional_dict.py,sha256=h9qnhyDkTZynpAmMZz7_1XE235I9gZ0BS9eh_h14CtU,2294
|
|
3
|
+
dataknobs_structures/document.py,sha256=LiQf0tyEYl0VdIGJ9vZlG2rkGWgJ867XbmrC4JshBTk,2182
|
|
4
|
+
dataknobs_structures/record_store.py,sha256=uPEmusrGPaHtiZF3fWCG_LAIUYoQeKYWAOG87rLmSmw,2931
|
|
5
|
+
dataknobs_structures/tree.py,sha256=13bJvCPn13YJhFcRHoYMuG217o4o3KbCdkYu-S4y4lw,17051
|
|
6
|
+
dataknobs_structures-1.0.0.dist-info/METADATA,sha256=wetQ-K-I8Ldp-6CHA0zbCdXeWP5OKJjkWk5vspo2Ygo,1134
|
|
7
|
+
dataknobs_structures-1.0.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
|
8
|
+
dataknobs_structures-1.0.0.dist-info/RECORD,,
|