henge 0.1.1__py3-none-any.whl → 0.2.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- henge/__init__.py +1 -1
- henge/_version.py +1 -1
- henge/const.py +1 -1
- henge/deprecated.py +224 -0
- henge/henge.py +213 -354
- {henge-0.1.1.dist-info → henge-0.2.1.dist-info}/METADATA +6 -8
- henge-0.2.1.dist-info/RECORD +11 -0
- {henge-0.1.1.dist-info → henge-0.2.1.dist-info}/WHEEL +1 -1
- {henge-0.1.1.dist-info → henge-0.2.1.dist-info}/entry_points.txt +0 -1
- henge-0.1.1.dist-info/RECORD +0 -10
- {henge-0.1.1.dist-info → henge-0.2.1.dist-info}/LICENSE.txt +0 -0
- {henge-0.1.1.dist-info → henge-0.2.1.dist-info}/top_level.txt +0 -0
henge/__init__.py
CHANGED
|
@@ -5,6 +5,6 @@ from ._version import __version__
|
|
|
5
5
|
from .henge import *
|
|
6
6
|
|
|
7
7
|
__classes__ = ["Henge"]
|
|
8
|
-
__all__ = __classes__ + ["connect_mongo", "split_schema", "NotFoundException"]
|
|
8
|
+
__all__ = __classes__ + ["connect_mongo", "split_schema", "NotFoundException", "canonical_str"]
|
|
9
9
|
|
|
10
10
|
logmuse.init_logger("henge", propagate=True)
|
henge/_version.py
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
__version__ = "0.
|
|
1
|
+
__version__ = "0.2.0"
|
henge/const.py
CHANGED
henge/deprecated.py
ADDED
|
@@ -0,0 +1,224 @@
|
|
|
1
|
+
|
|
2
|
+
def retrieveOld(self, druid, reclimit=None, raw=False):
|
|
3
|
+
|
|
4
|
+
try:
|
|
5
|
+
item_type = self.database[druid + ITEM_TYPE]
|
|
6
|
+
except:
|
|
7
|
+
_LOGGER.debug(f"Item type not saved in database for {druid}")
|
|
8
|
+
raise NotFoundException(druid)
|
|
9
|
+
|
|
10
|
+
# _LOGGER.debug("item_type: {}".format(item_type))
|
|
11
|
+
# _LOGGER.debug("henge_to_query: {}".format(henge_to_query))
|
|
12
|
+
|
|
13
|
+
schema = self.schemas[item_type] #"type" in schema and
|
|
14
|
+
# string = druid
|
|
15
|
+
_LOGGER.debug("Got druid to retrieve: {} / item_type: {} / schema: {}".format(
|
|
16
|
+
druid, item_type, schema))
|
|
17
|
+
|
|
18
|
+
if schema["type"] == "array":
|
|
19
|
+
string = self.lookup(druid, item_type)
|
|
20
|
+
_LOGGER.debug("Lookup/array/Recursive: {}; Schema: {}".format(string, schema))
|
|
21
|
+
splitstr = string.split(DELIM_ITEM)
|
|
22
|
+
# if self.flexible_digests:
|
|
23
|
+
# pass
|
|
24
|
+
# item_name = splitstr.pop(0)
|
|
25
|
+
if isinstance(reclimit, int) and reclimit == 0:
|
|
26
|
+
return splitstr
|
|
27
|
+
if 'henge_class' in schema['items']:
|
|
28
|
+
_LOGGER.debug("Henge classed array: {}; Schema: {}".format(string, schema))
|
|
29
|
+
if isinstance(reclimit, int):
|
|
30
|
+
reclimit = reclimit - 1
|
|
31
|
+
return [self.retrieve(substr, reclimit) for substr in splitstr]
|
|
32
|
+
else:
|
|
33
|
+
return splitstr
|
|
34
|
+
elif schema["type"] == "object":
|
|
35
|
+
string = self.lookup(druid, item_type)
|
|
36
|
+
attr_array = string.split(DELIM_ATTR)
|
|
37
|
+
if self.flexible_digests:
|
|
38
|
+
keys = attr_array[::2] # evens
|
|
39
|
+
vals = attr_array[1::2] # odds
|
|
40
|
+
item_reconstituted = dict(zip(keys,vals))
|
|
41
|
+
else:
|
|
42
|
+
item_reconstituted = dict(zip(schema['properties'].keys(),
|
|
43
|
+
attr_array))
|
|
44
|
+
# I think this part needs to be removed... it's based on the
|
|
45
|
+
# previous 'recursive' for arrays, which went away...
|
|
46
|
+
# but actually these may be added in by me, so nevermind.
|
|
47
|
+
if 'recursive' in schema:
|
|
48
|
+
if isinstance(reclimit, int) and reclimit == 0:
|
|
49
|
+
_LOGGER.debug("Lookup/obj/Recursive: {}; Schema: {}".format(string, schema))
|
|
50
|
+
return item_reconstituted
|
|
51
|
+
else:
|
|
52
|
+
if isinstance(reclimit, int):
|
|
53
|
+
reclimit = reclimit - 1
|
|
54
|
+
for recursive_attr in schema['recursive']:
|
|
55
|
+
if recursive_attr in item_reconstituted \
|
|
56
|
+
and item_reconstituted[recursive_attr] != "":
|
|
57
|
+
item_reconstituted[recursive_attr] = self.retrieve(
|
|
58
|
+
item_reconstituted[recursive_attr],
|
|
59
|
+
reclimit,
|
|
60
|
+
raw)
|
|
61
|
+
return item_reconstituted
|
|
62
|
+
else: # It must be a primitive type
|
|
63
|
+
# but it could be a primitive (string) that represents something to lookup,
|
|
64
|
+
# or something not-to-lookup (or already looked up)
|
|
65
|
+
_LOGGER.debug("Lookup/prim: {}; Schema: {}".format(druid, schema))
|
|
66
|
+
# return string
|
|
67
|
+
if 'henge_class' in schema and self.schemas[schema['henge_class']]['type'] in ['object', 'array']:
|
|
68
|
+
if isinstance(reclimit, int) and reclimit == 0:
|
|
69
|
+
_LOGGER.debug("Lookup/prim/Recursive-skip: {}; Schema: {}".format(string, schema))
|
|
70
|
+
string = self.lookup(druid, item_type)
|
|
71
|
+
return string
|
|
72
|
+
else:
|
|
73
|
+
if isinstance(reclimit, int):
|
|
74
|
+
reclimit = reclimit - 1
|
|
75
|
+
_LOGGER.debug("Lookup/prim/Recursive: {}; Schema: {}".format(druid, schema))
|
|
76
|
+
return self.retrieve(druid, reclimit, raw)
|
|
77
|
+
else:
|
|
78
|
+
string = self.lookup(druid, item_type)
|
|
79
|
+
_LOGGER.debug("Lookup/prim/Non-recursive: {}; Schema: {}".format(string, schema))
|
|
80
|
+
return string #self.retrieve(string, reclimit, raw)
|
|
81
|
+
|
|
82
|
+
# try:
|
|
83
|
+
# string = henge_to_query.database[druid]
|
|
84
|
+
# except KeyError:
|
|
85
|
+
# raise NotFoundException(druid)
|
|
86
|
+
|
|
87
|
+
# return reconstruct_item(string, schema, reclimit)
|
|
88
|
+
|
|
89
|
+
def retrieve2(self, druid, reclimit=None, raw=False):
|
|
90
|
+
"""
|
|
91
|
+
Retrieve an item given a digest
|
|
92
|
+
|
|
93
|
+
:param str druid: The Decomposable recursive unique identifier (DRUID), or
|
|
94
|
+
digest that uniquely identifies that item to retrieve.
|
|
95
|
+
:param int reclimit: Recursion limit. Set to None for no limit (default).
|
|
96
|
+
:param bool raw: Return the value as a raw, henge-delimited string, instead
|
|
97
|
+
of processing into a mapping. Default: False.
|
|
98
|
+
"""
|
|
99
|
+
def reconstruct_item(string, schema, reclimit):
|
|
100
|
+
if "type" in schema and schema["type"] == "array":
|
|
101
|
+
_LOGGER.debug("Lookup/array/Recursive: {}; Schema: {}".format(string, schema))
|
|
102
|
+
splitstr = string.split(DELIM_ITEM)
|
|
103
|
+
# if self.flexible_digests:
|
|
104
|
+
# pass
|
|
105
|
+
# item_name = splitstr.pop(0)
|
|
106
|
+
if 'henge_class' in schema['items'] and schema['items']['type'] not in ["object", "array"]:
|
|
107
|
+
_LOGGER.debug("Henge classed array: {}; Schema: {}".format(string, schema))
|
|
108
|
+
return "ASDF"
|
|
109
|
+
return [reconstruct_item(self.henges[item_type].database[substr], schema["items"], reclimit)
|
|
110
|
+
for substr in splitstr]
|
|
111
|
+
else:
|
|
112
|
+
return [reconstruct_item(substr, schema["items"], reclimit)
|
|
113
|
+
for substr in splitstr]
|
|
114
|
+
elif schema["type"] == "object":
|
|
115
|
+
attr_array = string.split(DELIM_ATTR)
|
|
116
|
+
if self.flexible_digests:
|
|
117
|
+
keys = attr_array[::2] # evens
|
|
118
|
+
vals = attr_array[1::2] # odds
|
|
119
|
+
item_reconstituted = dict(zip(keys,vals))
|
|
120
|
+
else:
|
|
121
|
+
item_reconstituted = dict(zip(schema['properties'].keys(),
|
|
122
|
+
attr_array))
|
|
123
|
+
# I think this part needs to be removed... it's based on the
|
|
124
|
+
# previous 'recursive' for arrays, which went away...
|
|
125
|
+
# but actually these may be added in by me, so nevermind.
|
|
126
|
+
if 'recursive' in schema:
|
|
127
|
+
if isinstance(reclimit, int) and reclimit == 0:
|
|
128
|
+
_LOGGER.debug("Lookup/obj/Recursive: {}; Schema: {}".format(string, schema))
|
|
129
|
+
return item_reconstituted
|
|
130
|
+
else:
|
|
131
|
+
if isinstance(reclimit, int):
|
|
132
|
+
reclimit = reclimit - 1
|
|
133
|
+
for recursive_attr in schema['recursive']:
|
|
134
|
+
if item_reconstituted[recursive_attr] \
|
|
135
|
+
and item_reconstituted[recursive_attr] != "":
|
|
136
|
+
item_reconstituted[recursive_attr] = self.retrieve(
|
|
137
|
+
item_reconstituted[recursive_attr],
|
|
138
|
+
reclimit,
|
|
139
|
+
raw)
|
|
140
|
+
return item_reconstituted
|
|
141
|
+
else: # it must be a primitive
|
|
142
|
+
# but it could be a primitive (string) that represents something to lookup,
|
|
143
|
+
# or something not-to-lookup (or already looked up)
|
|
144
|
+
_LOGGER.debug("Lookup/prim: {}; Schema: {}".format(string, schema))
|
|
145
|
+
# return string
|
|
146
|
+
if 'henge_class' in schema and self.schemas[schema['henge_class']]['type'] in ['object', 'array']:
|
|
147
|
+
if isinstance(reclimit, int) and reclimit == 0:
|
|
148
|
+
_LOGGER.debug("Lookup/prim/Recursive-skip: {}; Schema: {}".format(string, schema))
|
|
149
|
+
return string
|
|
150
|
+
else:
|
|
151
|
+
if isinstance(reclimit, int):
|
|
152
|
+
reclimit = reclimit - 1
|
|
153
|
+
_LOGGER.debug("Lookup/prim/Recursive: {}; Schema: {}".format(string, schema))
|
|
154
|
+
return self.retrieve(string, reclimit, raw)
|
|
155
|
+
else:
|
|
156
|
+
_LOGGER.debug("Lookup/prim/Non-recursive: {}; Schema: {}".format(string, schema))
|
|
157
|
+
return string
|
|
158
|
+
|
|
159
|
+
# This requires the database to have __iter__ defined...and it scrolls through
|
|
160
|
+
# not a great way, take it out! 2021-01 NS
|
|
161
|
+
# I'll instead do a try block
|
|
162
|
+
# if not druid + ITEM_TYPE in self.database:
|
|
163
|
+
# raise NotFoundException(druid)
|
|
164
|
+
|
|
165
|
+
try:
|
|
166
|
+
item_type = self.database[druid + ITEM_TYPE]
|
|
167
|
+
except:
|
|
168
|
+
_LOGGER.debug(f"Item type not saved in database for {druid}")
|
|
169
|
+
raise NotFoundException(druid)
|
|
170
|
+
|
|
171
|
+
try:
|
|
172
|
+
henge_to_query = self.henges[item_type]
|
|
173
|
+
except:
|
|
174
|
+
_LOGGER.debug("No henges available for this item type")
|
|
175
|
+
raise NotFoundException(druid)
|
|
176
|
+
# _LOGGER.debug("item_type: {}".format(item_type))
|
|
177
|
+
# _LOGGER.debug("henge_to_query: {}".format(henge_to_query))
|
|
178
|
+
try:
|
|
179
|
+
string = henge_to_query.database[druid]
|
|
180
|
+
except KeyError:
|
|
181
|
+
raise NotFoundException(druid)
|
|
182
|
+
|
|
183
|
+
schema = self.schemas[item_type]
|
|
184
|
+
_LOGGER.debug("Got druid to retrieve: {} / item_type: {} / schema: {}".format(
|
|
185
|
+
druid, item_type, schema))
|
|
186
|
+
return reconstruct_item(string, schema, reclimit)
|
|
187
|
+
|
|
188
|
+
|
|
189
|
+
|
|
190
|
+
Part of: _insert_flat
|
|
191
|
+
|
|
192
|
+
def safestr(item, x):
|
|
193
|
+
try:
|
|
194
|
+
return str(item[x])
|
|
195
|
+
except (ValueError, TypeError, KeyError):
|
|
196
|
+
return ""
|
|
197
|
+
|
|
198
|
+
|
|
199
|
+
|
|
200
|
+
def build_attr_string(item, schema, item_name=None):
|
|
201
|
+
if "type" in schema and schema["type"] == "array":
|
|
202
|
+
if self.flexible_digests:
|
|
203
|
+
return DELIM_ITEM.join([build_attr_string(x, schema['items'])
|
|
204
|
+
for x in item])
|
|
205
|
+
else:
|
|
206
|
+
return DELIM_ITEM.join([build_attr_string(x, schema['items'])
|
|
207
|
+
for x in item])
|
|
208
|
+
elif schema["type"] == "object" and 'properties' in schema:
|
|
209
|
+
if self.flexible_digests:
|
|
210
|
+
# flexible schema
|
|
211
|
+
keys_to_include = sorted([x for x in item.keys() if x in list(schema['properties'].keys())])
|
|
212
|
+
return DELIM_ATTR.join([DELIM_ATTR.join([k, safestr(item, k)]) for k in keys_to_include])
|
|
213
|
+
|
|
214
|
+
else:
|
|
215
|
+
# fixed schema
|
|
216
|
+
return DELIM_ATTR.join([safestr(item, x) for x in
|
|
217
|
+
list(schema['properties'].keys())])
|
|
218
|
+
else: #assume it's a primitive
|
|
219
|
+
if self.flexible_digests:
|
|
220
|
+
return item
|
|
221
|
+
attr_string = DELIM_ATTR.join([item_name, item])
|
|
222
|
+
return attr_string
|
|
223
|
+
else:
|
|
224
|
+
return item
|
henge/henge.py
CHANGED
|
@@ -4,7 +4,7 @@ import copy
|
|
|
4
4
|
import hashlib
|
|
5
5
|
import jsonschema
|
|
6
6
|
import logging
|
|
7
|
-
import
|
|
7
|
+
import json
|
|
8
8
|
import os
|
|
9
9
|
import sys
|
|
10
10
|
import yacman
|
|
@@ -17,10 +17,13 @@ from .const import *
|
|
|
17
17
|
|
|
18
18
|
_LOGGER = logging.getLogger(__name__)
|
|
19
19
|
|
|
20
|
+
|
|
20
21
|
class NotFoundException(Exception):
|
|
21
22
|
"""Raised when a digest is not found"""
|
|
23
|
+
|
|
22
24
|
def __init__(self, m):
|
|
23
25
|
self.message = "{} not found in database".format(m)
|
|
26
|
+
|
|
24
27
|
def __str__(self):
|
|
25
28
|
return self.message
|
|
26
29
|
|
|
@@ -28,27 +31,32 @@ class NotFoundException(Exception):
|
|
|
28
31
|
def md5(seq):
|
|
29
32
|
return hashlib.md5(seq.encode()).hexdigest()
|
|
30
33
|
|
|
34
|
+
|
|
31
35
|
def is_url(maybe_url):
|
|
32
36
|
from urllib.parse import urlparse
|
|
33
|
-
|
|
37
|
+
|
|
38
|
+
return " " not in maybe_url and urlparse(maybe_url).scheme != ""
|
|
39
|
+
|
|
34
40
|
|
|
35
41
|
def read_url(url):
|
|
36
42
|
_LOGGER.info("Reading URL: {}".format(url))
|
|
37
43
|
from urllib.request import urlopen
|
|
38
44
|
from urllib.error import HTTPError
|
|
45
|
+
|
|
39
46
|
try:
|
|
40
47
|
response = urlopen(url)
|
|
41
48
|
except HTTPError as e:
|
|
42
49
|
raise e
|
|
43
|
-
data = response.read()
|
|
44
|
-
text = data.decode(
|
|
50
|
+
data = response.read() # a `bytes` object
|
|
51
|
+
text = data.decode("utf-8")
|
|
45
52
|
print(text)
|
|
46
53
|
return yaml.safe_load(text)
|
|
47
54
|
|
|
48
55
|
|
|
49
56
|
class Henge(object):
|
|
50
|
-
def __init__(
|
|
51
|
-
checksum_function=md5
|
|
57
|
+
def __init__(
|
|
58
|
+
self, database, schemas, schemas_str=[], henges=None, checksum_function=md5
|
|
59
|
+
):
|
|
52
60
|
"""
|
|
53
61
|
A user interface to insert and retrieve decomposable recursive unique
|
|
54
62
|
identifiers (DRUIDs).
|
|
@@ -67,7 +75,7 @@ class Henge(object):
|
|
|
67
75
|
self.checksum_function = checksum_function
|
|
68
76
|
self.digest_version = "md5"
|
|
69
77
|
self.flexible_digests = True
|
|
70
|
-
|
|
78
|
+
self.supports_inherent_attrs = True
|
|
71
79
|
|
|
72
80
|
# TODO: Right now you can pass a file, or a URL, or some yaml directly
|
|
73
81
|
# into the schemas param. I want to split that out so that at least the
|
|
@@ -83,7 +91,9 @@ class Henge(object):
|
|
|
83
91
|
else:
|
|
84
92
|
populated_schemas = []
|
|
85
93
|
if isinstance(schemas, str):
|
|
86
|
-
_LOGGER.error(
|
|
94
|
+
_LOGGER.error(
|
|
95
|
+
"The schemas should be a list. Please pass a list of schemas"
|
|
96
|
+
)
|
|
87
97
|
schemas = [schemas]
|
|
88
98
|
for schema_value in schemas:
|
|
89
99
|
if isinstance(schema_value, str):
|
|
@@ -91,8 +101,10 @@ class Henge(object):
|
|
|
91
101
|
populated_schemas.append(yacman.load_yaml(schema_value))
|
|
92
102
|
elif is_url(schema_value):
|
|
93
103
|
populated_schemas.append(read_url(schema_value))
|
|
94
|
-
else
|
|
95
|
-
_LOGGER.error(
|
|
104
|
+
else:
|
|
105
|
+
_LOGGER.error(
|
|
106
|
+
f"Schema file not found: {schema_value}. Use schemas_str if you meant to specify a direct schema"
|
|
107
|
+
)
|
|
96
108
|
# populated_schemas.append(yaml.safe_load(schema_value))
|
|
97
109
|
|
|
98
110
|
for schema_value in schemas_str:
|
|
@@ -120,206 +132,77 @@ class Henge(object):
|
|
|
120
132
|
self.schemas[item_type] = henge.schemas[item_type]
|
|
121
133
|
self.henges[item_type] = henge
|
|
122
134
|
|
|
123
|
-
|
|
124
135
|
def retrieve(self, druid, reclimit=None, raw=False):
|
|
136
|
+
"""
|
|
137
|
+
Retrieve an item given a digest
|
|
125
138
|
|
|
126
|
-
|
|
139
|
+
:param str druid: The Decomposable recursive unique identifier (DRUID), or
|
|
140
|
+
digest that uniquely identifies that item to retrieve.
|
|
141
|
+
:param int reclimit: Recursion limit. Set to None for no limit (default).
|
|
142
|
+
:param bool raw: Return the value as a raw, henge-delimited string, instead
|
|
143
|
+
of processing into a mapping. Default: False.
|
|
144
|
+
"""
|
|
145
|
+
try:
|
|
127
146
|
item_type = self.database[druid + ITEM_TYPE]
|
|
128
|
-
except:
|
|
129
|
-
_LOGGER.debug(f"Item type not saved in database for {druid}")
|
|
147
|
+
except KeyError:
|
|
130
148
|
raise NotFoundException(druid)
|
|
131
149
|
|
|
132
|
-
|
|
133
|
-
|
|
150
|
+
digested_string = self.lookup(druid, item_type)
|
|
151
|
+
reconstructed_item = json.loads(digested_string)
|
|
134
152
|
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
153
|
+
external_string = self.database[druid + "_external_string"]
|
|
154
|
+
if external_string != "null":
|
|
155
|
+
external_values = json.loads(external_string)
|
|
156
|
+
reconstructed_item.update(external_values)
|
|
157
|
+
|
|
158
|
+
schema = self.schemas[item_type]
|
|
139
159
|
|
|
140
160
|
if schema["type"] == "array":
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
if 'henge_class' in schema['items']:
|
|
150
|
-
_LOGGER.debug("Henge classed array: {}; Schema: {}".format(string, schema))
|
|
161
|
+
if isinstance(reclimit, int) and reclimit == 0:
|
|
162
|
+
return reconstructed_item
|
|
163
|
+
if "henge_class" in schema["items"]:
|
|
164
|
+
_LOGGER.debug(
|
|
165
|
+
"Henge classed array: {}; Schema: {}".format(
|
|
166
|
+
digested_string, schema
|
|
167
|
+
)
|
|
168
|
+
)
|
|
151
169
|
if isinstance(reclimit, int):
|
|
152
|
-
reclimit = reclimit - 1
|
|
153
|
-
return [self.retrieve(
|
|
154
|
-
else:
|
|
155
|
-
return splitstr
|
|
170
|
+
reclimit = reclimit - 1
|
|
171
|
+
return [self.retrieve(item, reclimit) for item in reconstructed_item]
|
|
156
172
|
elif schema["type"] == "object":
|
|
157
|
-
|
|
158
|
-
attr_array = string.split(DELIM_ATTR)
|
|
159
|
-
if self.flexible_digests:
|
|
160
|
-
keys = attr_array[::2] # evens
|
|
161
|
-
vals = attr_array[1::2] # odds
|
|
162
|
-
item_reconstituted = dict(zip(keys,vals))
|
|
163
|
-
else:
|
|
164
|
-
item_reconstituted = dict(zip(schema['properties'].keys(),
|
|
165
|
-
attr_array))
|
|
166
|
-
# I think this part needs to be removed... it's based on the
|
|
167
|
-
# previous 'recursive' for arrays, which went away...
|
|
168
|
-
# but actually these may be added in by me, so nevermind.
|
|
169
|
-
if 'recursive' in schema:
|
|
173
|
+
if "recursive" in schema:
|
|
170
174
|
if isinstance(reclimit, int) and reclimit == 0:
|
|
171
|
-
_LOGGER.debug(
|
|
172
|
-
|
|
175
|
+
_LOGGER.debug(
|
|
176
|
+
"Lookup/obj/Recursive: {}; Schema: {}".format(
|
|
177
|
+
digested_string, schema
|
|
178
|
+
)
|
|
179
|
+
)
|
|
180
|
+
return reconstructed_item
|
|
173
181
|
else:
|
|
174
182
|
if isinstance(reclimit, int):
|
|
175
183
|
reclimit = reclimit - 1
|
|
176
|
-
for recursive_attr in schema[
|
|
177
|
-
if
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
raw
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
# but it could be a primitive (string) that represents something to lookup,
|
|
186
|
-
# or something not-to-lookup (or already looked up)
|
|
187
|
-
_LOGGER.debug("Lookup/prim: {}; Schema: {}".format(druid, schema))
|
|
188
|
-
# return string
|
|
189
|
-
if 'henge_class' in schema and self.schemas[schema['henge_class']]['type'] in ['object', 'array']:
|
|
190
|
-
if isinstance(reclimit, int) and reclimit == 0:
|
|
191
|
-
_LOGGER.debug("Lookup/prim/Recursive-skip: {}; Schema: {}".format(string, schema))
|
|
192
|
-
string = self.lookup(druid, item_type)
|
|
193
|
-
return string
|
|
194
|
-
else:
|
|
195
|
-
if isinstance(reclimit, int):
|
|
196
|
-
reclimit = reclimit - 1
|
|
197
|
-
_LOGGER.debug("Lookup/prim/Recursive: {}; Schema: {}".format(druid, schema))
|
|
198
|
-
return self.retrieve(druid, reclimit, raw)
|
|
199
|
-
else:
|
|
200
|
-
string = self.lookup(druid, item_type)
|
|
201
|
-
_LOGGER.debug("Lookup/prim/Non-recursive: {}; Schema: {}".format(string, schema))
|
|
202
|
-
return string #self.retrieve(string, reclimit, raw)
|
|
203
|
-
|
|
204
|
-
# try:
|
|
205
|
-
# string = henge_to_query.database[druid]
|
|
206
|
-
# except KeyError:
|
|
207
|
-
# raise NotFoundException(druid)
|
|
208
|
-
|
|
209
|
-
# return reconstruct_item(string, schema, reclimit)
|
|
210
|
-
|
|
184
|
+
for recursive_attr in schema["recursive"]:
|
|
185
|
+
if (
|
|
186
|
+
recursive_attr in reconstructed_item
|
|
187
|
+
and reconstructed_item[recursive_attr] != ""
|
|
188
|
+
):
|
|
189
|
+
reconstructed_item[recursive_attr] = self.retrieve(
|
|
190
|
+
reconstructed_item[recursive_attr], reclimit, raw
|
|
191
|
+
)
|
|
192
|
+
return reconstructed_item
|
|
211
193
|
|
|
212
194
|
def lookup(self, druid, item_type):
|
|
213
195
|
try:
|
|
214
196
|
henge_to_query = self.henges[item_type]
|
|
215
197
|
except:
|
|
216
198
|
_LOGGER.debug("No henges available for this item type")
|
|
217
|
-
raise NotFoundException(druid)
|
|
218
|
-
try:
|
|
219
|
-
string = henge_to_query.database[druid]
|
|
220
|
-
except KeyError:
|
|
221
199
|
raise NotFoundException(druid)
|
|
222
|
-
|
|
223
|
-
return string
|
|
224
|
-
|
|
225
|
-
def retrieve2(self, druid, reclimit=None, raw=False):
|
|
226
|
-
"""
|
|
227
|
-
Retrieve an item given a digest
|
|
228
|
-
|
|
229
|
-
:param str druid: The Decomposable recursive unique identifier (DRUID), or
|
|
230
|
-
digest that uniquely identifies that item to retrieve.
|
|
231
|
-
:param int reclimit: Recursion limit. Set to None for no limit (default).
|
|
232
|
-
:param bool raw: Return the value as a raw, henge-delimited string, instead
|
|
233
|
-
of processing into a mapping. Default: False.
|
|
234
|
-
"""
|
|
235
|
-
def reconstruct_item(string, schema, reclimit):
|
|
236
|
-
if "type" in schema and schema["type"] == "array":
|
|
237
|
-
_LOGGER.debug("Lookup/array/Recursive: {}; Schema: {}".format(string, schema))
|
|
238
|
-
splitstr = string.split(DELIM_ITEM)
|
|
239
|
-
# if self.flexible_digests:
|
|
240
|
-
# pass
|
|
241
|
-
# item_name = splitstr.pop(0)
|
|
242
|
-
if 'henge_class' in schema['items'] and schema['items']['type'] not in ["object", "array"]:
|
|
243
|
-
_LOGGER.debug("Henge classed array: {}; Schema: {}".format(string, schema))
|
|
244
|
-
return "ASDF"
|
|
245
|
-
return [reconstruct_item(self.henges[item_type].database[substr], schema["items"], reclimit)
|
|
246
|
-
for substr in splitstr]
|
|
247
|
-
else:
|
|
248
|
-
return [reconstruct_item(substr, schema["items"], reclimit)
|
|
249
|
-
for substr in splitstr]
|
|
250
|
-
elif schema["type"] == "object":
|
|
251
|
-
attr_array = string.split(DELIM_ATTR)
|
|
252
|
-
if self.flexible_digests:
|
|
253
|
-
keys = attr_array[::2] # evens
|
|
254
|
-
vals = attr_array[1::2] # odds
|
|
255
|
-
item_reconstituted = dict(zip(keys,vals))
|
|
256
|
-
else:
|
|
257
|
-
item_reconstituted = dict(zip(schema['properties'].keys(),
|
|
258
|
-
attr_array))
|
|
259
|
-
# I think this part needs to be removed... it's based on the
|
|
260
|
-
# previous 'recursive' for arrays, which went away...
|
|
261
|
-
# but actually these may be added in by me, so nevermind.
|
|
262
|
-
if 'recursive' in schema:
|
|
263
|
-
if isinstance(reclimit, int) and reclimit == 0:
|
|
264
|
-
_LOGGER.debug("Lookup/obj/Recursive: {}; Schema: {}".format(string, schema))
|
|
265
|
-
return item_reconstituted
|
|
266
|
-
else:
|
|
267
|
-
if isinstance(reclimit, int):
|
|
268
|
-
reclimit = reclimit - 1
|
|
269
|
-
for recursive_attr in schema['recursive']:
|
|
270
|
-
if item_reconstituted[recursive_attr] \
|
|
271
|
-
and item_reconstituted[recursive_attr] != "":
|
|
272
|
-
item_reconstituted[recursive_attr] = self.retrieve(
|
|
273
|
-
item_reconstituted[recursive_attr],
|
|
274
|
-
reclimit,
|
|
275
|
-
raw)
|
|
276
|
-
return item_reconstituted
|
|
277
|
-
else: # it must be a primitive
|
|
278
|
-
# but it could be a primitive (string) that represents something to lookup,
|
|
279
|
-
# or something not-to-lookup (or already looked up)
|
|
280
|
-
_LOGGER.debug("Lookup/prim: {}; Schema: {}".format(string, schema))
|
|
281
|
-
# return string
|
|
282
|
-
if 'henge_class' in schema and self.schemas[schema['henge_class']]['type'] in ['object', 'array']:
|
|
283
|
-
if isinstance(reclimit, int) and reclimit == 0:
|
|
284
|
-
_LOGGER.debug("Lookup/prim/Recursive-skip: {}; Schema: {}".format(string, schema))
|
|
285
|
-
return string
|
|
286
|
-
else:
|
|
287
|
-
if isinstance(reclimit, int):
|
|
288
|
-
reclimit = reclimit - 1
|
|
289
|
-
_LOGGER.debug("Lookup/prim/Recursive: {}; Schema: {}".format(string, schema))
|
|
290
|
-
return self.retrieve(string, reclimit, raw)
|
|
291
|
-
else:
|
|
292
|
-
_LOGGER.debug("Lookup/prim/Non-recursive: {}; Schema: {}".format(string, schema))
|
|
293
|
-
return string
|
|
294
|
-
|
|
295
|
-
# This requires the database to have __iter__ defined...and it scrolls through
|
|
296
|
-
# not a great way, take it out! 2021-01 NS
|
|
297
|
-
# I'll instead do a try block
|
|
298
|
-
# if not druid + ITEM_TYPE in self.database:
|
|
299
|
-
# raise NotFoundException(druid)
|
|
300
|
-
|
|
301
|
-
try:
|
|
302
|
-
item_type = self.database[druid + ITEM_TYPE]
|
|
303
|
-
except:
|
|
304
|
-
_LOGGER.debug(f"Item type not saved in database for {druid}")
|
|
305
|
-
raise NotFoundException(druid)
|
|
306
|
-
|
|
307
|
-
try:
|
|
308
|
-
henge_to_query = self.henges[item_type]
|
|
309
|
-
except:
|
|
310
|
-
_LOGGER.debug("No henges available for this item type")
|
|
311
|
-
raise NotFoundException(druid)
|
|
312
|
-
# _LOGGER.debug("item_type: {}".format(item_type))
|
|
313
|
-
# _LOGGER.debug("henge_to_query: {}".format(henge_to_query))
|
|
314
200
|
try:
|
|
315
201
|
string = henge_to_query.database[druid]
|
|
316
202
|
except KeyError:
|
|
317
203
|
raise NotFoundException(druid)
|
|
318
204
|
|
|
319
|
-
|
|
320
|
-
_LOGGER.debug("Got druid to retrieve: {} / item_type: {} / schema: {}".format(
|
|
321
|
-
druid, item_type, schema))
|
|
322
|
-
return reconstruct_item(string, schema, reclimit)
|
|
205
|
+
return string
|
|
323
206
|
|
|
324
207
|
@property
|
|
325
208
|
def item_types(self):
|
|
@@ -356,20 +239,19 @@ class Henge(object):
|
|
|
356
239
|
fits.
|
|
357
240
|
"""
|
|
358
241
|
|
|
359
|
-
|
|
360
242
|
_LOGGER.debug("Insert type: {} / Item: {}".format(item_type, item))
|
|
361
|
-
|
|
243
|
+
|
|
362
244
|
if item_type not in self.schemas.keys():
|
|
363
|
-
_LOGGER.error(
|
|
364
|
-
|
|
365
|
-
|
|
245
|
+
_LOGGER.error(
|
|
246
|
+
"I don't know about items of type '{}'. "
|
|
247
|
+
"I know of: '{}'".format(item_type, list(self.schemas.keys()))
|
|
248
|
+
)
|
|
366
249
|
return False
|
|
367
250
|
|
|
368
251
|
schema = self.schemas[item_type]
|
|
369
252
|
|
|
370
|
-
|
|
371
253
|
flat_item = item
|
|
372
|
-
if schema[
|
|
254
|
+
if schema["type"] == "object":
|
|
373
255
|
flat_item = {}
|
|
374
256
|
if isinstance(reclimit, int) and reclimit == 0:
|
|
375
257
|
return self._insert_flat(item, item_type)
|
|
@@ -378,56 +260,63 @@ class Henge(object):
|
|
|
378
260
|
reclimit = reclimit - 1
|
|
379
261
|
for prop in item:
|
|
380
262
|
if prop in schema["properties"]:
|
|
381
|
-
_LOGGER.debug(
|
|
263
|
+
_LOGGER.debug(
|
|
264
|
+
"-Prop {}; Schema: {}".format(
|
|
265
|
+
prop, str(schema["properties"][prop])
|
|
266
|
+
)
|
|
267
|
+
)
|
|
382
268
|
if "recursive" in schema and prop in schema["recursive"]:
|
|
383
269
|
hclass = schema["properties"][prop]["henge_class"]
|
|
384
270
|
digest = self.insert(item[prop], hclass, reclimit)
|
|
385
271
|
flat_item[prop] = digest
|
|
386
|
-
elif schema["properties"][prop]["type"] in [
|
|
272
|
+
elif schema["properties"][prop]["type"] in ["array"]:
|
|
387
273
|
digest = self.insert(item[prop], "array", reclimit)
|
|
388
274
|
flat_item[prop] = digest
|
|
389
275
|
else:
|
|
390
276
|
flat_item[prop] = item[prop]
|
|
391
|
-
_LOGGER.debug(
|
|
277
|
+
_LOGGER.debug(
|
|
278
|
+
"Prop: {}; Flat item: {}".format(prop, flat_item[prop])
|
|
279
|
+
)
|
|
392
280
|
else:
|
|
281
|
+
_LOGGER.debug(f"Prop: {prop}. Ignoring due to not in schema")
|
|
393
282
|
pass # Ignore non-schema defined properties
|
|
394
283
|
|
|
395
284
|
# if len(flat_item) == 0:
|
|
396
285
|
# flat_item = item
|
|
397
|
-
elif schema[
|
|
286
|
+
elif schema["type"] == "array":
|
|
398
287
|
flat_item = []
|
|
399
|
-
if
|
|
288
|
+
if "henge_class" in schema["items"]:
|
|
400
289
|
digest = []
|
|
401
|
-
hclass = schema[
|
|
290
|
+
hclass = schema["items"]["henge_class"]
|
|
402
291
|
if isinstance(reclimit, int) and reclimit == 0:
|
|
403
292
|
return self._insert_flat(item, item_type)
|
|
404
293
|
else:
|
|
405
294
|
if isinstance(reclimit, int):
|
|
406
|
-
reclimit = reclimit - 1
|
|
407
|
-
_LOGGER.debug(
|
|
295
|
+
reclimit = reclimit - 1
|
|
296
|
+
_LOGGER.debug(
|
|
297
|
+
"Item: {}. Pyclass: {}. hclass: {}".format(
|
|
298
|
+
item, type(item), hclass
|
|
299
|
+
)
|
|
300
|
+
)
|
|
408
301
|
for element in item:
|
|
409
302
|
digest.append(self.insert(element, hclass, reclimit))
|
|
410
303
|
flat_item = digest
|
|
411
304
|
else:
|
|
412
305
|
flat_item = item
|
|
413
306
|
_LOGGER.debug("Array flat item: {}".format(flat_item))
|
|
414
|
-
else:
|
|
307
|
+
else: # A primitive type with a henge class
|
|
415
308
|
_LOGGER.debug("Nice! You're using a henge-classed primitive type!")
|
|
416
309
|
hclass = schema["henge_class"]
|
|
417
310
|
# digest = self.insert(item, hclass)
|
|
418
311
|
flat_item = item
|
|
419
312
|
|
|
420
|
-
|
|
421
|
-
|
|
422
|
-
|
|
423
313
|
return self._insert_flat(flat_item, item_type)
|
|
424
314
|
|
|
425
|
-
|
|
426
315
|
def _insert_flat(self, item, item_type=None, item_name=None):
|
|
427
316
|
"""
|
|
428
317
|
Add flattened items (of a specified type) to the database.
|
|
429
318
|
|
|
430
|
-
Flattened items have removed all levels, so it's only attributes and
|
|
319
|
+
Flattened items have removed all levels, so it's only attributes and
|
|
431
320
|
strict values; no nesting allowed. Use the upstream insert function
|
|
432
321
|
to insert full structured objects, which calls this function.
|
|
433
322
|
|
|
@@ -438,9 +327,10 @@ class Henge(object):
|
|
|
438
327
|
fits.
|
|
439
328
|
"""
|
|
440
329
|
if item_type not in self.schemas.keys():
|
|
441
|
-
_LOGGER.error(
|
|
442
|
-
|
|
443
|
-
|
|
330
|
+
_LOGGER.error(
|
|
331
|
+
"I don't know about items of type '{}'. "
|
|
332
|
+
"I know of: '{}'".format(item_type, list(self.schemas.keys()))
|
|
333
|
+
)
|
|
444
334
|
return False
|
|
445
335
|
|
|
446
336
|
# digest_version should be automatically appended to the item by the
|
|
@@ -449,45 +339,16 @@ class Henge(object):
|
|
|
449
339
|
# jsonschema do this automatically?
|
|
450
340
|
# also item_type ?
|
|
451
341
|
|
|
452
|
-
def safestr(item, x):
|
|
453
|
-
try:
|
|
454
|
-
return str(item[x])
|
|
455
|
-
except (ValueError, TypeError, KeyError):
|
|
456
|
-
return ""
|
|
457
|
-
|
|
458
|
-
def build_attr_string(item, schema, item_name=None):
|
|
459
|
-
if "type" in schema and schema["type"] == "array":
|
|
460
|
-
if self.flexible_digests:
|
|
461
|
-
return DELIM_ITEM.join([build_attr_string(x, schema['items'])
|
|
462
|
-
for x in item])
|
|
463
|
-
else:
|
|
464
|
-
return DELIM_ITEM.join([build_attr_string(x, schema['items'])
|
|
465
|
-
for x in item])
|
|
466
|
-
elif schema["type"] == "object" and 'properties' in schema:
|
|
467
|
-
if self.flexible_digests:
|
|
468
|
-
# flexible schema
|
|
469
|
-
keys_to_include = sorted([x for x in item.keys() if x in list(schema['properties'].keys())])
|
|
470
|
-
return DELIM_ATTR.join([DELIM_ATTR.join([k, safestr(item, k)]) for k in keys_to_include])
|
|
471
|
-
|
|
472
|
-
else:
|
|
473
|
-
# fixed schema
|
|
474
|
-
return DELIM_ATTR.join([safestr(item, x) for x in
|
|
475
|
-
list(schema['properties'].keys())])
|
|
476
|
-
else: #assume it's a primitive
|
|
477
|
-
if self.flexible_digests:
|
|
478
|
-
return item
|
|
479
|
-
attr_string = DELIM_ATTR.join([item_name, item])
|
|
480
|
-
return attr_string
|
|
481
|
-
else:
|
|
482
|
-
return item
|
|
483
|
-
|
|
484
342
|
valid_schema = self.schemas[item_type]
|
|
485
343
|
# Add defaults here ?
|
|
486
|
-
try:
|
|
344
|
+
try:
|
|
487
345
|
jsonschema.validate(item, valid_schema)
|
|
488
346
|
except jsonschema.ValidationError as e:
|
|
489
|
-
_LOGGER.error(
|
|
490
|
-
format(
|
|
347
|
+
_LOGGER.error(
|
|
348
|
+
"Not valid data. Item type: {}. Attempting to insert item: {}".format(
|
|
349
|
+
item_type, item
|
|
350
|
+
)
|
|
351
|
+
)
|
|
491
352
|
print(e)
|
|
492
353
|
|
|
493
354
|
if isinstance(item, str):
|
|
@@ -495,7 +356,9 @@ class Henge(object):
|
|
|
495
356
|
try:
|
|
496
357
|
existing_item_type = henge_to_query.database[item + ITEM_TYPE]
|
|
497
358
|
except KeyError:
|
|
498
|
-
_LOGGER.error(
|
|
359
|
+
_LOGGER.error(
|
|
360
|
+
"If you're trying to insert an item with druids, the sub-items must exist in the database."
|
|
361
|
+
)
|
|
499
362
|
# return None
|
|
500
363
|
try:
|
|
501
364
|
existing_item = henge_to_query.database[item]
|
|
@@ -503,23 +366,32 @@ class Henge(object):
|
|
|
503
366
|
_LOGGER.error("That item wasn't in the database.")
|
|
504
367
|
|
|
505
368
|
# if (item_type == existing_item_type):
|
|
506
|
-
|
|
369
|
+
# _LOGGER.info("But wait!!! That's already here, and it's great! I'll return that!")
|
|
507
370
|
return item
|
|
508
371
|
|
|
509
372
|
raise e
|
|
510
373
|
return None
|
|
511
|
-
|
|
512
|
-
|
|
374
|
+
|
|
375
|
+
_LOGGER.debug(f"item to insert: {item}")
|
|
376
|
+
item_inherent_split = select_inherent_properties(item, valid_schema)
|
|
377
|
+
attr_string = canonical_str(item_inherent_split["inherent"])
|
|
378
|
+
external_string = canonical_str(item_inherent_split["external"])
|
|
513
379
|
|
|
514
380
|
_LOGGER.debug(f"String to digest: {attr_string}")
|
|
381
|
+
_LOGGER.debug(f"External string: {external_string}")
|
|
515
382
|
druid = self.checksum_function(attr_string)
|
|
516
|
-
self._henge_insert(druid, attr_string, item_type)
|
|
383
|
+
self._henge_insert(druid, attr_string, item_type, external_string)
|
|
517
384
|
|
|
518
|
-
_LOGGER.debug(
|
|
519
|
-
|
|
385
|
+
_LOGGER.debug(
|
|
386
|
+
"Inserted flat item. Digest: {} / Type: {} / Item: {}".format(
|
|
387
|
+
druid, item_type, item
|
|
388
|
+
)
|
|
389
|
+
)
|
|
520
390
|
return druid
|
|
521
391
|
|
|
522
|
-
def _henge_insert(
|
|
392
|
+
def _henge_insert(
|
|
393
|
+
self, druid, string, item_type, external_string, digest_version=None
|
|
394
|
+
):
|
|
523
395
|
"""
|
|
524
396
|
Inserts an item into the database, with henge-metadata slots for item
|
|
525
397
|
type and digest version.
|
|
@@ -540,6 +412,7 @@ class Henge(object):
|
|
|
540
412
|
henge_to_query.database[druid] = string
|
|
541
413
|
henge_to_query.database[druid + ITEM_TYPE] = item_type
|
|
542
414
|
henge_to_query.database[druid + "_digest_version"] = digest_version
|
|
415
|
+
henge_to_query.database[druid + "_external_string"] = external_string
|
|
543
416
|
|
|
544
417
|
if henge_to_query != self:
|
|
545
418
|
self.database[druid + ITEM_TYPE] = item_type
|
|
@@ -551,13 +424,16 @@ class Henge(object):
|
|
|
551
424
|
"""
|
|
552
425
|
Remove all items from this database.
|
|
553
426
|
"""
|
|
554
|
-
|
|
555
|
-
|
|
556
|
-
|
|
557
|
-
|
|
558
|
-
|
|
559
|
-
|
|
560
|
-
|
|
427
|
+
try:
|
|
428
|
+
for k, v in self.database.items():
|
|
429
|
+
try:
|
|
430
|
+
del self.database[k]
|
|
431
|
+
del self.database[k + ITEM_TYPE]
|
|
432
|
+
del self.database[k + "_digest_version"]
|
|
433
|
+
except (KeyError, AttributeError):
|
|
434
|
+
pass
|
|
435
|
+
except AttributeError as e:
|
|
436
|
+
_LOGGER.warn(f"Error trying to iterate over database items: {e}")
|
|
561
437
|
|
|
562
438
|
def show(self):
|
|
563
439
|
"""
|
|
@@ -566,7 +442,6 @@ class Henge(object):
|
|
|
566
442
|
for k, v in self.database.items():
|
|
567
443
|
print(k, v)
|
|
568
444
|
|
|
569
|
-
|
|
570
445
|
def __repr__(self):
|
|
571
446
|
repr = "Henge object. Item types: " + ",".join(self.item_types)
|
|
572
447
|
return repr
|
|
@@ -578,68 +453,74 @@ def split_schema(schema, name=None):
|
|
|
578
453
|
"""
|
|
579
454
|
slist = {}
|
|
580
455
|
# base case
|
|
581
|
-
if schema[
|
|
456
|
+
if schema["type"] not in ["object", "array"]:
|
|
582
457
|
_LOGGER.debug(schema)
|
|
583
458
|
if name:
|
|
584
459
|
slist[name] = schema
|
|
585
|
-
elif
|
|
586
|
-
slist[schema[
|
|
460
|
+
elif "henge_class" in schema:
|
|
461
|
+
slist[schema["henge_class"]] = schema
|
|
587
462
|
_LOGGER.debug("Returning slist: {}".format(str(slist)))
|
|
588
463
|
return slist
|
|
589
|
-
elif schema[
|
|
464
|
+
elif schema["type"] == "object":
|
|
590
465
|
recursive_properties = []
|
|
591
|
-
if
|
|
466
|
+
if "henge_class" in schema:
|
|
592
467
|
schema_copy = copy.deepcopy(schema)
|
|
593
|
-
_LOGGER.debug("adding " + str(schema_copy[
|
|
594
|
-
henge_class = schema_copy[
|
|
468
|
+
_LOGGER.debug("adding " + str(schema_copy["henge_class"]))
|
|
469
|
+
henge_class = schema_copy["henge_class"]
|
|
595
470
|
# del schema_copy['henge_class']
|
|
596
|
-
for p in schema_copy[
|
|
471
|
+
for p in schema_copy["properties"]:
|
|
597
472
|
hclass = None
|
|
598
|
-
if
|
|
599
|
-
hclass = schema_copy[
|
|
473
|
+
if "henge_class" in schema_copy["properties"][p]:
|
|
474
|
+
hclass = schema_copy["properties"][p]["henge_class"]
|
|
600
475
|
recursive_properties.append(p)
|
|
601
|
-
if schema_copy[
|
|
476
|
+
if schema_copy["properties"][p]["type"] in ["object"]:
|
|
602
477
|
# recursive_properties.append(p)
|
|
603
|
-
schema_copy[
|
|
478
|
+
schema_copy["properties"][p] = {"type": "string"}
|
|
604
479
|
if hclass:
|
|
605
|
-
schema_copy[
|
|
606
|
-
if schema_copy[
|
|
480
|
+
schema_copy["properties"][p]["henge_class"] = hclass
|
|
481
|
+
if schema_copy["properties"][p]["type"] in ["array"]:
|
|
607
482
|
# recursive_properties.append(p)
|
|
608
|
-
schema_copy[
|
|
483
|
+
if schema_copy["properties"][p]["items"]["type"] == "integer":
|
|
484
|
+
schema_copy["properties"][p] = {"type": "string"}
|
|
485
|
+
else:
|
|
486
|
+
schema_copy["properties"][p] = {"type": "string"}
|
|
609
487
|
if hclass:
|
|
610
|
-
schema_copy[
|
|
488
|
+
schema_copy["properties"][p]["henge_class"] = hclass
|
|
611
489
|
else:
|
|
612
|
-
schema_copy[
|
|
490
|
+
schema_copy["properties"][p]["henge_class"] = "strarray"
|
|
613
491
|
# schema_copy['properties'][p]['type'] = "string"
|
|
614
492
|
# del schema_copy['properties']
|
|
615
|
-
_LOGGER.debug(
|
|
616
|
-
|
|
493
|
+
_LOGGER.debug(
|
|
494
|
+
"Adding recursive properties: {}".format(recursive_properties)
|
|
495
|
+
)
|
|
496
|
+
schema_copy["recursive"] = recursive_properties
|
|
617
497
|
slist[henge_class] = schema_copy
|
|
618
498
|
|
|
619
|
-
for p in schema[
|
|
499
|
+
for p in schema["properties"]:
|
|
620
500
|
# if schema['properties'][p]['type'] in ['object', 'array']:
|
|
621
|
-
# recursive_properties.append(p)
|
|
622
|
-
schema_sub = schema[
|
|
501
|
+
# recursive_properties.append(p)
|
|
502
|
+
schema_sub = schema["properties"][p]
|
|
623
503
|
_LOGGER.debug("checking property:" + p)
|
|
624
|
-
slist.update(split_schema(schema[
|
|
625
|
-
elif schema[
|
|
504
|
+
slist.update(split_schema(schema["properties"][p]))
|
|
505
|
+
elif schema["type"] == "array":
|
|
626
506
|
_LOGGER.debug("found array")
|
|
627
507
|
_LOGGER.debug(schema)
|
|
628
|
-
if
|
|
508
|
+
if "henge_class" in schema:
|
|
629
509
|
schema_copy = copy.deepcopy(schema)
|
|
630
|
-
_LOGGER.debug("adding " + str(schema[
|
|
631
|
-
henge_class = schema_copy[
|
|
510
|
+
_LOGGER.debug("adding " + str(schema["henge_class"]))
|
|
511
|
+
henge_class = schema_copy["henge_class"]
|
|
632
512
|
# del schema_copy['henge_class']
|
|
633
|
-
schema_copy[
|
|
634
|
-
|
|
635
|
-
|
|
636
|
-
|
|
637
|
-
|
|
513
|
+
if schema_copy["items"]["type"] != "integer":
|
|
514
|
+
schema_copy["items"] = {"type": "string"}
|
|
515
|
+
if "recursive" in schema_copy and schema_copy["recursive"]:
|
|
516
|
+
schema_copy["items"]["recursive"] = True
|
|
517
|
+
if "henge_class" in schema["items"]:
|
|
518
|
+
schema_copy["items"]["henge_class"] = schema["items"]["henge_class"]
|
|
638
519
|
# schema_copy['items']['type'] = "string"
|
|
639
520
|
# if 'properties' in schema_copy['items']:
|
|
640
521
|
# del schema_copy['items']['properties']
|
|
641
522
|
slist[henge_class] = schema_copy
|
|
642
|
-
schema_sub = schema[
|
|
523
|
+
schema_sub = schema["items"]
|
|
643
524
|
slist.update(split_schema(schema_sub))
|
|
644
525
|
else:
|
|
645
526
|
_LOGGER.debug("Classless array")
|
|
@@ -650,24 +531,46 @@ def split_schema(schema, name=None):
|
|
|
650
531
|
return slist
|
|
651
532
|
|
|
652
533
|
|
|
534
|
+
def canonical_str(item: dict) -> str:
    """
    Serialize an item to its canonical JSON string.

    The output uses the most compact separators, keeps non-ASCII
    characters unescaped, orders object keys alphabetically and refuses
    NaN/Infinity values, so equal content always yields the same string.

    :param dict item: JSON-serializable content to represent
    :return str: canonical JSON string for the item
    :raises ValueError: if the item contains NaN or infinite floats
    """
    dump_options = {
        "separators": (",", ":"),
        "ensure_ascii": False,
        "allow_nan": False,
        "sort_keys": True,
    }
    return json.dumps(item, **dump_options)
|
|
539
|
+
|
|
540
|
+
|
|
541
|
+
def select_inherent_properties(item: dict, schema: dict) -> dict:
    """
    Split an item into its inherent and external parts.

    When the schema is of type "object" and carries a non-empty "inherent"
    list, the listed properties are returned under "inherent" and all
    remaining properties under "external". In every other case the whole
    item is returned as "inherent" and "external" is None.

    The item passed in is never modified; the external part is a new dict.

    :param dict item: item to split
    :param dict schema: schema describing the item; must have a "type" key
    :return dict: mapping with the keys "inherent" and "external"
    :raises KeyError: if the schema has no "type", or if a property listed
        in schema["inherent"] is missing from the item
    """
    inherent_keys = schema.get("inherent") if schema["type"] == "object" else None
    if not inherent_keys:
        return {"inherent": item, "external": None}

    item_inherent = {k: item[k] for k in inherent_keys}
    # Build the remainder as a fresh dict rather than deleting keys from the
    # caller's item, so the caller can keep using (and logging) it afterwards.
    item_external = {k: v for k, v in item.items() if k not in item_inherent}
    return {"inherent": item_inherent, "external": item_external}
|
|
553
|
+
|
|
554
|
+
|
|
653
555
|
def is_schema_recursive(schema):
    """
    Determine if a given schema has elements that need to recurse

    An object schema is recursive when any of its properties is itself of
    type "object" or "array"; an array schema is recursive when its items
    are of type "object" or "array". Any other schema is not recursive.

    :param dict schema: schema to inspect; must have a "type" key, plus
        "properties" for objects or "items" for arrays
    :return bool: whether the schema contains nested objects or arrays
    :raises KeyError: if a required schema key is missing
    """
    nested_types = ("object", "array")
    if schema["type"] == "object":
        # Look up each property by the loop value, not the literal "prop".
        return any(
            sub_schema["type"] in nested_types
            for sub_schema in schema["properties"].values()
        )
    if schema["type"] == "array":
        return schema["items"]["type"] in nested_types
    return False
|
|
667
569
|
|
|
668
570
|
|
|
669
|
-
def connect_mongo(
|
|
670
|
-
|
|
571
|
+
def connect_mongo(
|
|
572
|
+
host="0.0.0.0", port=27017, database="henge_dict", collection="store"
|
|
573
|
+
):
|
|
671
574
|
"""
|
|
672
575
|
Connect to MongoDB and return the MongoDB-backed dict object
|
|
673
576
|
|
|
@@ -682,64 +585,20 @@ def connect_mongo(host='0.0.0.0', port=27017, database='henge_dict',
|
|
|
682
585
|
"""
|
|
683
586
|
from importlib import import_module
|
|
684
587
|
from inspect import stack
|
|
588
|
+
|
|
685
589
|
for lib in LIBS_BY_BACKEND["mongo"]:
|
|
686
590
|
try:
|
|
687
591
|
globals()[lib] = import_module(lib)
|
|
688
592
|
except ImportError:
|
|
689
593
|
raise ImportError(
|
|
690
594
|
"Requirements not met. Package '{}' is required to setup "
|
|
691
|
-
"MongoDB connection. Install the package and call '{}' again.".
|
|
692
|
-
|
|
693
|
-
|
|
694
|
-
|
|
695
|
-
|
|
696
|
-
|
|
697
|
-
|
|
698
|
-
|
|
699
|
-
|
|
700
|
-
|
|
701
|
-
"""
|
|
702
|
-
Builds argument parser.
|
|
703
|
-
|
|
704
|
-
:return argparse.ArgumentParser
|
|
705
|
-
"""
|
|
706
|
-
banner = "%(prog)s - Keeper of druids: " \
|
|
707
|
-
"a python interface to Decomposable Recursive UIDs"
|
|
708
|
-
additional_description = "\n..."
|
|
709
|
-
parser = VersionInHelpParser(version=__version__, description=banner,
|
|
710
|
-
epilog=additional_description)
|
|
711
|
-
|
|
712
|
-
parser.add_argument(
|
|
713
|
-
"-V", "--version",
|
|
714
|
-
action="version",
|
|
715
|
-
version="%(prog)s {v}".format(v=__version__))
|
|
716
|
-
|
|
717
|
-
parser.add_argument(
|
|
718
|
-
"-i", "--input", required=True,
|
|
719
|
-
help="File path to input file.")
|
|
720
|
-
|
|
721
|
-
parser.add_argument(
|
|
722
|
-
"-p", "--parameter", type=int, default=0,
|
|
723
|
-
help="Some parameter.")
|
|
724
|
-
|
|
725
|
-
return parser
|
|
726
|
-
|
|
727
|
-
|
|
728
|
-
def main():
|
|
729
|
-
""" Primary workflow """
|
|
730
|
-
|
|
731
|
-
parser = logmuse.add_logging_options(build_argparser())
|
|
732
|
-
args = parser.parse_args()
|
|
733
|
-
global _LOGGER
|
|
734
|
-
_LOGGER = logmuse.logger_via_cli(args, make_root=True)
|
|
735
|
-
|
|
736
|
-
msg = "Input: {input}; Parameter: {parameter}"
|
|
737
|
-
_LOGGER.info(msg.format(input=args.input, parameter=args.parameter))
|
|
738
|
-
|
|
739
|
-
|
|
740
|
-
if __name__ == '__main__':
|
|
741
|
-
try:
|
|
742
|
-
sys.exit(main())
|
|
743
|
-
except KeyboardInterrupt:
|
|
744
|
-
_LOGGER.error("Program canceled by user!")
|
|
745
|
-
sys.exit(1)
|
|
595
|
+
"MongoDB connection. Install the package and call '{}' again.".format(
|
|
596
|
+
lib, stack()[0][3]
|
|
597
|
+
)
|
|
598
|
+
)
|
|
599
|
+
pymongo.Connection = lambda host, port, **kwargs: pymongo.MongoClient(
|
|
600
|
+
host=host, port=port
|
|
601
|
+
)
|
|
602
|
+
return mongodict.MongoDict(
|
|
603
|
+
host=host, port=port, database=database, collection=collection
|
|
604
|
+
)
|
|
@@ -1,20 +1,20 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: henge
|
|
3
|
-
Version: 0.
|
|
3
|
+
Version: 0.2.1
|
|
4
4
|
Summary: Storage and retrieval of object-derived, decomposable recursive unique identifiers.
|
|
5
5
|
Home-page: https://databio.org
|
|
6
6
|
Author: Nathan Sheffield
|
|
7
7
|
Author-email: nathan@code.databio.org
|
|
8
8
|
License: BSD2
|
|
9
|
-
Platform: UNKNOWN
|
|
10
9
|
Classifier: Development Status :: 4 - Beta
|
|
11
10
|
Classifier: License :: OSI Approved :: BSD License
|
|
12
|
-
Classifier: Programming Language :: Python ::
|
|
13
|
-
Classifier: Programming Language :: Python :: 3.
|
|
14
|
-
Classifier: Programming Language :: Python :: 3.
|
|
11
|
+
Classifier: Programming Language :: Python :: 3.7
|
|
12
|
+
Classifier: Programming Language :: Python :: 3.8
|
|
13
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
15
15
|
Classifier: Topic :: System :: Distributed Computing
|
|
16
16
|
Description-Content-Type: text/markdown
|
|
17
|
-
|
|
17
|
+
License-File: LICENSE.txt
|
|
18
18
|
Requires-Dist: jsonschema
|
|
19
19
|
Requires-Dist: ubiquerg (>=0.5.2)
|
|
20
20
|
Requires-Dist: yacman (>=0.6.7)
|
|
@@ -26,5 +26,3 @@ Requires-Dist: yacman (>=0.6.7)
|
|
|
26
26
|
Henge is a Python package that builds backends for generic decomposable recursive unique identifiers (or, *DRUIDs*). It is intended to be used as a building block for sequence collections (see the [seqcol package](https://github.com/databio/seqcol)), and also for other data types that need content-derived identifiers.
|
|
27
27
|
|
|
28
28
|
Documentation at [http://henge.databio.org](http://henge.databio.org).
|
|
29
|
-
|
|
30
|
-
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
henge/__init__.py,sha256=Su4ZQre-GS24y-LjiAIf57GYovLTww0hkk2fDxzMt_g,289
|
|
2
|
+
henge/_version.py,sha256=Zn1KFblwuFHiDRdRAiRnDBRkbPttWh44jKa5zG2ov0E,22
|
|
3
|
+
henge/const.py,sha256=0t3EgqdjmKBd-zu5L8AJnGoWv0T3sAtvXf-3b62Dd-Y,194
|
|
4
|
+
henge/deprecated.py,sha256=C8eINR2gWCiNaN2b3gbFYn8jfJ0ftJm8a_fIgVVVzXc,11248
|
|
5
|
+
henge/henge.py,sha256=mvAcXefMy5r8XcgRe9XXPH5uuXdyL9-sXRYWLNp_fos,23454
|
|
6
|
+
henge-0.2.1.dist-info/LICENSE.txt,sha256=oB6ZGDa4kcznznJKJsLLFFcOZyi8Y6e2Jv0rJozgp-I,1269
|
|
7
|
+
henge-0.2.1.dist-info/METADATA,sha256=zW6QW4rlOKOpkymjUGkvlMRhbdXZQU_7YvTx3WM2ap8,1270
|
|
8
|
+
henge-0.2.1.dist-info/WHEEL,sha256=2wepM1nk4DS4eFpYrW1TTqPcoGNfHhhO_i5m4cOimbo,92
|
|
9
|
+
henge-0.2.1.dist-info/entry_points.txt,sha256=c2OKgrH1a5Cx2osbUFSe9NFK8CbN82lPPsi4wry77_M,61
|
|
10
|
+
henge-0.2.1.dist-info/top_level.txt,sha256=QyovlLuKhhKP1r8bMVmxLdke9F6PZFIN7VlkzvB0xIQ,6
|
|
11
|
+
henge-0.2.1.dist-info/RECORD,,
|
henge-0.1.1.dist-info/RECORD
DELETED
|
@@ -1,10 +0,0 @@
|
|
|
1
|
-
henge/__init__.py,sha256=mgUuUeNMFlWAwfiyjj1YFSVjGzAoyKB_lr56cYygiA0,272
|
|
2
|
-
henge/_version.py,sha256=rnObPjuBcEStqSO0S6gsdS_ot8ITOQjVj_-P1LUUYpg,22
|
|
3
|
-
henge/const.py,sha256=BW_CMh8eU7QR8umjZA3Th3uNHZNi7jmbDp9J_uHg5to,193
|
|
4
|
-
henge/henge.py,sha256=y0oQIM_icunEiSN4_9hNw2HNAYki0wm4ehV0dY9dFYM,31385
|
|
5
|
-
henge-0.1.1.dist-info/LICENSE.txt,sha256=oB6ZGDa4kcznznJKJsLLFFcOZyi8Y6e2Jv0rJozgp-I,1269
|
|
6
|
-
henge-0.1.1.dist-info/METADATA,sha256=ljP5d6hCmcOCnObmaXLdzn3lQV8MRPjIfjIO2lRntws,1236
|
|
7
|
-
henge-0.1.1.dist-info/WHEEL,sha256=OqRkF0eY5GHssMorFjlbTIq072vpHpF60fIQA6lS9xA,92
|
|
8
|
-
henge-0.1.1.dist-info/entry_points.txt,sha256=XzpBzx_VAbjQiN-QErDU5al7HZhTtI1nZKvbz17guQQ,62
|
|
9
|
-
henge-0.1.1.dist-info/top_level.txt,sha256=QyovlLuKhhKP1r8bMVmxLdke9F6PZFIN7VlkzvB0xIQ,6
|
|
10
|
-
henge-0.1.1.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|