henge 0.1.1__tar.gz → 0.2.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- henge-0.2.1/PKG-INFO +25 -0
- henge-0.2.1/henge/__init__.py +12 -0
- henge-0.2.1/henge/_version.py +1 -0
- {henge-0.1.1 → henge-0.2.1}/henge/const.py +1 -1
- henge-0.2.1/henge/deprecated.py +224 -0
- henge-0.2.1/henge/henge.py +604 -0
- henge-0.2.1/henge.egg-info/PKG-INFO +25 -0
- {henge-0.1.1 → henge-0.2.1}/henge.egg-info/SOURCES.txt +1 -0
- {henge-0.1.1 → henge-0.2.1}/henge.egg-info/entry_points.txt +0 -1
- {henge-0.1.1 → henge-0.2.1}/henge.egg-info/requires.txt +0 -1
- {henge-0.1.1 → henge-0.2.1}/setup.py +15 -14
- henge-0.1.1/PKG-INFO +0 -24
- henge-0.1.1/henge/__init__.py +0 -10
- henge-0.1.1/henge/_version.py +0 -1
- henge-0.1.1/henge/henge.py +0 -745
- henge-0.1.1/henge.egg-info/PKG-INFO +0 -24
- {henge-0.1.1 → henge-0.2.1}/LICENSE.txt +0 -0
- {henge-0.1.1 → henge-0.2.1}/README.md +0 -0
- {henge-0.1.1 → henge-0.2.1}/henge.egg-info/dependency_links.txt +0 -0
- {henge-0.1.1 → henge-0.2.1}/henge.egg-info/top_level.txt +0 -0
- {henge-0.1.1 → henge-0.2.1}/setup.cfg +0 -0
henge-0.2.1/PKG-INFO
ADDED
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
Metadata-Version: 2.1
|
|
2
|
+
Name: henge
|
|
3
|
+
Version: 0.2.1
|
|
4
|
+
Summary: Storage and retrieval of object-derived, decomposable recursive unique identifiers.
|
|
5
|
+
Home-page: https://databio.org
|
|
6
|
+
Author: Nathan Sheffield
|
|
7
|
+
Author-email: nathan@code.databio.org
|
|
8
|
+
License: BSD2
|
|
9
|
+
Classifier: Development Status :: 4 - Beta
|
|
10
|
+
Classifier: License :: OSI Approved :: BSD License
|
|
11
|
+
Classifier: Programming Language :: Python :: 3.7
|
|
12
|
+
Classifier: Programming Language :: Python :: 3.8
|
|
13
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
15
|
+
Classifier: Topic :: System :: Distributed Computing
|
|
16
|
+
Description-Content-Type: text/markdown
|
|
17
|
+
License-File: LICENSE.txt
|
|
18
|
+
|
|
19
|
+
[Build Status](https://travis-ci.com/databio/henge)
|
|
20
|
+
|
|
21
|
+
# Henge
|
|
22
|
+
|
|
23
|
+
Henge is a Python package that builds backends for generic decomposable recursive unique identifiers (or, *DRUIDs*). It is intended to be used as a building block for sequence collections (see the [seqcol package](https://github.com/databio/seqcol)), and also for other data types that need content-derived identifiers.
|
|
24
|
+
|
|
25
|
+
Documentation at [http://henge.databio.org](http://henge.databio.org).
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
__version__ = "0.2.1"
|
|
@@ -0,0 +1,224 @@
|
|
|
1
|
+
|
|
2
|
+
def retrieveOld(self, druid, reclimit=None, raw=False):
    """
    Retrieve an item given a digest (older implementation).

    The item type is read from ``self.database`` under the key
    ``druid + ITEM_TYPE`` and selects the schema from ``self.schemas``. The
    stored string itself is fetched with ``self.lookup(druid, item_type)``.

    :param str druid: The digest that identifies the item to retrieve.
    :param int reclimit: Recursion limit. ``None`` means no limit (default);
        ``0`` returns the stored values without following nested digests.
        It is decremented by one before each recursive ``self.retrieve`` call.
    :param bool raw: Not interpreted here; forwarded unchanged to the
        recursive ``self.retrieve`` calls made for object attributes and
        henge-classed primitives.
    :return: a list for "array" schemas, a dict for "object" schemas, and
        otherwise either the looked-up string or the result of
        ``self.retrieve``.
    :raises NotFoundException: if the item type for ``druid`` cannot be read
        from ``self.database``.
    """
    try:
        item_type = self.database[druid + ITEM_TYPE]
    except Exception as err:
        # `Exception` rather than a bare `except:` so KeyboardInterrupt and
        # SystemExit are not turned into a "not found" error; the original
        # failure is kept as the cause.
        _LOGGER.debug(f"Item type not saved in database for {druid}")
        raise NotFoundException(druid) from err

    schema = self.schemas[item_type]
    _LOGGER.debug("Got druid to retrieve: {} / item_type: {} / schema: {}".format(
        druid, item_type, schema))

    recursion_exhausted = isinstance(reclimit, int) and reclimit == 0

    if schema["type"] == "array":
        string = self.lookup(druid, item_type)
        _LOGGER.debug("Lookup/array/Recursive: {}; Schema: {}".format(string, schema))
        splitstr = string.split(DELIM_ITEM)
        if recursion_exhausted or 'henge_class' not in schema['items']:
            # Either we may not recurse any further, or the elements are
            # plain values: return them as stored.
            return splitstr
        _LOGGER.debug("Henge classed array: {}; Schema: {}".format(string, schema))
        if isinstance(reclimit, int):
            reclimit = reclimit - 1
        # NOTE(review): `raw` is not forwarded here, unlike the object and
        # primitive branches below -- confirm whether that is intentional.
        return [self.retrieve(substr, reclimit) for substr in splitstr]

    if schema["type"] == "object":
        string = self.lookup(druid, item_type)
        attr_array = string.split(DELIM_ATTR)
        if self.flexible_digests:
            # Flexible digests store alternating key/value entries.
            keys = attr_array[::2]  # evens
            vals = attr_array[1::2]  # odds
            item_reconstituted = dict(zip(keys, vals))
        else:
            # Fixed digests store values only, in schema property order.
            item_reconstituted = dict(zip(schema['properties'].keys(),
                                          attr_array))
        if 'recursive' in schema:
            if recursion_exhausted:
                _LOGGER.debug("Lookup/obj/Recursive: {}; Schema: {}".format(string, schema))
                return item_reconstituted
            if isinstance(reclimit, int):
                reclimit = reclimit - 1
            for recursive_attr in schema['recursive']:
                # Only follow attributes that are present and non-empty.
                if recursive_attr in item_reconstituted \
                        and item_reconstituted[recursive_attr] != "":
                    item_reconstituted[recursive_attr] = self.retrieve(
                        item_reconstituted[recursive_attr],
                        reclimit,
                        raw)
        return item_reconstituted

    # It must be a primitive type, but it could be a primitive (string) that
    # represents something to look up, or something not to look up.
    _LOGGER.debug("Lookup/prim: {}; Schema: {}".format(druid, schema))
    if 'henge_class' in schema and self.schemas[schema['henge_class']]['type'] in ['object', 'array']:
        if recursion_exhausted:
            # Fetch the value *before* logging it: the previous order
            # referenced `string` before assignment (UnboundLocalError).
            string = self.lookup(druid, item_type)
            _LOGGER.debug("Lookup/prim/Recursive-skip: {}; Schema: {}".format(string, schema))
            return string
        if isinstance(reclimit, int):
            reclimit = reclimit - 1
        _LOGGER.debug("Lookup/prim/Recursive: {}; Schema: {}".format(druid, schema))
        return self.retrieve(druid, reclimit, raw)

    string = self.lookup(druid, item_type)
    _LOGGER.debug("Lookup/prim/Non-recursive: {}; Schema: {}".format(string, schema))
    return string
|
|
81
|
+
|
|
82
|
+
# try:
|
|
83
|
+
# string = henge_to_query.database[druid]
|
|
84
|
+
# except KeyError:
|
|
85
|
+
# raise NotFoundException(druid)
|
|
86
|
+
|
|
87
|
+
# return reconstruct_item(string, schema, reclimit)
|
|
88
|
+
|
|
89
|
+
def retrieve2(self, druid, reclimit=None, raw=False):
    """
    Retrieve an item given a digest

    The item type is read from ``self.database`` under ``druid + ITEM_TYPE``;
    it selects both the henge to query (``self.henges``) and the schema
    (``self.schemas``) used to rebuild the stored string.

    :param str druid: The Decomposable recursive unique identifier (DRUID), or
        digest that uniquely identifies that item to retrieve.
    :param int reclimit: Recursion limit. Set to None for no limit (default).
    :param bool raw: Return the value as a raw, henge-delimited string, instead
        of processing into a mapping. Default: False. In this function it is
        only forwarded to the recursive ``self.retrieve`` calls.
    :raises NotFoundException: if the item type, the henge for that type, or
        the stored value for ``druid`` cannot be found.
    """
    def reconstruct_item(string, schema, reclimit):
        # Rebuild one stored value according to `schema`.
        # `.get` keeps a schema without a "type" key on the primitive path
        # instead of raising KeyError on the second type test.
        schema_type = schema.get("type")

        if schema_type == "array":
            _LOGGER.debug("Lookup/array/Recursive: {}; Schema: {}".format(string, schema))
            splitstr = string.split(DELIM_ITEM)
            items_schema = schema['items']
            if 'henge_class' in items_schema and items_schema['type'] not in ["object", "array"]:
                _LOGGER.debug("Henge classed array: {}; Schema: {}".format(string, schema))
                # NOTE(review): placeholder return value. The original had an
                # unreachable statement after it that rebuilt each element
                # from self.henges[item_type].database -- confirm the
                # intended behaviour before reviving that path.
                return "ASDF"
            return [reconstruct_item(substr, items_schema, reclimit)
                    for substr in splitstr]

        if schema_type == "object":
            attr_array = string.split(DELIM_ATTR)
            if self.flexible_digests:
                # Flexible digests store alternating key/value entries.
                keys = attr_array[::2]  # evens
                vals = attr_array[1::2]  # odds
                item_reconstituted = dict(zip(keys, vals))
            else:
                # Fixed digests store values only, in schema property order.
                item_reconstituted = dict(zip(schema['properties'].keys(),
                                              attr_array))
            if 'recursive' in schema:
                if isinstance(reclimit, int) and reclimit == 0:
                    _LOGGER.debug("Lookup/obj/Recursive: {}; Schema: {}".format(string, schema))
                    return item_reconstituted
                if isinstance(reclimit, int):
                    reclimit = reclimit - 1
                for recursive_attr in schema['recursive']:
                    # `.get` skips attributes that are absent from the stored
                    # item instead of raising KeyError; empty strings are
                    # skipped as before.
                    nested_druid = item_reconstituted.get(recursive_attr)
                    if nested_druid:
                        item_reconstituted[recursive_attr] = self.retrieve(
                            nested_druid,
                            reclimit,
                            raw)
            return item_reconstituted

        # It must be a primitive, but it could be a primitive (string) that
        # represents something to look up, or something not to look up.
        _LOGGER.debug("Lookup/prim: {}; Schema: {}".format(string, schema))
        if 'henge_class' in schema and self.schemas[schema['henge_class']]['type'] in ['object', 'array']:
            if isinstance(reclimit, int) and reclimit == 0:
                _LOGGER.debug("Lookup/prim/Recursive-skip: {}; Schema: {}".format(string, schema))
                return string
            if isinstance(reclimit, int):
                reclimit = reclimit - 1
            _LOGGER.debug("Lookup/prim/Recursive: {}; Schema: {}".format(string, schema))
            return self.retrieve(string, reclimit, raw)
        _LOGGER.debug("Lookup/prim/Non-recursive: {}; Schema: {}".format(string, schema))
        return string

    # A membership test would require the database to define __iter__, so
    # lookups are attempted directly and failures are translated.
    # `Exception` rather than a bare `except:` so KeyboardInterrupt and
    # SystemExit are not reported as "not found".
    try:
        item_type = self.database[druid + ITEM_TYPE]
    except Exception as err:
        _LOGGER.debug(f"Item type not saved in database for {druid}")
        raise NotFoundException(druid) from err

    try:
        henge_to_query = self.henges[item_type]
    except Exception as err:
        _LOGGER.debug("No henges available for this item type")
        raise NotFoundException(druid) from err

    try:
        string = henge_to_query.database[druid]
    except KeyError as err:
        raise NotFoundException(druid) from err

    schema = self.schemas[item_type]
    _LOGGER.debug("Got druid to retrieve: {} / item_type: {} / schema: {}".format(
        druid, item_type, schema))
    return reconstruct_item(string, schema, reclimit)
|
|
187
|
+
|
|
188
|
+
|
|
189
|
+
|
|
190
|
+
# Part of: _insert_flat
|
|
191
|
+
|
|
192
|
+
def safestr(item, x):
    """
    Return ``str(item[x])``, or an empty string when that fails.

    :param item: container indexed with ``x``.
    :param x: key (or index) to read from ``item``.
    :return str: the string form of ``item[x]``; ``""`` if the lookup or the
        conversion raises ValueError, TypeError or KeyError.
    """
    try:
        text = str(item[x])
    except (ValueError, TypeError, KeyError):
        # Missing key, un-indexable item, or a failing __str__: treat the
        # attribute as empty.
        text = ""
    return text
|
|
197
|
+
|
|
198
|
+
|
|
199
|
+
|
|
200
|
+
def build_attr_string(item, schema, item_name=None):
    """
    Serialize ``item`` into a delimited string according to ``schema``.

    * "array" schemas: every element is serialized with ``schema['items']``
      and the results are joined with ``DELIM_ITEM``.
    * "object" schemas that define 'properties': attribute values are joined
      with ``DELIM_ATTR``. When ``self.flexible_digests`` is set, only keys
      present in both ``item`` and the schema properties are written, sorted
      by key, as alternating key/value entries; otherwise the value of every
      schema property is written in schema order (``safestr`` supplies ``""``
      for missing ones).
    * anything else (including a schema without a "type" key) is treated as
      a primitive and returned unchanged.

    NOTE(review): this function reads ``self`` and calls ``safestr`` from an
    enclosing scope rather than receiving them as arguments -- presumably it
    was nested inside ``_insert_flat``; confirm before reusing it standalone.

    :param item: the value to serialize (sequence, mapping or primitive).
    :param dict schema: schema describing ``item``.
    :param str item_name: unused; kept for backward compatibility.
    :return: the delimited string, or ``item`` itself for primitives.
    """
    # `.get` so a schema lacking "type" falls through to the primitive case
    # instead of raising KeyError on the second type test.
    schema_type = schema.get("type")

    if schema_type == "array":
        # Both the flexible and the fixed variants serialized arrays the same
        # way, so no flexible_digests check is needed here.
        return DELIM_ITEM.join([build_attr_string(x, schema['items'])
                                for x in item])

    if schema_type == "object" and 'properties' in schema:
        properties = schema['properties']
        if self.flexible_digests:
            # flexible schema: only attributes the item actually has
            keys_to_include = sorted(k for k in item.keys() if k in properties)
            return DELIM_ATTR.join([DELIM_ATTR.join([k, safestr(item, k)])
                                    for k in keys_to_include])
        # fixed schema: one slot per schema property, in schema order
        return DELIM_ATTR.join([safestr(item, x) for x in properties.keys()])

    # assume it's a primitive
    return item
|