dcicutils 7.11.0__py3-none-any.whl → 7.11.0.1b9__py3-none-any.whl
- dcicutils/misc_utils.py +95 -1
- dcicutils/sheet_utils.py +1131 -0
- {dcicutils-7.11.0.dist-info → dcicutils-7.11.0.1b9.dist-info}/METADATA +3 -1
- {dcicutils-7.11.0.dist-info → dcicutils-7.11.0.1b9.dist-info}/RECORD +7 -6
- {dcicutils-7.11.0.dist-info → dcicutils-7.11.0.1b9.dist-info}/LICENSE.txt +0 -0
- {dcicutils-7.11.0.dist-info → dcicutils-7.11.0.1b9.dist-info}/WHEEL +0 -0
- {dcicutils-7.11.0.dist-info → dcicutils-7.11.0.1b9.dist-info}/entry_points.txt +0 -0
dcicutils/misc_utils.py
CHANGED
@@ -9,6 +9,7 @@ import hashlib
 import inspect
 import math
 import io
+import json
 import os
 import logging
 import pytz
@@ -191,7 +192,11 @@ class _VirtualAppHelper(webtest.TestApp):
     pass


-class VirtualApp:
+class AbstractVirtualApp:
+    pass
+
+
+class VirtualApp(AbstractVirtualApp):
     """
     Wrapper class for TestApp, to allow custom control over submitting Encoded requests,
     simulating a number of conditions, including permissions.
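
Note: AbstractVirtualApp gives other modules (such as sheet_utils, added below, which imports it) a webtest-free type to target. A minimal sketch of the intended use; the function here is invented for illustration:

    from typing import Optional
    from dcicutils.misc_utils import AbstractVirtualApp

    def describe_vapp(portal_vapp: Optional[AbstractVirtualApp]) -> str:
        # Accepts a real VirtualApp or any lightweight test stand-in that
        # subclasses AbstractVirtualApp, without importing webtest here.
        return "no vapp" if portal_vapp is None else type(portal_vapp).__name__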
@@ -1352,6 +1357,25 @@ def capitalize1(s):
     return s[:1].upper() + s[1:]


+"""
+Python's UUID ignores all dashes, whereas Postgres is more strict
+http://www.postgresql.org/docs/9.2/static/datatype-uuid.html
+See also http://www.postgresql.org/docs/9.2/static/datatype-uuid.html
+And, anyway, this pattern is what our portals have been doing
+for quite a while, so it's the most stable choice for us now.
+"""
+
+uuid_re = re.compile(r'(?i)[{]?(?:[0-9a-f]{4}-?){8}[}]?')
+
+
+def is_uuid(instance):
+    """
+    Predicate returns true for any group of 32 hex characters with optional hyphens every four characters.
+    We insist on lowercase to make matching faster. See other notes on this design choice above.
+    """
+    return bool(uuid_re.match(instance))
+
+
 def string_list(s):
     """
     Turns a comma-separated list into an actual list, trimming whitespace and ignoring nulls.
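
Note: given the regexp above, is_uuid accepts 32 hex digits with an optional hyphen after each group of four and optional surrounding braces. A few illustrative checks (values invented):

    from dcicutils.misc_utils import is_uuid

    assert is_uuid("123e4567-e89b-12d3-a456-426614174000")    # canonical hyphenation
    assert is_uuid("123e4567e89b12d3a456426614174000")        # bare 32 hex digits
    assert is_uuid("{123e4567-e89b-12d3-a456-426614174000}")  # Postgres-style braces
    assert not is_uuid("123e4567-e89b")                       # too short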
@@ -2313,3 +2337,73 @@ def parse_in_radix(text: str, *, radix: int):
     except Exception:
         pass
     raise ValueError(f"Unable to parse: {text!r}")
+
+
+def pad_to(target_size: int, data: list, *, padding=None):
+    """
+    This will pad to a given target size, a list of a potentially different actual size, using given padding.
+    e.g., pad_to(3, [1, 2]) will return [1, 2, None]
+    """
+    actual_size = len(data)
+    if actual_size < target_size:
+        data = data + [padding] * (target_size - actual_size)
+    return data
+
+
+class JsonLinesReader:
+
+    def __init__(self, fp, padded=False, padding=None):
+        """
+        Given an fp (the conventional name for a "file pointer", the thing a call to io.open returns),
+        this creates an object that can be used to iterate across the lines in the JSON lines file
+        that the fp is reading from.
+
+        There are two possible formats that this will return.
+
+        For files that contain a series of dictionaries, such as:
+            {"something": 1, "else": "a"}
+            {"something": 2, "else": "b"}
+            ...etc
+        this will just return those dictionaries one-by-one when iterated over.
+
+        The same set of dictionaries will also be yielded by a file containing:
+            ["something", "else"]
+            [1, "a"]
+            [2, "b"]
+            ...etc
+        this will just return those dictionaries one-by-one when iterated over.
+
+        NOTES:
+
+        * In the second case, shorter lists on subsequent lines return only partial dictionaries.
+        * In the second case, longer lists on subsequent lines will quietly drop any extra elements.
+        """
+
+        self.fp = fp
+        self.padded: bool = padded
+        self.padding = padding
+        self.headers = None  # Might change after we see first line
+
+    def __iter__(self):
+        first_line = True
+        n_headers = 0
+        for raw_line in self.fp:
+            line = json.loads(raw_line)
+            if first_line:
+                first_line = False
+                if isinstance(line, list):
+                    self.headers = line
+                    n_headers = len(line)
+                    continue
+            # If length of line is more than we expect, ignore it. Let user put comments beyond our table
+            # But if length of line is less than we expect, extend the line with None
+            if self.headers:
+                if not isinstance(line, list):
+                    raise Exception("If the first line is a list, all lines must be.")
+                if self.padded and len(line) < n_headers:
+                    line = pad_to(n_headers, line, padding=self.padding)
+                yield dict(zip(self.headers, line))
+            elif isinstance(line, dict):
+                yield line
+            else:
+                raise Exception(f"If the first line is not a list, all lines must be dictionaries: {line!r}")
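
Note: a short usage sketch of the two additions above (file content invented):

    import io
    from dcicutils.misc_utils import JsonLinesReader, pad_to

    assert pad_to(3, [1, 2]) == [1, 2, None]  # the docstring's own example

    # Tabular form: a header list first, then rows; padded=True fills short rows.
    fp = io.StringIO('["something", "else"]\n[1, "a"]\n[2]\n')
    rows = list(JsonLinesReader(fp, padded=True))
    assert rows == [{"something": 1, "else": "a"}, {"something": 2, "else": None}]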
dcicutils/sheet_utils.py
ADDED
@@ -0,0 +1,1131 @@
+import chardet
+import contextlib
+import copy
+import csv
+import glob
+import io
+import json
+import openpyxl
+import os
+import re
+import subprocess
+import uuid
+import yaml
+
+from openpyxl.worksheet.worksheet import Worksheet
+from openpyxl.workbook.workbook import Workbook
+from tempfile import TemporaryFile, TemporaryDirectory
+from typing import Any, Dict, Iterable, List, Optional, Tuple, Type, Union
+from .common import AnyJsonData
+from .env_utils import public_env_name, EnvUtils
+from .ff_utils import get_schema
+from .lang_utils import conjoined_list, disjoined_list, maybe_pluralize, there_are
+from .misc_utils import ignored, PRINT, pad_to, JsonLinesReader, AbstractVirtualApp, remove_suffix
+from .task_utils import pmap
+
+
+Header = str
+Headers = List[str]
+ParsedHeader = List[Union[str, int]]
+ParsedHeaders = List[ParsedHeader]
+SheetCellValue = Union[int, float, str]
+SheetRow = List[SheetCellValue]
+CsvReader = type(csv.reader(TemporaryFile()))
+SheetData = List[dict]
+TabbedSheetData = Dict[str, SheetData]
+Regexp = type(re.compile("sample"))
+
+
+class LoadFailure(Exception):
+    """
+    In general, we'd prefer to load up the spreadsheet with clumsy data that can then be validated in detail,
+    but some errors are so confusing or so problematic that we need to just fail the load right away.
+    """
+    pass
+
+
+class LoadArgumentsError(LoadFailure):
+    """
+    Errors of this class represent situations where we can't get started because
+    there's a problem with the given arguments.
+    """
+    pass
+
+
+class LoadTableError(LoadFailure):
+    """
+    Errors of this class represent situations where we can't get started because
+    there's a problem with some table's syntax, for example headers that don't make sense.
+    """
+    pass
+
+
+@contextlib.contextmanager
+def deferred_problems():
+    problems = []
+
+    def note_problems(problem):
+        problems.append(problem)
+
+    yield note_problems
+
+    if problems:
+        for problem in problems:
+            PRINT(f"Problem: {problem}")
+        raise Exception(there_are(problems, kind='problem while compiling hints', tense='past', show=False))
+
+
+def unwanted_kwargs(*, context, kwargs, context_plural=False, detailed=False):
+    if kwargs:
+        unwanted = [f"{argname}={value!r}" if detailed else argname
+                    for argname, value in kwargs.items()
+                    if value is not None]
+        if unwanted:
+            does_not = "don't" if context_plural else "doesn't"
+            raise LoadArgumentsError(f"{context} {does_not} use"
+                                     f" {maybe_pluralize(unwanted, 'keyword argument')} {conjoined_list(unwanted)}.")
+
+
+def prefer_number(value: SheetCellValue):
+    if isinstance(value, str):  # the given value might be an int or float, in which case just fall through
+        if not value:
+            return None
+        value = value
+        ch0 = value[0]
+        if ch0 == '+' or ch0 == '-' or ch0.isdigit():
+            try:
+                return int(value)
+            except Exception:
+                pass
+            try:
+                return float(value)
+            except Exception:
+                pass
+        # If we couldn't parse it as an int or float, fall through to returning the original value
+        pass
+    return value
+
+
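Note: illustrative behavior of prefer_number; these examples follow directly from the code above:

    assert prefer_number("17") == 17        # integer-looking strings become ints
    assert prefer_number("-3.5") == -3.5    # sign and decimal point are accepted
    assert prefer_number("") is None        # empty cells become None
    assert prefer_number("17q") == "17q"    # unparseable text falls through unchanged
    assert prefer_number(3.5) == 3.5        # non-strings pass straight through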
+def expand_string_escape_sequences(text: str) -> str:
+    s = io.StringIO()
+    escaping = False
+    for ch in text:
+        if escaping:
+            if ch == 'r':
+                s.write('\r')
+            elif ch == 't':
+                s.write('\t')
+            elif ch == 'n':
+                s.write('\n')
+            elif ch == '\\':
+                s.write('\\')
+            else:
+                # Rather than err, just leave other sequences as-is.
+                s.write(f"\\{ch}")
+            escaping = False
+        elif ch == '\\':
+            escaping = True
+        else:
+            s.write(ch)
+    return s.getvalue()
+
+
+def open_unicode_text_input_file_respecting_byte_order_mark(filename):
+    """
+    Opens a file for text input, respecting a byte-order mark (BOM).
+    """
+    with io.open(filename, 'rb') as fp:
+        leading_bytes = fp.read(4 * 8)  # 4 bytes is all we need
+        bom_info = chardet.detect(leading_bytes, should_rename_legacy=True)
+        detected_encoding = bom_info and bom_info.get('encoding')  # tread lightly
+    use_encoding = 'utf-8' if detected_encoding == 'ascii' else detected_encoding
+    return io.open(filename, 'r', encoding=use_encoding)
+
+
+class TypeHint:
+    def apply_hint(self, value):
+        return value
+
+    def __str__(self):
+        return f"<{self.__class__.__name__}>"
+
+    def __repr__(self):
+        return self.__str__()
+
+
+class BoolHint(TypeHint):
+
+    def apply_hint(self, value):
+        if isinstance(value, str) and value:
+            if 'true'.startswith(value.lower()):
+                return True
+            elif 'false'.startswith(value.lower()):
+                return False
+        return super().apply_hint(value)
+
+
+class EnumHint(TypeHint):
+
+    def __str__(self):
+        return f"<EnumHint {','.join(f'{key}={val}' for key, val in self.value_map.items())}>"
+
+    def __init__(self, value_map):
+        self.value_map = value_map
+
+    def apply_hint(self, value):
+        if isinstance(value, str):
+            if value in self.value_map:
+                result = self.value_map[value]
+                return result
+            else:
+                lvalue = value.lower()
+                found = []
+                for lkey, key in self.value_map.items():
+                    if lkey.startswith(lvalue):
+                        found.append(lkey)
+                if len(found) == 1:
+                    [only_found] = found
+                    result = self.value_map[only_found]
+                    return result
+        return super().apply_hint(value)
+
+
+OptionalTypeHints = List[Optional[TypeHint]]
+
+
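Note: a sketch of how these hints resolve string cells. The EnumHint value_map below is shaped the way ItemTools.find_type_hint builds it later in this file ({enum_value.lower(): enum_value}); the color values are invented:

    assert BoolHint().apply_hint("T") is True          # any prefix of 'true'
    assert BoolHint().apply_hint("fals") is False      # any prefix of 'false'
    assert BoolHint().apply_hint("maybe") == "maybe"   # unmatched values pass through

    hint = EnumHint({"red": "RED", "green": "GREEN"})
    assert hint.apply_hint("gre") == "GREEN"           # a unique lowercase prefix wins
    assert hint.apply_hint("blue") == "blue"           # no match: value passes through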
+class ItemTools:
+    """
+    Implements operations on table-related data without pre-supposing the specific representation of the table.
+    It is assumed this can be used for data that was obtained from .json, .csv, .tsv, and .xlsx files because
+    it does not presuppose the source of the data nor where it will be written to.
+
+    For the purpose of this class:
+
+    * a 'header' is a string representing the top of a column.
+
+    * a 'parsed header' is a list of strings and/or ints, after splitting at uses of '#' or '.', so that
+      "a.b.c" is represented as ["a", "b", "c"], and "x.y#0" is represented as ["x", "y", 0], representing
+      each numeric token as an int instead of a string.
+
+    * a 'headers' object is just a list of strings, each of which is a 'header'.
+
+    * a 'parsed headers' object is a non-empty list of lists, each of which is a 'parsed header'.
+      e.g., the headers ["a.b.c", "x.y#0"] are represented as parsed headers [["a", "b", "c"], ["x", "y", 0]].
+
+    """
+
+    @classmethod
+    def parse_sheet_header(cls, header: Header) -> ParsedHeader:
+        result = []
+        token = ""
+        for i in range(len(header)):
+            ch = header[i]
+            if ch == '.' or ch == '#':
+                if token:
+                    result.append(int(token) if token.isdigit() else token)
+                token = ""
+            else:
+                token += ch
+        if token:
+            result.append(int(token) if token.isdigit() else token)
+        return result
+
+    @classmethod
+    def parse_sheet_headers(cls, headers: Headers):
+        return [cls.parse_sheet_header(header)
+                for header in headers]
+
+    @classmethod
+    def compute_patch_prototype(cls, parsed_headers: ParsedHeaders):
+        prototype = {}
+        for parsed_header in parsed_headers:
+            parsed_header0 = parsed_header[0]
+            if isinstance(parsed_header0, int):
+                raise LoadTableError(f"A header cannot begin with a numeric ref: {parsed_header0}")
+            cls.assure_patch_prototype_shape(parent=prototype, keys=parsed_header)
+        return prototype
+
+    @classmethod
+    def assure_patch_prototype_shape(cls, *, parent: Union[Dict, List], keys: ParsedHeader):
+        [key0, *more_keys] = keys
+        key1 = more_keys[0] if more_keys else None
+        if isinstance(key1, int):
+            placeholder = []
+        elif isinstance(key1, str):
+            placeholder = {}
+        else:
+            placeholder = None
+        if isinstance(key0, int):
+            n = len(parent)
+            if key0 == n:
+                parent.append(placeholder)
+            elif key0 > n:
+                raise LoadTableError("Numeric items must occur sequentially.")
+        elif isinstance(key0, str):
+            if key0 not in parent:
+                parent[key0] = placeholder
+        if key1 is not None:
+            cls.assure_patch_prototype_shape(parent=parent[key0], keys=more_keys)
+        return parent
+
+    INSTAGUIDS_ENABLED = False  # Experimental feature not enabled by default
+
+    @classmethod
+    def parse_item_value(cls, value: SheetCellValue, context=None) -> AnyJsonData:
+        # TODO: Remodularize this for easier testing and more Schema-driven effect
+        # Doug asks that this be broken up into different mechanisms, more modular and separately testable.
+        # I pretty much agree with that. I'm just waiting for suggestions on what kinds of features are desired.
+        if isinstance(value, str):
+            lvalue = value.lower()
+            # TODO: We could consult a schema to make this less heuristic, but this may do for now
+            if lvalue == 'true':
+                return True
+            elif lvalue == 'false':
+                return False
+            elif lvalue == 'null' or lvalue == '':
+                return None
+            elif '|' in value:
+                if value == '|':  # Use '|' for []
+                    return []
+                else:
+                    if value.endswith("|"):  # Use 'foo|' for ['foo']
+                        value = value[:-1]
+                    return [cls.parse_item_value(subvalue, context=context) for subvalue in value.split('|')]
+            elif cls.INSTAGUIDS_ENABLED and context is not None and value.startswith('#'):
+                # Note that this clause MUST follow '|' clause above so '#foo|#bar' isn't seen as instaguid
+                return cls.get_instaguid(value, context=context)
+            else:
+                # Doug points out that the schema might not agree, might want a string representation of a number.
+                # At this semantic layer, this might be a bad choice.
+                return prefer_number(value)
+        else:  # presumably a number (int or float)
+            return value
+
+    @classmethod
+    def get_instaguid(cls, guid_placeholder: str, *, context: Optional[Dict] = None):
+        if context is None:
+            return guid_placeholder
+        else:
+            referent = context.get(guid_placeholder)
+            if not referent:
+                context[guid_placeholder] = referent = str(uuid.uuid4())
+            return referent
+
+    @classmethod
+    def set_path_value(cls, datum: Union[List, Dict], path: ParsedHeader, value: Any, force: bool = False):
+        if (value is None or value == '') and not force:
+            return
+        [key, *more_path] = path
+        if not more_path:
+            datum[key] = value
+        else:
+            cls.set_path_value(datum[key], more_path, value)
+
+    @classmethod
+    def find_type_hint(cls, parsed_header: Optional[ParsedHeader], schema: Any):
+
+        def finder(subheader, subschema):
+            if not parsed_header:
+                return None
+            else:
+                [key1, *other_headers] = subheader
+                if isinstance(key1, str) and isinstance(subschema, dict):
+                    if subschema.get('type') == 'object':
+                        def1 = subschema.get('properties', {}).get(key1)
+                        if not other_headers:
+                            if def1 is not None:
+                                t = def1.get('type')
+                                if t == 'string':
+                                    enum = def1.get('enum')
+                                    if enum:
+                                        mapping = {e.lower(): e for e in enum}
+                                        return EnumHint(mapping)
+                                elif t == 'boolean':
+                                    return BoolHint()
+                                else:
+                                    pass  # fall through to asking super()
+                            else:
+                                pass  # fall through to asking super()
+                        else:
+                            return finder(subheader=other_headers, subschema=def1)
+
+        return finder(subheader=parsed_header, subschema=schema)
+
+    @classmethod
+    def infer_tab_name(cls, filename):
+        return os.path.basename(filename).split('.')[0]
+
+
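Note: taken together, these methods turn dotted/hashed headers into nested item structure. An illustrative sketch (header names invented):

    parsed = ItemTools.parse_sheet_headers(["user.name", "user.roles#0", "user.roles#1"])
    assert parsed == [["user", "name"], ["user", "roles", 0], ["user", "roles", 1]]

    assert ItemTools.compute_patch_prototype(parsed) == {"user": {"name": None, "roles": [None, None]}}

    assert ItemTools.parse_item_value("alpha|beta") == ["alpha", "beta"]  # '|' makes lists
    assert ItemTools.parse_item_value("|") == []
    assert ItemTools.parse_item_value("null") is None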
+# TODO: Consider whether this might want to be an abstract base class. Some change might be needed.
+#
+# Doug thinks we might want (metaclass=ABCMeta) here to make this an abstract base class.
+# I am less certain but open to discussion. Among other things, as implemented now,
+# the __init__ method here needs to run and the documentation says that ABC's won't appear
+# in the method resolution order. -kmp 17-Aug-2023
+# See also discussion at https://github.com/4dn-dcic/utils/pull/276#discussion_r1297775535
+class AbstractTableSetManager:
+    """
+    The TableSetManager is the spanning class of anything that wants to be able to load a table set,
+    regardless of what it wants to load it from. To do this, it must support a load method
+    that takes a filename and returns the file content in the form:
+        {
+            "Sheet1": [
+                {...representation of row1 as some kind of dict...},
+                {...representation of row2 as some kind of dict...}
+            ],
+            "Sheet2": [...],
+            ...,
+        }
+    It also needs some implementation of the .tab_names property.
+    Note that at this level of abstraction, we take no position on what form of representation is used
+    for the rows, as long as it is JSON data of some kind. It might be
+        {"col1": "val1", "col2": "val2", ...}
+    or it might be something more structured like
+        {"something": "val1", "something_else": ["val2"]}
+    Additionally, the values stored might be altered as well. In particular, the most likely alteration
+    is to turn "123" to 123 or "" to None, though the specifics of whether and how such transformations
+    happen is not constrained by this class.
+    """
+
+    ALLOWED_FILE_EXTENSIONS: List[str] = []
+
+    def __init__(self, filename: str, **kwargs):
+        self.filename: str = filename
+        unwanted_kwargs(context=self.__class__.__name__, kwargs=kwargs)
+
+    # TODO: Consider whether this should be an abstractmethod (but first see detailed design note at top of class.)
+    @classmethod
+    def load(cls, filename: str, **kwargs) -> TabbedSheetData:
+        """
+        Reads a filename and returns a dictionary that maps sheet names to rows of dictionary data.
+        For more information, see documentation of AbstractTableSetManager.
+        """
+        raise NotImplementedError(f".load(...) is not implemented for {cls.__name__}.")  # noQA
+
+    @property
+    def tab_names(self) -> List[str]:
+        raise NotImplementedError(f".tab_names is not implemented for {self.__class__.__name__}.")  # noQA
+
+    def load_content(self) -> Any:
+        raise NotImplementedError(f".load_content() is not implemented for {self.__class__.__name__}.")  # noQA
+
+
+class BasicTableSetManager(AbstractTableSetManager):
+    """
+    A BasicTableManager provides some structure that most kinds of parsers will need.
+    In particular, everything will likely need some way of storing headers and some way of storing content
+    of each sheet. Even a csv file, which doesn't have multiple tabs, can be seen as the degenerate case
+    of this where there's only one set of headers and only one block of content.
+    """
+
+    def __init__(self, filename: str, **kwargs):
+        super().__init__(filename=filename, **kwargs)
+        self.headers_by_tab_name: Dict[str, Headers] = {}
+        self.content_by_tab_name: Dict[str, SheetData] = {}
+        self.reader_agent: Any = self._get_reader_agent()
+
+    def tab_headers(self, tab_name: str) -> Headers:
+        return self.headers_by_tab_name[tab_name]
+
+    def tab_content(self, tab_name: str) -> List[AnyJsonData]:
+        return self.content_by_tab_name[tab_name]
+
+    @classmethod
+    def _create_tab_processor_state(cls, tab_name: str) -> Any:
+        """
+        This method provides for the possibility that some parsers will want auxiliary state,
+        (such as parsed headers or a line count or a table of temporary names for objects to cross-link
+        or some other such feature) that it carries with it as it moves from line to line parsing things.
+        Subclasses might therefore want to make this do something more interesting.
+        """
+        ignored(tab_name)  # subclasses might need this, but we don't
+        return None
+
+    def _get_reader_agent(self) -> Any:
+        """This function is responsible for opening the workbook and returning a workbook object."""
+        raise NotImplementedError(f"._get_reader_agent() is not implemented for {self.__class__.__name__}.")  # noQA
+
+
+class SemanticTableSetManager(BasicTableSetManager):
+    """
+    This is the base class for all workbook-like data sources, i.e., that may need to apply semantic processing.
+    Those may be:
+    * Excel workbook readers (.xlsx)
+    * Comma-separated file readers (.csv)
+    * Tab-separated file readers (.tsv in most of the world, but Microsoft stupidly calls this .txt, outright
+      refusing to write a .tsv file, so many people seem to compromise and call this .tsv.txt)
+    There are two levels to each of these: a class that is not semantically interpreted,
+    and a class that is semantically interpreted as an "item".
+
+    This is NOT a parent class of these kinds of files, which we always take literally as if semantic processing
+    were already done (in part so that they can be used to test the results of other formats):
+    * Json files
+    * Yaml files
+    * Inserts directories
+    * JsonLines files
+    """
+
+    @classmethod
+    def load(cls, filename: str, **kwargs) -> AnyJsonData:
+        if cls.ALLOWED_FILE_EXTENSIONS:
+            if not any(filename.lower().endswith(suffix) for suffix in cls.ALLOWED_FILE_EXTENSIONS):
+                raise LoadArgumentsError(f"The TableSetManager subclass {cls.__name__} expects only"
+                                         f" {disjoined_list(cls.ALLOWED_FILE_EXTENSIONS)} filenames: {filename}")
+
+        table_set_manager: SemanticTableSetManager = cls(filename=filename, **kwargs)
+        return table_set_manager.load_content()
+
+    def __init__(self, filename: str, **kwargs):
+        super().__init__(filename=filename, **kwargs)
+
+    def _raw_row_generator_for_tab_name(self, tab_name: str) -> Iterable[SheetRow]:
+        """
+        Given a tab_name and a state (returned by _sheet_loader_state), return a generator for a set of row values.
+        """
+        raise NotImplementedError(f"._raw_row_generator_for_tab_name(...) is not implemented for {self.__class__.__name__}.")  # noQA
+
+    def _process_row(self, tab_name: str, state: Any, row: List[SheetCellValue]) -> AnyJsonData:
+        """
+        This needs to take a state and whatever represents a row and
+        must return a list of objects representing column values.
+        What constitutes a processed row is up to the class, but other than that the result must be a JSON dictionary.
+        """
+        raise NotImplementedError(f"._process_row(...) is not implemented for {self.__class__.__name__}.")  # noQA
+
+    def load_content(self) -> AnyJsonData:
+        for tab_name in self.tab_names:
+            sheet_content = []
+            state = self._create_tab_processor_state(tab_name)
+            for row_data in self._raw_row_generator_for_tab_name(tab_name):
+                processed_row_data: AnyJsonData = self._process_row(tab_name, state, row_data)
+                sheet_content.append(processed_row_data)
+            self.content_by_tab_name[tab_name] = sheet_content
+        return self.content_by_tab_name
+
+    @classmethod
+    def parse_cell_value(cls, value: SheetCellValue) -> AnyJsonData:
+        return prefer_number(value)
+
+
+class AbstractItemManager(AbstractTableSetManager):
+
+    pass
+
+
+class TableSetManagerRegistry:
+
+    def __init__(self):
+        self.manager_table: Dict[str, Type[AbstractTableSetManager]] = {}
+        self.regexp_mappings: List[Tuple[Regexp, Type[AbstractTableSetManager]]] = []
+
+    def register(self, regexp: Optional[str] = None):
+        def _wrapped_register(class_to_register: Type[AbstractTableSetManager]):
+            if regexp:
+                self.regexp_mappings.append((re.compile(regexp), class_to_register))
+            for ext in class_to_register.ALLOWED_FILE_EXTENSIONS:
+                existing = self.manager_table.get(ext)
+                if existing:
+                    raise Exception(f"Tried to define {class_to_register} to extension {ext},"
+                                    f" but {existing} already claimed that.")
+                self.manager_table[ext] = class_to_register
+            return class_to_register
+        return _wrapped_register
+
+    register1 = register
+
+    def manager_for_filename(self, filename: str) -> Type[AbstractTableSetManager]:
+        base: str = os.path.basename(filename)
+        suffix_parts = base.split('.')[1:]
+        if suffix_parts:
+            for i in range(0, len(suffix_parts)):
+                suffix = f".{'.'.join(suffix_parts[i:])}"
+                found: Optional[Type[AbstractTableSetManager]] = self.manager_table.get(suffix)
+                if found:
+                    return found
+        else:
+            special_case: Optional[Type[AbstractItemManager]] = self.manager_for_special_filename(filename)
+            if special_case:
+                return special_case
+        raise LoadArgumentsError(f"Unknown file type: {filename}")
+
+    def manager_for_special_filename(self, filename: str) -> Optional[Type[AbstractTableSetManager]]:
+        for pattern, manager_class in self.regexp_mappings:
+            if pattern.match(filename):
+                return manager_class
+        return None
+
+
+TABLE_SET_MANAGER_REGISTRY = TableSetManagerRegistry()
+ITEM_MANAGER_REGISTRY = TableSetManagerRegistry()
+
+
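Note: a sketch of how extension lookup resolves (filenames invented; the managers named here are registered further down in this file). Multi-dot suffixes are tried longest-first, so ".tabs.json" wins over ".json":

    assert TABLE_SET_MANAGER_REGISTRY.manager_for_filename("things.tabs.json") is TabbedJsonInsertsManager
    assert TABLE_SET_MANAGER_REGISTRY.manager_for_filename("things.json") is SimpleJsonInsertsManager
    assert TABLE_SET_MANAGER_REGISTRY.manager_for_filename("book.xlsx") is XlsxManager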
+@TABLE_SET_MANAGER_REGISTRY.register()
+class XlsxManager(SemanticTableSetManager):
+    """
+    This implements the mechanism to get a series of rows out of the sheets in an XLSX file.
+    """
+
+    ALLOWED_FILE_EXTENSIONS = ['.xlsx']
+
+    @classmethod
+    def _all_rows(cls, sheet: Worksheet):
+        row_max = sheet.max_row
+        for row in range(2, row_max + 1):
+            yield row
+
+    @classmethod
+    def _all_cols(cls, sheet: Worksheet):
+        col_max = sheet.max_column
+        for col in range(1, col_max + 1):
+            yield col
+
+    @property
+    def tab_names(self) -> List[str]:
+        return self.reader_agent.sheetnames
+
+    def _get_reader_agent(self) -> Workbook:
+        return openpyxl.load_workbook(self.filename)
+
+    def _raw_row_generator_for_tab_name(self, tab_name: str) -> Iterable[SheetRow]:
+        sheet = self.reader_agent[tab_name]
+        return (self._get_raw_row_content_tuple(sheet, row)
+                for row in self._all_rows(sheet))
+
+    def _get_raw_row_content_tuple(self, sheet: Worksheet, row: int) -> SheetRow:
+        return [sheet.cell(row=row, column=col).value
+                for col in self._all_cols(sheet)]
+
+    def _create_tab_processor_state(self, tab_name: str) -> Headers:
+        sheet = self.reader_agent[tab_name]
+        headers: Headers = [str(sheet.cell(row=1, column=col).value)
+                            for col in self._all_cols(sheet)]
+        self.headers_by_tab_name[sheet.title] = headers
+        return headers
+
+    def _process_row(self, tab_name: str, headers: Headers, row_data: SheetRow) -> AnyJsonData:
+        ignored(tab_name)
+        return {headers[i]: self.parse_cell_value(row_datum)
+                for i, row_datum in enumerate(row_data)}
+
+
+class SchemaAutoloadMixin(AbstractTableSetManager):
+
+    SCHEMA_CACHE = {}  # Shared cache. Do not override. Use .clear_schema_cache() to clear it.
+    CACHE_SCHEMAS = True  # Controls whether we're doing caching at all
+    AUTOLOAD_SCHEMAS_DEFAULT = True
+
+    def __init__(self, filename: str, autoload_schemas: Optional[bool] = None, portal_env: Optional[str] = None,
+                 portal_vapp: Optional[AbstractVirtualApp] = None, **kwargs):
+        # This setup must be in place before the class initialization is done (via the super call).
+        self.autoload_schemas: bool = self.AUTOLOAD_SCHEMAS_DEFAULT if autoload_schemas is None else autoload_schemas
+        if self.autoload_schemas:  # If autoload_schemas is False, we don't care about doing this defaulting.
+            if portal_env is None and portal_vapp is None:
+                portal_env = public_env_name(EnvUtils.PRD_ENV_NAME)
+                PRINT(f"The portal_env was not explicitly supplied. Schemas will come from portal_env={portal_env!r}.")
+        self.portal_env: Optional[str] = portal_env
+        self.portal_vapp: Optional[AbstractVirtualApp] = portal_vapp
+        super().__init__(filename=filename, **kwargs)
+
+    def fetch_relevant_schemas(self, schema_names: List[str]):
+        # The schema_names argument is not normally given, but it is there for easier testing
+        def fetch_schema(schema_name):
+            schema = self.fetch_schema(schema_name, portal_env=self.portal_env, portal_vapp=self.portal_vapp)
+            return schema_name, schema
+        if self.autoload_schemas and (self.portal_env or self.portal_vapp):
+            autoloaded = {tab_name: schema
+                          for tab_name, schema in pmap(fetch_schema, schema_names)}
+            return autoloaded
+        else:
+            return {}
+
+    @classmethod
+    def fetch_schema(cls, schema_name: str, *, portal_env: Optional[str] = None,
+                     portal_vapp: Optional[AbstractVirtualApp] = None):
+        def just_fetch_it():
+            return get_schema(schema_name, portal_env=portal_env, portal_vapp=portal_vapp)
+        if cls.CACHE_SCHEMAS:
+            schema: Optional[AnyJsonData] = cls.SCHEMA_CACHE.get(schema_name)
+            if schema is None:
+                cls.SCHEMA_CACHE[schema_name] = schema = just_fetch_it()
+            return schema
+        else:
+            return just_fetch_it()
+
+    @classmethod
+    def clear_schema_cache(cls):
+        for key in list(cls.SCHEMA_CACHE.keys()):  # important to get the list of keys as a separate object first
+            cls.SCHEMA_CACHE.pop(key, None)
+
+
+class ItemManagerMixin(SchemaAutoloadMixin, AbstractItemManager, BasicTableSetManager):
+    """
+    This can add functionality to a reader such as an XlsxManager or a CsvManager in order to make its rows
+    get handled like Items instead of just flat table rows.
+    """
+
+    def __init__(self, filename: str, schemas: Optional[Dict[str, AnyJsonData]] = None, **kwargs):
+        super().__init__(filename=filename, **kwargs)
+        self.patch_prototypes_by_tab_name: Dict[str, Dict] = {}
+        self.parsed_headers_by_tab_name: Dict[str, ParsedHeaders] = {}
+        self.type_hints_by_tab_name: Dict[str, OptionalTypeHints] = {}
+        self._schemas = schemas
+        self._instaguid_context_table: Dict[str, str] = {}
+
+    @property
+    def schemas(self):
+        schemas = self._schemas
+        if schemas is None:
+            self._schemas = schemas = self.fetch_relevant_schemas(self.tab_names)
+        return schemas
+
+    def sheet_patch_prototype(self, tab_name: str) -> Dict:
+        return self.patch_prototypes_by_tab_name[tab_name]
+
+    def sheet_parsed_headers(self, tab_name: str) -> ParsedHeaders:
+        return self.parsed_headers_by_tab_name[tab_name]
+
+    def sheet_type_hints(self, tab_name: str) -> OptionalTypeHints:
+        return self.type_hints_by_tab_name[tab_name]
+
+    class SheetState:
+
+        def __init__(self, parsed_headers: ParsedHeaders, type_hints: OptionalTypeHints):
+            self.parsed_headers = parsed_headers
+            self.type_hints = type_hints
+
+    def _compile_type_hints(self, tab_name: str):
+        parsed_headers = self.sheet_parsed_headers(tab_name)
+        schema = self.schemas.get(tab_name)
+        with deferred_problems() as note_problem:
+            for required_header in self._schema_required_headers(schema):
+                if required_header not in parsed_headers:
+                    note_problem("Missing required header")
+        type_hints = [ItemTools.find_type_hint(parsed_header, schema) if schema else None
+                      for parsed_header in parsed_headers]
+        self.type_hints_by_tab_name[tab_name] = type_hints
+
+    @classmethod
+    def _schema_required_headers(cls, schema):
+        ignored(schema)
+        return []  # TODO: Make this compute a list of required headers (in parsed header form)
+
+    def _compile_sheet_headers(self, tab_name: str):
+        headers = self.headers_by_tab_name[tab_name]
+        parsed_headers = ItemTools.parse_sheet_headers(headers)
+        self.parsed_headers_by_tab_name[tab_name] = parsed_headers
+        prototype = ItemTools.compute_patch_prototype(parsed_headers)
+        self.patch_prototypes_by_tab_name[tab_name] = prototype
+
+    def _create_tab_processor_state(self, tab_name: str) -> SheetState:
+        super()._create_tab_processor_state(tab_name)
+        # This will create state that allows us to efficiently assign values in the right place on each row
+        # by setting up a prototype we can copy and then drop values into.
+        self._compile_sheet_headers(tab_name)
+        self._compile_type_hints(tab_name)
+        return self.SheetState(parsed_headers=self.sheet_parsed_headers(tab_name),
+                               type_hints=self.sheet_type_hints(tab_name))
+
+    def _process_row(self, tab_name: str, state: SheetState, row_data: SheetRow) -> AnyJsonData:
+        parsed_headers = state.parsed_headers
+        type_hints = state.type_hints
+        patch_item = copy.deepcopy(self.sheet_patch_prototype(tab_name))
+        for i, value in enumerate(row_data):
+            parsed_value = self.parse_cell_value(value)
+            type_hint = type_hints[i]
+            if type_hint:
+                parsed_value = type_hint.apply_hint(parsed_value)
+            ItemTools.set_path_value(patch_item, parsed_headers[i], parsed_value)
+        return patch_item
+
+    def parse_cell_value(self, value: SheetCellValue) -> AnyJsonData:
+        return ItemTools.parse_item_value(value, context=self._instaguid_context_table)
+
+
+@ITEM_MANAGER_REGISTRY.register()
+class XlsxItemManager(ItemManagerMixin, XlsxManager):
+    """
+    This layers item-style row processing functionality on an XLSX file.
+    """
+    pass
+
+
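Note: the net effect of the mixin, as a data sketch (header and cell values invented):

    # Header row:  ["accession", "lab.name", "alias#0", "alias#1"]
    # Data row:    ["ABC123", "some-lab", "first", "second"]
    # An item manager emits the row as a nested item rather than a flat dict:
    {"accession": "ABC123", "lab": {"name": "some-lab"}, "alias": ["first", "second"]}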
+class SingleTableMixin(AbstractTableSetManager):
+
+    def __init__(self, filename: str, tab_name: Optional[str] = None, **kwargs):
+        self._tab_name = tab_name or ItemTools.infer_tab_name(filename)
+        super().__init__(filename=filename, **kwargs)
+
+    @property
+    def tab_names(self) -> List[str]:
+        return [self._tab_name]
+
+
+class InsertsManager(BasicTableSetManager):  # ItemManagerMixin isn't really appropriate here
+
+    ALLOWED_FILE_EXTENSIONS = []
+
+    def _parse_inserts_data(self, filename: str) -> AnyJsonData:
+        raise NotImplementedError(f"._parse_inserts_data(...) is not implemented for {self.__class__.__name__}.")  # noQA
+
+    def _load_inserts_data(self, filename: str) -> TabbedSheetData:
+        data: AnyJsonData = self._parse_inserts_data(filename)
+        tabbed_inserts: AnyJsonData = self._wrap_inserts_data(filename, data)
+        if (not isinstance(tabbed_inserts, dict)
+                or not all(isinstance(tab_name, str) for tab_name in tabbed_inserts.keys())
+                or not all(isinstance(content, list) and all(isinstance(item, dict) for item in content)
+                           for content in tabbed_inserts.values())):
+            raise ValueError(f"Data in {filename} is not of type TabbedSheetData (Dict[str, List[dict]]).")
+        tabbed_inserts: TabbedSheetData  # we've just checked that
+        return tabbed_inserts
+
+    @classmethod
+    def _wrap_inserts_data(cls, filename: str, data: AnyJsonData) -> AnyJsonData:
+        ignored(filename)
+        return data
+
+    @property
+    def tab_names(self) -> List[str]:
+        return list(self.content_by_tab_name.keys())
+
+    def _get_reader_agent(self) -> Any:
+        return self
+
+    def load_content(self) -> Dict[str, AnyJsonData]:
+        data = self._load_inserts_data(self.filename)
+        for tab_name, tab_content in data.items():
+            self.content_by_tab_name[tab_name] = tab_content
+            if not tab_content:
+                self.headers_by_tab_name[tab_name] = []
+            else:
+                self.headers_by_tab_name[tab_name] = list(tab_content[0].keys())
+        return self.content_by_tab_name
+
+
+class SimpleInsertsMixin(SingleTableMixin):
+
+    def _wrap_inserts_data(self, filename: str, data: AnyJsonData) -> TabbedSheetData:
+        if (not isinstance(data, list)
+                or not all(isinstance(item, dict) for item in data)):
+            raise ValueError(f"Data in {filename} is not of type SheetData (List[dict]).")
+        return {self._tab_name: data}
+
+
+class JsonInsertsMixin:
+
+    @classmethod
+    def _parse_inserts_data(cls, filename: str) -> AnyJsonData:
+        return json.load(open_unicode_text_input_file_respecting_byte_order_mark(filename))
+
+
+@TABLE_SET_MANAGER_REGISTRY.register()
+class TabbedJsonInsertsManager(JsonInsertsMixin, InsertsManager):
+
+    ALLOWED_FILE_EXTENSIONS = [".tabs.json"]  # If you want them all in one family, use this extension
+
+
+@TABLE_SET_MANAGER_REGISTRY.register()
+class SimpleJsonInsertsManager(SimpleInsertsMixin, JsonInsertsMixin, InsertsManager):
+
+    ALLOWED_FILE_EXTENSIONS = [".json"]
+
+
+class YamlInsertsMixin:
+
+    def _parse_inserts_data(self, filename) -> AnyJsonData:
+        return yaml.safe_load(open_unicode_text_input_file_respecting_byte_order_mark(filename))
+
+
+@TABLE_SET_MANAGER_REGISTRY.register()
+class TabbedYamlInsertsManager(YamlInsertsMixin, InsertsManager):
+
+    ALLOWED_FILE_EXTENSIONS = [".tabs.yaml"]
+
+    def _parse_inserts_data(self, filename) -> AnyJsonData:
+        return yaml.safe_load(open_unicode_text_input_file_respecting_byte_order_mark(filename))
+
+
+@TABLE_SET_MANAGER_REGISTRY.register()
+class SimpleYamlInsertsManager(SimpleInsertsMixin, YamlInsertsMixin, InsertsManager):
+
+    ALLOWED_FILE_EXTENSIONS = [".yaml"]
+
+
+class InsertsItemMixin(AbstractItemManager):  # ItemManagerMixin isn't really appropriate here
+    """
+    This class is used for inserts directories and other JSON-like data that will be literally used as an Item
+    without semantic pre-processing. In other words, these classes will not be pre-checked for semantic correctness
+    but instead assumed to have been checked by other means.
+    """
+
+    AUTOLOAD_SCHEMAS_DEFAULT = False  # Has no effect, but someone might inspect the value.
+
+    def __init__(self, filename: str, *, autoload_schemas: Optional[bool] = None, portal_env: Optional[str] = None,
+                 portal_vapp: Optional[AbstractVirtualApp] = None, schemas: Optional[Dict[str, AnyJsonData]] = None,
+                 **kwargs):
+        ignored(portal_env, portal_vapp)  # Would only be used if autoload_schemas was true, and we don't allow that.
+        if schemas not in [None, {}]:
+            raise ValueError(f"{self.__class__.__name__} does not allow schemas={schemas!r}.")
+        if autoload_schemas not in [None, False]:
+            raise ValueError(f"{self.__class__.__name__} does not allow autoload_schemas={autoload_schemas!r}.")
+        super().__init__(filename=filename, **kwargs)
+
+
+@ITEM_MANAGER_REGISTRY.register()
+class TabbedJsonInsertsItemManager(InsertsItemMixin, TabbedJsonInsertsManager):
+    pass
+
+
+@ITEM_MANAGER_REGISTRY.register()
+class SimpleJsonInsertsItemManager(InsertsItemMixin, SimpleJsonInsertsManager):
+    pass
+
+
+@ITEM_MANAGER_REGISTRY.register()
+class TabbedYamlInsertsItemManager(InsertsItemMixin, TabbedYamlInsertsManager):
+    pass
+
+
+@ITEM_MANAGER_REGISTRY.register()
+class SimpleYamlInsertsItemManager(InsertsItemMixin, SimpleYamlInsertsManager):
+    pass
+
+
+@TABLE_SET_MANAGER_REGISTRY.register()
+class SimpleJsonLinesInsertsManager(SimpleInsertsMixin, InsertsManager):
+
+    ALLOWED_FILE_EXTENSIONS = [".jsonl"]
+
+    def _parse_inserts_data(self, filename: str) -> AnyJsonData:
+        return [line for line in JsonLinesReader(open_unicode_text_input_file_respecting_byte_order_mark(filename))]
+
+
+@ITEM_MANAGER_REGISTRY.register()
+class SimpleJsonLinesInsertsItemManager(InsertsItemMixin, SimpleJsonLinesInsertsManager):
+    pass
+
+
+@TABLE_SET_MANAGER_REGISTRY.register(regexp="^(.*/)?(|[^/]*[-_])inserts/?$")
+class InsertsDirectoryManager(InsertsManager):
+
+    ALLOWED_FILE_EXTENSIONS = []
+
+    def _parse_inserts_data(self, filename: str) -> AnyJsonData:
+        if not os.path.isdir(filename):
+            raise LoadArgumentsError(f"{filename} is not the name of an inserts directory.")
+        tab_files = glob.glob(os.path.join(filename, "*.json"))
+        data = {}
+        for tab_file in tab_files:
+            tab_content = json.load(open_unicode_text_input_file_respecting_byte_order_mark(tab_file))
+            # Here we don't use os.path.splitext because we want to split on the first dot.
+            # e.g., for foo.bar.baz, return just foo
+            # this allows names like ExperimentSet.tab.json that might need to use multi-dot suffixes
+            # for things unrelated to the tab name.
+            tab_name = os.path.basename(tab_file).split('.')[0]
+            data[tab_name] = tab_content
+        return data
+
+
+@ITEM_MANAGER_REGISTRY.register(regexp="^(.*/)?(|[^/]*[-_])inserts/?$")
+class InsertsDirectoryItemManager(InsertsItemMixin, InsertsDirectoryManager):
+    pass
+
+
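Note: the file shapes these managers accept, as hypothetical content:

    # things.tabs.json (TabbedJsonInsertsManager): a dict of tab name -> list of items
    {"MyItemType": [{"name": "alpha"}, {"name": "beta"}]}

    # things.json (SimpleJsonInsertsManager): a bare list; the tab name is inferred
    # from the filename ("things") via SingleTableMixin and ItemTools.infer_tab_name.
    [{"name": "alpha"}, {"name": "beta"}]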
+@TABLE_SET_MANAGER_REGISTRY.register()
+class CsvManager(SingleTableMixin, SemanticTableSetManager):
+    """
+    This implements the mechanism to get a series of rows out of the sheet in a csv file,
+    returning a result that still looks like there could have been multiple tabs.
+    """
+
+    ALLOWED_FILE_EXTENSIONS = ['.csv']
+
+    def __init__(self, filename: str, escaping: Optional[bool] = None, **kwargs):
+        super().__init__(filename=filename, **kwargs)
+        self.escaping: bool = escaping or False
+
+    def _get_reader_agent(self) -> CsvReader:
+        return self._get_reader_agent_for_filename(self.filename)
+
+    @classmethod
+    def _get_reader_agent_for_filename(cls, filename) -> CsvReader:
+        return csv.reader(open_unicode_text_input_file_respecting_byte_order_mark(filename))
+
+    PAD_TRAILING_TABS = True
+
+    def _raw_row_generator_for_tab_name(self, tab_name: str) -> Iterable[SheetRow]:
+        headers = self.tab_headers(tab_name)
+        n_headers = len(headers)
+        for row_data in self.reader_agent:
+            if self.PAD_TRAILING_TABS:
+                row_data = pad_to(n_headers, row_data, padding='')
+            yield row_data
+
+    def _create_tab_processor_state(self, tab_name: str) -> Headers:
+        headers: Optional[Headers] = self.headers_by_tab_name.get(tab_name)
+        if headers is None:
+            self.headers_by_tab_name[tab_name] = headers = self.reader_agent.__next__()
+        return headers
+
+    @classmethod
+    def _escape_cell_text(cls, cell_text):
+        if '\\' in cell_text:
+            return expand_string_escape_sequences(cell_text)
+        else:
+            return cell_text
+
+    def _process_row(self, tab_name: str, headers: Headers, row_data: SheetRow) -> AnyJsonData:
+        ignored(tab_name)
+        if self.escaping:
+            return {headers[i]: self.parse_cell_value(self._escape_cell_text(cell_text))
+                    for i, cell_text in enumerate(row_data)}
+        else:
+            return {headers[i]: self.parse_cell_value(cell_text)
+                    for i, cell_text in enumerate(row_data)}
+
+
+@ITEM_MANAGER_REGISTRY.register()
+class CsvItemManager(ItemManagerMixin, CsvManager):
+    """
+    This layers item-style row processing functionality on a CSV file.
+    """
+    pass
+
+
+@TABLE_SET_MANAGER_REGISTRY.register()
+class TsvManager(CsvManager):
+    """
+    TSV files are just CSV files with tabs instead of commas as separators.
+    (We do not presently handle any escaping of strange characters. May need to add handling for backslash escaping.)
+    """
+    ALLOWED_FILE_EXTENSIONS = ['.tsv', '.tsv.txt']
+
+    @classmethod
+    def _get_reader_agent_for_filename(cls, filename) -> CsvReader:
+        return csv.reader(open_unicode_text_input_file_respecting_byte_order_mark(filename), delimiter='\t')
+
+
+@ITEM_MANAGER_REGISTRY.register()
+class TsvItemManager(ItemManagerMixin, TsvManager):
+    """
+    This layers item-style row processing functionality on a TSV file.
+    """
+    pass
+
+
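Note: what escaping=True does to cell text, per expand_string_escape_sequences above (values invented):

    assert expand_string_escape_sequences(r"a\tb") == "a\tb"   # '\t' becomes a real tab
    assert expand_string_escape_sequences(r"a\\b") == "a\\b"   # '\\' collapses to one backslash
    assert expand_string_escape_sequences(r"a\qb") == "a\\qb"  # unknown escapes kept as-is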
+def _do_shell_command(command, cwd=None):
+    # This might need to be more elaborate, but hopefully it will do for now. -kmp 11-Sep-2023
+    subprocess.check_output(command, cwd=cwd)
+
+
+@contextlib.contextmanager
+def maybe_unpack(filename):  # Maybe move to another module
+    """
+    If necessary, unpack a file that is zipped and/or tarred, yielding the name of the file (unpacked or not).
+    """
+    unpackables = ['.tar.gz', '.tar', '.tgz', '.gz', '.zip']
+    ext = None
+    for unpackable in unpackables:
+        if filename.endswith(unpackable):
+            ext = unpackable
+            break
+    if not ext:
+        yield filename
+        return
+    if not os.path.exists(filename):
+        # We don't bother to raise this error if we're not planning to do any unpacking.
+        # The caller can decide if/when such errors are needed in that case.
+        # But if we are going to have to move bits around, they'll need to actually be there.
+        # -kmp 12-Sep-2023
+        raise ValueError(f"The file {filename!r} does not exist.")
+    target_base_part = remove_suffix(ext, os.path.basename(filename), required=True)
+    target_ext = '.tar.gz' if ext == '.tgz' else ext
+    with TemporaryDirectory() as temp_dir:
+        temp_base = os.path.join(temp_dir, target_base_part)
+        temp_filename = temp_base + target_ext
+        _do_shell_command(['cp', filename, temp_filename])
+        if temp_filename.endswith('.gz'):
+            _do_shell_command(['gunzip', temp_filename], cwd=temp_dir)
+            temp_filename = remove_suffix('.gz', temp_filename)
+        elif temp_filename.endswith(".zip"):
+            _do_shell_command(['unzip', temp_filename], cwd=temp_dir)
+            temp_filename = remove_suffix('.zip', temp_filename)
+        if temp_filename.endswith(".tar"):
+            _do_shell_command(['tar', '-xf', temp_filename], cwd=temp_dir)
+            tar_file = temp_filename
+            temp_filename = remove_suffix(".tar", temp_filename, required=True)
+            if not os.path.isdir(temp_filename):
+                raise Exception(f"{tar_file} didn't unpack to a dir: {temp_filename}")
+        # print(f"Unpacked {filename} to {temp_filename}")
+        yield temp_filename
+
+
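Note: usage sketch (filenames invented); the TableSetManager.load and ItemManager.load methods below wrap their work in this context manager automatically:

    with maybe_unpack("book.xlsx") as name:
        assert name == "book.xlsx"  # not packed: yielded unchanged

    with maybe_unpack("my-inserts.tar.gz") as name:
        # name points at a temporary unpacked copy (here, expected to be a directory)
        print(name)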
+class TableSetManager(AbstractTableSetManager):
+    """
+    This class will open a .xlsx or .csv file and load its content in our standard format.
+    (See more detailed description in AbstractTableManager.)
+    """
+
+    @classmethod
+    def create_implementation_manager(cls, filename: str, **kwargs) -> AbstractTableSetManager:
+        reader_agent_class = TABLE_SET_MANAGER_REGISTRY.manager_for_filename(filename)
+        if issubclass(reader_agent_class, AbstractItemManager):
+            raise ValueError(f"TableSetManager unexpectedly found reader agent class {reader_agent_class}.")
+        reader_agent = reader_agent_class(filename=filename, **kwargs)
+        return reader_agent
+
+    @classmethod
+    def load(cls, filename: str, tab_name: Optional[str] = None, escaping: Optional[bool] = None,
+             **kwargs) -> TabbedSheetData:
+        """
+        Given a filename and various options, loads the tabbed content associated with that filename.
+        """
+        with maybe_unpack(filename) as filename:
+            manager = cls.create_implementation_manager(filename=filename, tab_name=tab_name, escaping=escaping,
+                                                        **kwargs)
+            return manager.load_content()
+
+
+class ItemManager(AbstractTableSetManager):
+    """
+    This class will open a .xlsx or .csv file and load its content in our standard format.
+    (See more detailed description in AbstractTableManager.)
+    """
+
+    @classmethod
+    def create_implementation_manager(cls, filename: str, **kwargs) -> AbstractItemManager:
+        reader_agent_class: Type[AbstractTableSetManager] = ITEM_MANAGER_REGISTRY.manager_for_filename(filename)
+        if not issubclass(reader_agent_class, AbstractItemManager):
+            raise ValueError(f"ItemManager unexpectedly found reader agent class {reader_agent_class}.")
+        reader_agent_class: Type[AbstractItemManager]
+        reader_agent = reader_agent_class(filename=filename, **kwargs)
+        return reader_agent
+
+    @classmethod
+    def load(cls, filename: str, tab_name: Optional[str] = None, escaping: Optional[bool] = None,
+             schemas: Optional[Dict] = None, autoload_schemas: Optional[bool] = None,
+             portal_env: Optional[str] = None, portal_vapp: Optional[AbstractVirtualApp] = None,
+             **kwargs) -> TabbedSheetData:
+        """
+        Given a filename and various options, loads the items associated with that filename.
+
+        :param filename: The name of the file to load.
+        :param tab_name: For files that lack multiple tabs (such as .csv or .tsv),
+            the tab name to associate with the data.
+        :param escaping: Whether to perform escape processing on backslashes.
+        :param schemas: A set of schemas to use instead of trying to load them.
+        :param autoload_schemas: Whether to try autoloading schemas.
+        :param portal_env: A portal to consult to find schemas (usually if calling from the outside of a portal).
+        :param portal_vapp: A vapp to use (usually if calling from within a portal).
+        """
+
+        with maybe_unpack(filename) as filename:
+
+            manager = cls.create_implementation_manager(filename=filename, tab_name=tab_name, escaping=escaping,
+                                                        schemas=schemas, autoload_schemas=autoload_schemas,
+                                                        portal_env=portal_env, portal_vapp=portal_vapp,
+                                                        **kwargs)
+            return manager.load_content()
+
+
+load_table_set = TableSetManager.load
+load_items = ItemManager.load
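
Note: an end-to-end sketch (filename and tab name invented):

    from dcicutils.sheet_utils import load_items, load_table_set

    # Raw rows, one list of dicts per tab, with "123"-style cells becoming numbers:
    tabs = load_table_set("book.xlsx")

    # Item-style loading; supply schemas=... (or let autoload_schemas consult a
    # portal_env/portal_vapp) to get enum and boolean hinting on columns:
    items = load_items("book.xlsx", schemas={"Sheet1": {}})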
{dcicutils-7.11.0.dist-info → dcicutils-7.11.0.1b9.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: dcicutils
-Version: 7.11.0
+Version: 7.11.0.1b9
 Summary: Utility package for interacting with the 4DN Data Portal and other 4DN resources
 Home-page: https://github.com/4dn-dcic/utils
 License: MIT
@@ -25,9 +25,11 @@ Requires-Dist: PyYAML (>=5.1,<5.5)
 Requires-Dist: aws-requests-auth (>=0.4.2,<1)
 Requires-Dist: boto3 (>=1.17.39,<2.0.0)
 Requires-Dist: botocore (>=1.20.39,<2.0.0)
+Requires-Dist: chardet (>=5.2.0,<6.0.0)
 Requires-Dist: docker (>=4.4.4,<5.0.0)
 Requires-Dist: elasticsearch (==7.13.4)
 Requires-Dist: gitpython (>=3.1.2,<4.0.0)
+Requires-Dist: openpyxl (>=3.1.2,<4.0.0)
 Requires-Dist: opensearch-py (>=2.0.1,<3.0.0)
 Requires-Dist: pyOpenSSL (>=23.1.1,<24.0.0)
 Requires-Dist: pytz (>=2020.4)
{dcicutils-7.11.0.dist-info → dcicutils-7.11.0.1b9.dist-info}/RECORD
CHANGED
@@ -32,7 +32,7 @@ dcicutils/kibana/readme.md,sha256=3KmHF9FH6A6xwYsNxRFLw27q0XzHYnjZOlYUnn3VkQQ,21
 dcicutils/lang_utils.py,sha256=cVLRUGyYeSPJAq3z_RJjA6miajHrXoi6baxF8HzHmLc,27797
 dcicutils/license_utils.py,sha256=OhOfTXFivvb6Y3tiJAb1b9Is-OTpBfZjC18M-RvqBqk,40456
 dcicutils/log_utils.py,sha256=7pWMc6vyrorUZQf-V-M3YC6zrPgNhuV_fzm9xqTPph0,10883
-dcicutils/misc_utils.py,sha256=
+dcicutils/misc_utils.py,sha256=XisEQGMkHI7k5RiK-k4yeG8Zw00H8b-v9o2Y7mZyKb8,94548
 dcicutils/obfuscation_utils.py,sha256=fo2jOmDRC6xWpYX49u80bVNisqRRoPskFNX3ymFAmjw,5963
 dcicutils/opensearch_utils.py,sha256=V2exmFYW8Xl2_pGFixF4I2Cc549Opwe4PhFi5twC0M8,1017
 dcicutils/project_utils.py,sha256=qPdCaFmWUVBJw4rw342iUytwdQC0P-XKpK4mhyIulMM,31250
@@ -43,13 +43,14 @@ dcicutils/redis_utils.py,sha256=VJ-7g8pOZqR1ZCtdcjKz3-6as2DMUcs1b1zG6wSprH4,6462
 dcicutils/s3_utils.py,sha256=a9eU3Flh8Asc8xPWLGP16A6UQ_FVwhoFQNqm4ZYgSQ4,28852
 dcicutils/scripts/publish_to_pypi.py,sha256=qmWyjrg5bNQNfpNKFTZdyMXpRmrECnRV9VmNQddUPQA,13576
 dcicutils/secrets_utils.py,sha256=8dppXAsiHhJzI6NmOcvJV5ldvKkQZzh3Fl-cb8Wm7MI,19745
+dcicutils/sheet_utils.py,sha256=bnnefjeTUL4ES7gtqThISXJKeli1AIFryu4h7Dt9dxw,47040
 dcicutils/snapshot_utils.py,sha256=ymP7PXH6-yEiXAt75w0ldQFciGNqWBClNxC5gfX2FnY,22961
 dcicutils/ssl_certificate_utils.py,sha256=F0ifz_wnRRN9dfrfsz7aCp4UDLgHEY8LaK7PjnNvrAQ,9707
 dcicutils/task_utils.py,sha256=MF8ujmTD6-O2AC2gRGPHyGdUrVKgtr8epT5XU8WtNjk,8082
 dcicutils/trace_utils.py,sha256=g8kwV4ebEy5kXW6oOrEAUsurBcCROvwtZqz9fczsGRE,1769
 dcicutils/variant_utils.py,sha256=2H9azNx3xAj-MySg-uZ2SFqbWs4kZvf61JnK6b-h4Qw,4343
-dcicutils-7.11.0.dist-info/LICENSE.txt,sha256=t0_-jIjqxNnymZoNJe-OltRIuuF8qfhN0ATlHyrUJPk,1102
-dcicutils-7.11.0.dist-info/METADATA,sha256=
-dcicutils-7.11.0.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
-dcicutils-7.11.0.dist-info/entry_points.txt,sha256=Z3vezbXsTpTIY4N2F33c5e-WDVQxgz_Vsk1oV_JBN7A,146
-dcicutils-7.11.0.dist-info/RECORD,,
+dcicutils-7.11.0.1b9.dist-info/LICENSE.txt,sha256=t0_-jIjqxNnymZoNJe-OltRIuuF8qfhN0ATlHyrUJPk,1102
+dcicutils-7.11.0.1b9.dist-info/METADATA,sha256=MER7N-gDAB5nz6YT51jT7aIu8_rHT2x65FBF5x3DN70,3084
+dcicutils-7.11.0.1b9.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
+dcicutils-7.11.0.1b9.dist-info/entry_points.txt,sha256=Z3vezbXsTpTIY4N2F33c5e-WDVQxgz_Vsk1oV_JBN7A,146
+dcicutils-7.11.0.1b9.dist-info/RECORD,,
{dcicutils-7.11.0.dist-info → dcicutils-7.11.0.1b9.dist-info}/LICENSE.txt
File without changes
{dcicutils-7.11.0.dist-info → dcicutils-7.11.0.1b9.dist-info}/WHEEL
File without changes
{dcicutils-7.11.0.dist-info → dcicutils-7.11.0.1b9.dist-info}/entry_points.txt
File without changes