dataverse-utils 0.22.1__tar.gz → 0.22.2__tar.gz
This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- {dataverse_utils-0.22.1 → dataverse_utils-0.22.2}/PKG-INFO +1 -1
- {dataverse_utils-0.22.1 → dataverse_utils-0.22.2}/pyproject.toml +1 -1
- {dataverse_utils-0.22.1 → dataverse_utils-0.22.2}/src/dataverse_utils/__init__.py +1 -1
- {dataverse_utils-0.22.1 → dataverse_utils-0.22.2}/src/dataverse_utils/collections.py +39 -11
- {dataverse_utils-0.22.1 → dataverse_utils-0.22.2}/src/dataverse_utils/scripts/dv_collection_info.py +1 -1
- {dataverse_utils-0.22.1 → dataverse_utils-0.22.2}/src/dataverse_utils/scripts/dv_readme_creator.py +1 -1
- {dataverse_utils-0.22.1 → dataverse_utils-0.22.2}/LICENCE.md +0 -0
- {dataverse_utils-0.22.1 → dataverse_utils-0.22.2}/README.md +0 -0
- {dataverse_utils-0.22.1 → dataverse_utils-0.22.2}/src/dataverse_utils/data/LDC_EULA_general.md +0 -0
- {dataverse_utils-0.22.1 → dataverse_utils-0.22.2}/src/dataverse_utils/dataverse_utils.py +0 -0
- {dataverse_utils-0.22.1 → dataverse_utils-0.22.2}/src/dataverse_utils/dvdata.py +0 -0
- {dataverse_utils-0.22.1 → dataverse_utils-0.22.2}/src/dataverse_utils/ldc.py +0 -0
- {dataverse_utils-0.22.1 → dataverse_utils-0.22.2}/src/dataverse_utils/scripts/dv_del.py +0 -0
- {dataverse_utils-0.22.1 → dataverse_utils-0.22.2}/src/dataverse_utils/scripts/dv_ldc_uploader.py +0 -0
- {dataverse_utils-0.22.1 → dataverse_utils-0.22.2}/src/dataverse_utils/scripts/dv_list_files.py +0 -0
- {dataverse_utils-0.22.1 → dataverse_utils-0.22.2}/src/dataverse_utils/scripts/dv_manifest_gen.py +0 -0
- {dataverse_utils-0.22.1 → dataverse_utils-0.22.2}/src/dataverse_utils/scripts/dv_pg_facet_date.py +0 -0
- {dataverse_utils-0.22.1 → dataverse_utils-0.22.2}/src/dataverse_utils/scripts/dv_record_copy.py +0 -0
- {dataverse_utils-0.22.1 → dataverse_utils-0.22.2}/src/dataverse_utils/scripts/dv_release.py +0 -0
- {dataverse_utils-0.22.1 → dataverse_utils-0.22.2}/src/dataverse_utils/scripts/dv_replace_licence.py +0 -0
- {dataverse_utils-0.22.1 → dataverse_utils-0.22.2}/src/dataverse_utils/scripts/dv_study_migrator.py +0 -0
- {dataverse_utils-0.22.1 → dataverse_utils-0.22.2}/src/dataverse_utils/scripts/dv_upload_tsv.py +0 -0
|
@@ -16,6 +16,7 @@ import traceback
|
|
|
16
16
|
import warnings
|
|
17
17
|
|
|
18
18
|
import bs4
|
|
19
|
+
import charset_normalizer as cn
|
|
19
20
|
import markdown_pdf
|
|
20
21
|
import markdownify
|
|
21
22
|
import pyreadstat
|
|
@@ -290,8 +291,10 @@ class StudyMetadata(dict):
|
|
|
290
291
|
f'Offending JSON: {self.study_meta}') from e
|
|
291
292
|
self.__files = None
|
|
292
293
|
self.__all_files = None
|
|
293
|
-
self.index = {f"{_['versionNumber']}.{_['versionMinorNumber']}": n
|
|
294
|
-
|
|
294
|
+
#self.index = {f"{_['versionNumber']}.{_['versionMinorNumber']}": n
|
|
295
|
+
# for n, _ in enumerate(self.all_versions['data'])}
|
|
296
|
+
#self.index = {_: n for _, n in enumerate(self.versions)}
|
|
297
|
+
self.index = dict(enumerate(self.versions))
|
|
295
298
|
|
|
296
299
|
def __obtain_metadata(self):
|
|
297
300
|
'''
|
|
@@ -347,7 +350,10 @@ class StudyMetadata(dict):
|
|
|
347
350
|
for field in v['fields']:
|
|
348
351
|
tmp.update(self.extract_field_metadata(field))
|
|
349
352
|
tmp.update(self.__extract_licence_info(chunk))
|
|
350
|
-
|
|
353
|
+
if chunk.get('versionNumber'):
|
|
354
|
+
tmp['versionStatement'] = f"{chunk['versionNumber']}.{chunk['versionMinorNumber']}"
|
|
355
|
+
else:
|
|
356
|
+
tmp['versionStatement'] = f"{chunk.get('versionState', '')}"
|
|
351
357
|
return tmp
|
|
352
358
|
|
|
353
359
|
def extract_field_metadata(self, field):
|
|
@@ -440,8 +446,15 @@ class StudyMetadata(dict):
|
|
|
440
446
|
'''
|
|
441
447
|
Return a *list* of formatted version strings
|
|
442
448
|
'''
|
|
443
|
-
|
|
444
|
-
|
|
449
|
+
out = []
|
|
450
|
+
for _ in self.all_versions['data']:
|
|
451
|
+
if _.get('versionNumber'):
|
|
452
|
+
out.append(f"{_['versionNumber']}.{_['versionMinorNumber']}")
|
|
453
|
+
else:
|
|
454
|
+
out.append(_['versionState'])
|
|
455
|
+
#return [f"{_['versionNumber']}.{_['versionMinorNumber']}"
|
|
456
|
+
# for _ in self.all_versions['data']]
|
|
457
|
+
return out
|
|
445
458
|
|
|
446
459
|
@property
|
|
447
460
|
def files(self)->list:
|
|
@@ -904,7 +917,8 @@ class ReadmeCreator:
|
|
|
904
917
|
'Country':'Country(ies)',
|
|
905
918
|
'State':'State(s)',
|
|
906
919
|
'City':'City(ies)',
|
|
907
|
-
'Geographic Unit':'Geographic unit(s)'
|
|
920
|
+
'Geographic Unit':'Geographic unit(s)',
|
|
921
|
+
'State(s)ment' : 'Statement'}
|
|
908
922
|
for k, v in fixthese.items():
|
|
909
923
|
wordsp = wordsp.replace(k, v)
|
|
910
924
|
return wordsp.strip()
|
|
@@ -1250,6 +1264,14 @@ class FileAnalysis(dict):
|
|
|
1250
1264
|
self.update(outmeta)
|
|
1251
1265
|
return
|
|
1252
1266
|
|
|
1267
|
+
def get_encoding(self, fpath):
|
|
1268
|
+
'''
|
|
1269
|
+
Return the encoding of a file so that pandas
|
|
1270
|
+
won't crash. Hopefully.
|
|
1271
|
+
|
|
1272
|
+
fpath : str
|
|
1273
|
+
file path
|
|
1274
|
+
'''
|
|
1253
1275
|
|
|
1254
1276
|
def generic_metadata(self, ext)->None:
|
|
1255
1277
|
'''
|
|
@@ -1265,14 +1287,20 @@ class FileAnalysis(dict):
|
|
|
1265
1287
|
# data = pd.read_csv(self.__whichfile, sep='\t')
|
|
1266
1288
|
#else:
|
|
1267
1289
|
# data = pd.read_csv(self.__whichfile)
|
|
1268
|
-
|
|
1290
|
+
encme = {'.tsv': {'sep': '\t'},
|
|
1291
|
+
'.csv': {}}
|
|
1292
|
+
if ext.lower() in encme:
|
|
1293
|
+
with open(self.__whichfile, 'rb') as f:
|
|
1294
|
+
encoding = {'encoding':'utf-8'}
|
|
1295
|
+
encoding.update({'encoding':cn.detect(f.read()).get('encoding', 'utf-8')})
|
|
1296
|
+
encme[ext.lower()].update(encoding)
|
|
1269
1297
|
lookuptable ={'.tsv': {'func': pd.read_csv,
|
|
1270
|
-
'kwargs' :
|
|
1271
|
-
|
|
1272
|
-
|
|
1298
|
+
'kwargs' : encme['.tsv']},
|
|
1299
|
+
'.csv': {'func' : pd.read_csv, 'kwargs' : encme['.csv']},
|
|
1300
|
+
'.rda': {'func' : pyreadr.read_r},
|
|
1273
1301
|
'.rdata':{'func' : pyreadr.read_r}}
|
|
1274
1302
|
data = lookuptable[ext]['func'](self.__whichfile,
|
|
1275
|
-
|
|
1303
|
+
**lookuptable[ext].get('kwargs', {}))
|
|
1276
1304
|
if ext in ['.rda', '.rdata']:
|
|
1277
1305
|
data = data[None] #why pyreadr why
|
|
1278
1306
|
outmeta = {}
|
{dataverse_utils-0.22.1 → dataverse_utils-0.22.2}/src/dataverse_utils/scripts/dv_collection_info.py
RENAMED
|
@@ -207,7 +207,7 @@ def main():
|
|
|
207
207
|
else:
|
|
208
208
|
try:
|
|
209
209
|
all_studies = [dvc.StudyMetadata(url=args.url, pid=args.pid, key=args.key)]
|
|
210
|
-
except dataverse_utils.collections.MetadataError as e:
|
|
210
|
+
except (KeyError, dataverse_utils.collections.MetadataError) as e:
|
|
211
211
|
print(e, file=sys.stderr)
|
|
212
212
|
sys.exit()
|
|
213
213
|
#if 'all' in [x.lower() for x in args.fields] and args.collection:
|
{dataverse_utils-0.22.1 → dataverse_utils-0.22.2}/src/dataverse_utils/scripts/dv_readme_creator.py
RENAMED
|
@@ -36,7 +36,7 @@ def parse() -> argparse.ArgumentParser():
|
|
|
36
36
|
'Defaults to "borealisdata.ca"'))
|
|
37
37
|
parser.add_argument('-p', '--pid',
|
|
38
38
|
help=('Persistent ID of study (ie, doi or hdl). '
|
|
39
|
-
'
|
|
39
|
+
'eg: doi:12.2345/PRE/ZYX9876'),
|
|
40
40
|
type=str,
|
|
41
41
|
required=True)
|
|
42
42
|
parser.add_argument('-k', '--key', required=True,
|
|
File without changes
|
|
File without changes
|
{dataverse_utils-0.22.1 → dataverse_utils-0.22.2}/src/dataverse_utils/data/LDC_EULA_general.md
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{dataverse_utils-0.22.1 → dataverse_utils-0.22.2}/src/dataverse_utils/scripts/dv_ldc_uploader.py
RENAMED
|
File without changes
|
{dataverse_utils-0.22.1 → dataverse_utils-0.22.2}/src/dataverse_utils/scripts/dv_list_files.py
RENAMED
|
File without changes
|
{dataverse_utils-0.22.1 → dataverse_utils-0.22.2}/src/dataverse_utils/scripts/dv_manifest_gen.py
RENAMED
|
File without changes
|
{dataverse_utils-0.22.1 → dataverse_utils-0.22.2}/src/dataverse_utils/scripts/dv_pg_facet_date.py
RENAMED
|
File without changes
|
{dataverse_utils-0.22.1 → dataverse_utils-0.22.2}/src/dataverse_utils/scripts/dv_record_copy.py
RENAMED
|
File without changes
|
|
File without changes
|
{dataverse_utils-0.22.1 → dataverse_utils-0.22.2}/src/dataverse_utils/scripts/dv_replace_licence.py
RENAMED
|
File without changes
|
{dataverse_utils-0.22.1 → dataverse_utils-0.22.2}/src/dataverse_utils/scripts/dv_study_migrator.py
RENAMED
|
File without changes
|
{dataverse_utils-0.22.1 → dataverse_utils-0.22.2}/src/dataverse_utils/scripts/dv_upload_tsv.py
RENAMED
|
File without changes
|