dataverse-utils 0.22.1__tar.gz → 0.22.3__tar.gz
This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- {dataverse_utils-0.22.1 → dataverse_utils-0.22.3}/PKG-INFO +1 -1
- {dataverse_utils-0.22.1 → dataverse_utils-0.22.3}/pyproject.toml +1 -1
- {dataverse_utils-0.22.1 → dataverse_utils-0.22.3}/src/dataverse_utils/__init__.py +1 -1
- {dataverse_utils-0.22.1 → dataverse_utils-0.22.3}/src/dataverse_utils/collections.py +41 -15
- {dataverse_utils-0.22.1 → dataverse_utils-0.22.3}/src/dataverse_utils/scripts/dv_collection_info.py +1 -1
- {dataverse_utils-0.22.1 → dataverse_utils-0.22.3}/src/dataverse_utils/scripts/dv_readme_creator.py +1 -1
- {dataverse_utils-0.22.1 → dataverse_utils-0.22.3}/LICENCE.md +0 -0
- {dataverse_utils-0.22.1 → dataverse_utils-0.22.3}/README.md +0 -0
- {dataverse_utils-0.22.1 → dataverse_utils-0.22.3}/src/dataverse_utils/data/LDC_EULA_general.md +0 -0
- {dataverse_utils-0.22.1 → dataverse_utils-0.22.3}/src/dataverse_utils/dataverse_utils.py +0 -0
- {dataverse_utils-0.22.1 → dataverse_utils-0.22.3}/src/dataverse_utils/dvdata.py +0 -0
- {dataverse_utils-0.22.1 → dataverse_utils-0.22.3}/src/dataverse_utils/ldc.py +0 -0
- {dataverse_utils-0.22.1 → dataverse_utils-0.22.3}/src/dataverse_utils/scripts/dv_del.py +0 -0
- {dataverse_utils-0.22.1 → dataverse_utils-0.22.3}/src/dataverse_utils/scripts/dv_ldc_uploader.py +0 -0
- {dataverse_utils-0.22.1 → dataverse_utils-0.22.3}/src/dataverse_utils/scripts/dv_list_files.py +0 -0
- {dataverse_utils-0.22.1 → dataverse_utils-0.22.3}/src/dataverse_utils/scripts/dv_manifest_gen.py +0 -0
- {dataverse_utils-0.22.1 → dataverse_utils-0.22.3}/src/dataverse_utils/scripts/dv_pg_facet_date.py +0 -0
- {dataverse_utils-0.22.1 → dataverse_utils-0.22.3}/src/dataverse_utils/scripts/dv_record_copy.py +0 -0
- {dataverse_utils-0.22.1 → dataverse_utils-0.22.3}/src/dataverse_utils/scripts/dv_release.py +0 -0
- {dataverse_utils-0.22.1 → dataverse_utils-0.22.3}/src/dataverse_utils/scripts/dv_replace_licence.py +0 -0
- {dataverse_utils-0.22.1 → dataverse_utils-0.22.3}/src/dataverse_utils/scripts/dv_study_migrator.py +0 -0
- {dataverse_utils-0.22.1 → dataverse_utils-0.22.3}/src/dataverse_utils/scripts/dv_upload_tsv.py +0 -0
|
@@ -7,7 +7,7 @@ import pathlib
|
|
|
7
7
|
import sys
|
|
8
8
|
from dataverse_utils.dataverse_utils import *
|
|
9
9
|
|
|
10
|
-
VERSION = (0, 22,
|
|
10
|
+
VERSION = (0, 22, '3a1')
|
|
11
11
|
__version__ = '.'.join([str(x) for x in VERSION])
|
|
12
12
|
|
|
13
13
|
USERAGENT = (f'dataverse_utils/v{__version__} ({sys.platform.capitalize()}); '
|
|
@@ -16,6 +16,7 @@ import traceback
|
|
|
16
16
|
import warnings
|
|
17
17
|
|
|
18
18
|
import bs4
|
|
19
|
+
import charset_normalizer as cn
|
|
19
20
|
import markdown_pdf
|
|
20
21
|
import markdownify
|
|
21
22
|
import pyreadstat
|
|
@@ -290,8 +291,10 @@ class StudyMetadata(dict):
|
|
|
290
291
|
f'Offending JSON: {self.study_meta}') from e
|
|
291
292
|
self.__files = None
|
|
292
293
|
self.__all_files = None
|
|
293
|
-
self.index = {f"{_['versionNumber']}.{_['versionMinorNumber']}": n
|
|
294
|
-
|
|
294
|
+
#self.index = {f"{_['versionNumber']}.{_['versionMinorNumber']}": n
|
|
295
|
+
# for n, _ in enumerate(self.all_versions['data'])}
|
|
296
|
+
#self.index = {_: n for _, n in enumerate(self.versions)}
|
|
297
|
+
self.index = dict(enumerate(self.versions))
|
|
295
298
|
|
|
296
299
|
def __obtain_metadata(self):
|
|
297
300
|
'''
|
|
@@ -347,7 +350,10 @@ class StudyMetadata(dict):
|
|
|
347
350
|
for field in v['fields']:
|
|
348
351
|
tmp.update(self.extract_field_metadata(field))
|
|
349
352
|
tmp.update(self.__extract_licence_info(chunk))
|
|
350
|
-
|
|
353
|
+
if chunk.get('versionNumber'):
|
|
354
|
+
tmp['versionStatement'] = f"{chunk['versionNumber']}.{chunk['versionMinorNumber']}"
|
|
355
|
+
else:
|
|
356
|
+
tmp['versionStatement'] = f"{chunk.get('versionState', '')}"
|
|
351
357
|
return tmp
|
|
352
358
|
|
|
353
359
|
def extract_field_metadata(self, field):
|
|
@@ -440,8 +446,15 @@ class StudyMetadata(dict):
|
|
|
440
446
|
'''
|
|
441
447
|
Return a *list* of formatted version strings
|
|
442
448
|
'''
|
|
443
|
-
|
|
444
|
-
|
|
449
|
+
out = []
|
|
450
|
+
for _ in self.all_versions['data']:
|
|
451
|
+
if _.get('versionNumber'):
|
|
452
|
+
out.append(f"{_['versionNumber']}.{_['versionMinorNumber']}")
|
|
453
|
+
else:
|
|
454
|
+
out.append(_['versionState'])
|
|
455
|
+
#return [f"{_['versionNumber']}.{_['versionMinorNumber']}"
|
|
456
|
+
# for _ in self.all_versions['data']]
|
|
457
|
+
return out
|
|
445
458
|
|
|
446
459
|
@property
|
|
447
460
|
def files(self)->list:
|
|
@@ -468,10 +481,8 @@ class StudyMetadata(dict):
|
|
|
468
481
|
filelist = self.extract_files(_.get('files', []))
|
|
469
482
|
for oldfile in filelist:
|
|
470
483
|
oldfile.update({k:v for k,v in _.items() if k in add_fields})
|
|
471
|
-
|
|
472
|
-
|
|
473
|
-
oldfile.update(version_statement)
|
|
474
|
-
#all_files.extend(self.extract_files_2(_.get('files', [])))
|
|
484
|
+
vs = _.get('versionNumber', _.get('versionState', ''))
|
|
485
|
+
oldfile.update({'versionStatement' : vs})
|
|
475
486
|
all_files.extend(filelist)
|
|
476
487
|
self.__all_files = all_files
|
|
477
488
|
return self.__all_files
|
|
@@ -904,7 +915,8 @@ class ReadmeCreator:
|
|
|
904
915
|
'Country':'Country(ies)',
|
|
905
916
|
'State':'State(s)',
|
|
906
917
|
'City':'City(ies)',
|
|
907
|
-
'Geographic Unit':'Geographic unit(s)'
|
|
918
|
+
'Geographic Unit':'Geographic unit(s)',
|
|
919
|
+
'State(s)ment' : 'Statement'}
|
|
908
920
|
for k, v in fixthese.items():
|
|
909
921
|
wordsp = wordsp.replace(k, v)
|
|
910
922
|
return wordsp.strip()
|
|
@@ -1250,6 +1262,14 @@ class FileAnalysis(dict):
|
|
|
1250
1262
|
self.update(outmeta)
|
|
1251
1263
|
return
|
|
1252
1264
|
|
|
1265
|
+
def get_encoding(self, fpath):
|
|
1266
|
+
'''
|
|
1267
|
+
Return the encoding of a file so that pandas
|
|
1268
|
+
won't crash. Hopefully.
|
|
1269
|
+
|
|
1270
|
+
fpath : str
|
|
1271
|
+
file path
|
|
1272
|
+
'''
|
|
1253
1273
|
|
|
1254
1274
|
def generic_metadata(self, ext)->None:
|
|
1255
1275
|
'''
|
|
@@ -1265,14 +1285,20 @@ class FileAnalysis(dict):
|
|
|
1265
1285
|
# data = pd.read_csv(self.__whichfile, sep='\t')
|
|
1266
1286
|
#else:
|
|
1267
1287
|
# data = pd.read_csv(self.__whichfile)
|
|
1268
|
-
|
|
1288
|
+
encme = {'.tsv': {'sep': '\t'},
|
|
1289
|
+
'.csv': {}}
|
|
1290
|
+
if ext.lower() in encme:
|
|
1291
|
+
with open(self.__whichfile, 'rb') as f:
|
|
1292
|
+
encoding = {'encoding':'utf-8'}
|
|
1293
|
+
encoding.update({'encoding':cn.detect(f.read()).get('encoding', 'utf-8')})
|
|
1294
|
+
encme[ext.lower()].update(encoding)
|
|
1269
1295
|
lookuptable ={'.tsv': {'func': pd.read_csv,
|
|
1270
|
-
'kwargs' :
|
|
1271
|
-
|
|
1272
|
-
|
|
1296
|
+
'kwargs' : encme['.tsv']},
|
|
1297
|
+
'.csv': {'func' : pd.read_csv, 'kwargs' : encme['.csv']},
|
|
1298
|
+
'.rda': {'func' : pyreadr.read_r},
|
|
1273
1299
|
'.rdata':{'func' : pyreadr.read_r}}
|
|
1274
1300
|
data = lookuptable[ext]['func'](self.__whichfile,
|
|
1275
|
-
|
|
1301
|
+
**lookuptable[ext].get('kwargs', {}))
|
|
1276
1302
|
if ext in ['.rda', '.rdata']:
|
|
1277
1303
|
data = data[None] #why pyreadr why
|
|
1278
1304
|
outmeta = {}
|
{dataverse_utils-0.22.1 → dataverse_utils-0.22.3}/src/dataverse_utils/scripts/dv_collection_info.py
RENAMED
|
@@ -207,7 +207,7 @@ def main():
|
|
|
207
207
|
else:
|
|
208
208
|
try:
|
|
209
209
|
all_studies = [dvc.StudyMetadata(url=args.url, pid=args.pid, key=args.key)]
|
|
210
|
-
except dataverse_utils.collections.MetadataError as e:
|
|
210
|
+
except (KeyError, dataverse_utils.collections.MetadataError) as e:
|
|
211
211
|
print(e, file=sys.stderr)
|
|
212
212
|
sys.exit()
|
|
213
213
|
#if 'all' in [x.lower() for x in args.fields] and args.collection:
|
{dataverse_utils-0.22.1 → dataverse_utils-0.22.3}/src/dataverse_utils/scripts/dv_readme_creator.py
RENAMED
|
@@ -36,7 +36,7 @@ def parse() -> argparse.ArgumentParser():
|
|
|
36
36
|
'Defaults to "borealisdata.ca"'))
|
|
37
37
|
parser.add_argument('-p', '--pid',
|
|
38
38
|
help=('Persistent ID of study (ie, doi or hdl). '
|
|
39
|
-
'
|
|
39
|
+
'eg: doi:12.2345/PRE/ZYX9876'),
|
|
40
40
|
type=str,
|
|
41
41
|
required=True)
|
|
42
42
|
parser.add_argument('-k', '--key', required=True,
|
|
File without changes
|
|
File without changes
|
{dataverse_utils-0.22.1 → dataverse_utils-0.22.3}/src/dataverse_utils/data/LDC_EULA_general.md
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{dataverse_utils-0.22.1 → dataverse_utils-0.22.3}/src/dataverse_utils/scripts/dv_ldc_uploader.py
RENAMED
|
File without changes
|
{dataverse_utils-0.22.1 → dataverse_utils-0.22.3}/src/dataverse_utils/scripts/dv_list_files.py
RENAMED
|
File without changes
|
{dataverse_utils-0.22.1 → dataverse_utils-0.22.3}/src/dataverse_utils/scripts/dv_manifest_gen.py
RENAMED
|
File without changes
|
{dataverse_utils-0.22.1 → dataverse_utils-0.22.3}/src/dataverse_utils/scripts/dv_pg_facet_date.py
RENAMED
|
File without changes
|
{dataverse_utils-0.22.1 → dataverse_utils-0.22.3}/src/dataverse_utils/scripts/dv_record_copy.py
RENAMED
|
File without changes
|
|
File without changes
|
{dataverse_utils-0.22.1 → dataverse_utils-0.22.3}/src/dataverse_utils/scripts/dv_replace_licence.py
RENAMED
|
File without changes
|
{dataverse_utils-0.22.1 → dataverse_utils-0.22.3}/src/dataverse_utils/scripts/dv_study_migrator.py
RENAMED
|
File without changes
|
{dataverse_utils-0.22.1 → dataverse_utils-0.22.3}/src/dataverse_utils/scripts/dv_upload_tsv.py
RENAMED
|
File without changes
|