dataverse-utils 0.22.1__tar.gz → 0.22.3__tar.gz

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (22)
  1. {dataverse_utils-0.22.1 → dataverse_utils-0.22.3}/PKG-INFO +1 -1
  2. {dataverse_utils-0.22.1 → dataverse_utils-0.22.3}/pyproject.toml +1 -1
  3. {dataverse_utils-0.22.1 → dataverse_utils-0.22.3}/src/dataverse_utils/__init__.py +1 -1
  4. {dataverse_utils-0.22.1 → dataverse_utils-0.22.3}/src/dataverse_utils/collections.py +41 -15
  5. {dataverse_utils-0.22.1 → dataverse_utils-0.22.3}/src/dataverse_utils/scripts/dv_collection_info.py +1 -1
  6. {dataverse_utils-0.22.1 → dataverse_utils-0.22.3}/src/dataverse_utils/scripts/dv_readme_creator.py +1 -1
  7. {dataverse_utils-0.22.1 → dataverse_utils-0.22.3}/LICENCE.md +0 -0
  8. {dataverse_utils-0.22.1 → dataverse_utils-0.22.3}/README.md +0 -0
  9. {dataverse_utils-0.22.1 → dataverse_utils-0.22.3}/src/dataverse_utils/data/LDC_EULA_general.md +0 -0
  10. {dataverse_utils-0.22.1 → dataverse_utils-0.22.3}/src/dataverse_utils/dataverse_utils.py +0 -0
  11. {dataverse_utils-0.22.1 → dataverse_utils-0.22.3}/src/dataverse_utils/dvdata.py +0 -0
  12. {dataverse_utils-0.22.1 → dataverse_utils-0.22.3}/src/dataverse_utils/ldc.py +0 -0
  13. {dataverse_utils-0.22.1 → dataverse_utils-0.22.3}/src/dataverse_utils/scripts/dv_del.py +0 -0
  14. {dataverse_utils-0.22.1 → dataverse_utils-0.22.3}/src/dataverse_utils/scripts/dv_ldc_uploader.py +0 -0
  15. {dataverse_utils-0.22.1 → dataverse_utils-0.22.3}/src/dataverse_utils/scripts/dv_list_files.py +0 -0
  16. {dataverse_utils-0.22.1 → dataverse_utils-0.22.3}/src/dataverse_utils/scripts/dv_manifest_gen.py +0 -0
  17. {dataverse_utils-0.22.1 → dataverse_utils-0.22.3}/src/dataverse_utils/scripts/dv_pg_facet_date.py +0 -0
  18. {dataverse_utils-0.22.1 → dataverse_utils-0.22.3}/src/dataverse_utils/scripts/dv_record_copy.py +0 -0
  19. {dataverse_utils-0.22.1 → dataverse_utils-0.22.3}/src/dataverse_utils/scripts/dv_release.py +0 -0
  20. {dataverse_utils-0.22.1 → dataverse_utils-0.22.3}/src/dataverse_utils/scripts/dv_replace_licence.py +0 -0
  21. {dataverse_utils-0.22.1 → dataverse_utils-0.22.3}/src/dataverse_utils/scripts/dv_study_migrator.py +0 -0
  22. {dataverse_utils-0.22.1 → dataverse_utils-0.22.3}/src/dataverse_utils/scripts/dv_upload_tsv.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: dataverse-utils
3
- Version: 0.22.1
3
+ Version: 0.22.3
4
4
  Summary: Utilities for the Dataverse data respository system
5
5
  License: MIT
6
6
  License-File: LICENCE.md
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "dataverse-utils"
3
- version = "0.22.1"
3
+ version = "0.22.3"
4
4
  description = "Utilities for the Dataverse data respository system"
5
5
  authors = [
6
6
  {name = "Paul Lesack",email = "paul.lesack@ubc.ca"}
@@ -7,7 +7,7 @@ import pathlib
7
7
  import sys
8
8
  from dataverse_utils.dataverse_utils import *
9
9
 
10
- VERSION = (0, 22, 1)
10
+ VERSION = (0, 22, '3a1')
11
11
  __version__ = '.'.join([str(x) for x in VERSION])
12
12
 
13
13
  USERAGENT = (f'dataverse_utils/v{__version__} ({sys.platform.capitalize()}); '
@@ -16,6 +16,7 @@ import traceback
16
16
  import warnings
17
17
 
18
18
  import bs4
19
+ import charset_normalizer as cn
19
20
  import markdown_pdf
20
21
  import markdownify
21
22
  import pyreadstat
@@ -290,8 +291,10 @@ class StudyMetadata(dict):
290
291
  f'Offending JSON: {self.study_meta}') from e
291
292
  self.__files = None
292
293
  self.__all_files = None
293
- self.index = {f"{_['versionNumber']}.{_['versionMinorNumber']}": n
294
- for n, _ in enumerate(self.all_versions['data'])}
294
+ #self.index = {f"{_['versionNumber']}.{_['versionMinorNumber']}": n
295
+ # for n, _ in enumerate(self.all_versions['data'])}
296
+ #self.index = {_: n for _, n in enumerate(self.versions)}
297
+ self.index = dict(enumerate(self.versions))
295
298
 
296
299
  def __obtain_metadata(self):
297
300
  '''
@@ -347,7 +350,10 @@ class StudyMetadata(dict):
347
350
  for field in v['fields']:
348
351
  tmp.update(self.extract_field_metadata(field))
349
352
  tmp.update(self.__extract_licence_info(chunk))
350
- tmp['versionStatement'] = f"{chunk['versionNumber']}.{chunk['versionMinorNumber']}"
353
+ if chunk.get('versionNumber'):
354
+ tmp['versionStatement'] = f"{chunk['versionNumber']}.{chunk['versionMinorNumber']}"
355
+ else:
356
+ tmp['versionStatement'] = f"{chunk.get('versionState', '')}"
351
357
  return tmp
352
358
 
353
359
  def extract_field_metadata(self, field):
@@ -440,8 +446,15 @@ class StudyMetadata(dict):
440
446
  '''
441
447
  Return a *list* of formatted version strings
442
448
  '''
443
- return [f"{_['versionNumber']}.{_['versionMinorNumber']}"
444
- for _ in self.all_versions['data']]
449
+ out = []
450
+ for _ in self.all_versions['data']:
451
+ if _.get('versionNumber'):
452
+ out.append(f"{_['versionNumber']}.{_['versionMinorNumber']}")
453
+ else:
454
+ out.append(_['versionState'])
455
+ #return [f"{_['versionNumber']}.{_['versionMinorNumber']}"
456
+ # for _ in self.all_versions['data']]
457
+ return out
445
458
 
446
459
  @property
447
460
  def files(self)->list:
@@ -468,10 +481,8 @@ class StudyMetadata(dict):
468
481
  filelist = self.extract_files(_.get('files', []))
469
482
  for oldfile in filelist:
470
483
  oldfile.update({k:v for k,v in _.items() if k in add_fields})
471
- version_statement = {'versionStatement':
472
- f'{_["versionNumber"]}.{_["versionMinorNumber"]}'}
473
- oldfile.update(version_statement)
474
- #all_files.extend(self.extract_files_2(_.get('files', [])))
484
+ vs = _.get('versionNumber', _.get('versionState', ''))
485
+ oldfile.update({'versionStatement' : vs})
475
486
  all_files.extend(filelist)
476
487
  self.__all_files = all_files
477
488
  return self.__all_files
@@ -904,7 +915,8 @@ class ReadmeCreator:
904
915
  'Country':'Country(ies)',
905
916
  'State':'State(s)',
906
917
  'City':'City(ies)',
907
- 'Geographic Unit':'Geographic unit(s)'}
918
+ 'Geographic Unit':'Geographic unit(s)',
919
+ 'State(s)ment' : 'Statement'}
908
920
  for k, v in fixthese.items():
909
921
  wordsp = wordsp.replace(k, v)
910
922
  return wordsp.strip()
@@ -1250,6 +1262,14 @@ class FileAnalysis(dict):
1250
1262
  self.update(outmeta)
1251
1263
  return
1252
1264
 
1265
+ def get_encoding(self, fpath):
1266
+ '''
1267
+ Return the encoding of a file so that pandas
1268
+ won't crash. Hopefully.
1269
+
1270
+ fpath : str
1271
+ file path
1272
+ '''
1253
1273
 
1254
1274
  def generic_metadata(self, ext)->None:
1255
1275
  '''
@@ -1265,14 +1285,20 @@ class FileAnalysis(dict):
1265
1285
  # data = pd.read_csv(self.__whichfile, sep='\t')
1266
1286
  #else:
1267
1287
  # data = pd.read_csv(self.__whichfile)
1268
-
1288
+ encme = {'.tsv': {'sep': '\t'},
1289
+ '.csv': {}}
1290
+ if ext.lower() in encme:
1291
+ with open(self.__whichfile, 'rb') as f:
1292
+ encoding = {'encoding':'utf-8'}
1293
+ encoding.update({'encoding':cn.detect(f.read()).get('encoding', 'utf-8')})
1294
+ encme[ext.lower()].update(encoding)
1269
1295
  lookuptable ={'.tsv': {'func': pd.read_csv,
1270
- 'kwargs' : {'sep':'\t'}},
1271
- '.csv': {'func' : pd.read_csv},
1272
- '.rda': {'func' : pyreadr.read_r},
1296
+ 'kwargs' : encme['.tsv']},
1297
+ '.csv': {'func' : pd.read_csv, 'kwargs' : encme['.csv']},
1298
+ '.rda': {'func' : pyreadr.read_r},
1273
1299
  '.rdata':{'func' : pyreadr.read_r}}
1274
1300
  data = lookuptable[ext]['func'](self.__whichfile,
1275
- **lookuptable[ext].get('kwargs', {}))
1301
+ **lookuptable[ext].get('kwargs', {}))
1276
1302
  if ext in ['.rda', '.rdata']:
1277
1303
  data = data[None] #why pyreadr why
1278
1304
  outmeta = {}
@@ -207,7 +207,7 @@ def main():
207
207
  else:
208
208
  try:
209
209
  all_studies = [dvc.StudyMetadata(url=args.url, pid=args.pid, key=args.key)]
210
- except dataverse_utils.collections.MetadataError as e:
210
+ except (KeyError, dataverse_utils.collections.MetadataError) as e:
211
211
  print(e, file=sys.stderr)
212
212
  sys.exit()
213
213
  #if 'all' in [x.lower() for x in args.fields] and args.collection:
@@ -36,7 +36,7 @@ def parse() -> argparse.ArgumentParser():
36
36
  'Defaults to "borealisdata.ca"'))
37
37
  parser.add_argument('-p', '--pid',
38
38
  help=('Persistent ID of study (ie, doi or hdl). '
39
- 'format: doi: doi:12.2345/PRE/ZYX9876'),
39
+ 'eg: doi:12.2345/PRE/ZYX9876'),
40
40
  type=str,
41
41
  required=True)
42
42
  parser.add_argument('-k', '--key', required=True,