dataverse-utils 0.22.1__tar.gz → 0.22.2__tar.gz

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (22)
  1. {dataverse_utils-0.22.1 → dataverse_utils-0.22.2}/PKG-INFO +1 -1
  2. {dataverse_utils-0.22.1 → dataverse_utils-0.22.2}/pyproject.toml +1 -1
  3. {dataverse_utils-0.22.1 → dataverse_utils-0.22.2}/src/dataverse_utils/__init__.py +1 -1
  4. {dataverse_utils-0.22.1 → dataverse_utils-0.22.2}/src/dataverse_utils/collections.py +39 -11
  5. {dataverse_utils-0.22.1 → dataverse_utils-0.22.2}/src/dataverse_utils/scripts/dv_collection_info.py +1 -1
  6. {dataverse_utils-0.22.1 → dataverse_utils-0.22.2}/src/dataverse_utils/scripts/dv_readme_creator.py +1 -1
  7. {dataverse_utils-0.22.1 → dataverse_utils-0.22.2}/LICENCE.md +0 -0
  8. {dataverse_utils-0.22.1 → dataverse_utils-0.22.2}/README.md +0 -0
  9. {dataverse_utils-0.22.1 → dataverse_utils-0.22.2}/src/dataverse_utils/data/LDC_EULA_general.md +0 -0
  10. {dataverse_utils-0.22.1 → dataverse_utils-0.22.2}/src/dataverse_utils/dataverse_utils.py +0 -0
  11. {dataverse_utils-0.22.1 → dataverse_utils-0.22.2}/src/dataverse_utils/dvdata.py +0 -0
  12. {dataverse_utils-0.22.1 → dataverse_utils-0.22.2}/src/dataverse_utils/ldc.py +0 -0
  13. {dataverse_utils-0.22.1 → dataverse_utils-0.22.2}/src/dataverse_utils/scripts/dv_del.py +0 -0
  14. {dataverse_utils-0.22.1 → dataverse_utils-0.22.2}/src/dataverse_utils/scripts/dv_ldc_uploader.py +0 -0
  15. {dataverse_utils-0.22.1 → dataverse_utils-0.22.2}/src/dataverse_utils/scripts/dv_list_files.py +0 -0
  16. {dataverse_utils-0.22.1 → dataverse_utils-0.22.2}/src/dataverse_utils/scripts/dv_manifest_gen.py +0 -0
  17. {dataverse_utils-0.22.1 → dataverse_utils-0.22.2}/src/dataverse_utils/scripts/dv_pg_facet_date.py +0 -0
  18. {dataverse_utils-0.22.1 → dataverse_utils-0.22.2}/src/dataverse_utils/scripts/dv_record_copy.py +0 -0
  19. {dataverse_utils-0.22.1 → dataverse_utils-0.22.2}/src/dataverse_utils/scripts/dv_release.py +0 -0
  20. {dataverse_utils-0.22.1 → dataverse_utils-0.22.2}/src/dataverse_utils/scripts/dv_replace_licence.py +0 -0
  21. {dataverse_utils-0.22.1 → dataverse_utils-0.22.2}/src/dataverse_utils/scripts/dv_study_migrator.py +0 -0
  22. {dataverse_utils-0.22.1 → dataverse_utils-0.22.2}/src/dataverse_utils/scripts/dv_upload_tsv.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: dataverse-utils
3
- Version: 0.22.1
3
+ Version: 0.22.2
4
4
  Summary: Utilities for the Dataverse data respository system
5
5
  License: MIT
6
6
  License-File: LICENCE.md
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "dataverse-utils"
3
- version = "0.22.1"
3
+ version = "0.22.2"
4
4
  description = "Utilities for the Dataverse data respository system"
5
5
  authors = [
6
6
  {name = "Paul Lesack",email = "paul.lesack@ubc.ca"}
@@ -7,7 +7,7 @@ import pathlib
7
7
  import sys
8
8
  from dataverse_utils.dataverse_utils import *
9
9
 
10
- VERSION = (0, 22, 1)
10
+ VERSION = (0, 22, 2)
11
11
  __version__ = '.'.join([str(x) for x in VERSION])
12
12
 
13
13
  USERAGENT = (f'dataverse_utils/v{__version__} ({sys.platform.capitalize()}); '
@@ -16,6 +16,7 @@ import traceback
16
16
  import warnings
17
17
 
18
18
  import bs4
19
+ import charset_normalizer as cn
19
20
  import markdown_pdf
20
21
  import markdownify
21
22
  import pyreadstat
@@ -290,8 +291,10 @@ class StudyMetadata(dict):
290
291
  f'Offending JSON: {self.study_meta}') from e
291
292
  self.__files = None
292
293
  self.__all_files = None
293
- self.index = {f"{_['versionNumber']}.{_['versionMinorNumber']}": n
294
- for n, _ in enumerate(self.all_versions['data'])}
294
+ #self.index = {f"{_['versionNumber']}.{_['versionMinorNumber']}": n
295
+ # for n, _ in enumerate(self.all_versions['data'])}
296
+ #self.index = {_: n for _, n in enumerate(self.versions)}
297
+ self.index = dict(enumerate(self.versions))
295
298
 
296
299
  def __obtain_metadata(self):
297
300
  '''
@@ -347,7 +350,10 @@ class StudyMetadata(dict):
347
350
  for field in v['fields']:
348
351
  tmp.update(self.extract_field_metadata(field))
349
352
  tmp.update(self.__extract_licence_info(chunk))
350
- tmp['versionStatement'] = f"{chunk['versionNumber']}.{chunk['versionMinorNumber']}"
353
+ if chunk.get('versionNumber'):
354
+ tmp['versionStatement'] = f"{chunk['versionNumber']}.{chunk['versionMinorNumber']}"
355
+ else:
356
+ tmp['versionStatement'] = f"{chunk.get('versionState', '')}"
351
357
  return tmp
352
358
 
353
359
  def extract_field_metadata(self, field):
@@ -440,8 +446,15 @@ class StudyMetadata(dict):
440
446
  '''
441
447
  Return a *list* of formatted version strings
442
448
  '''
443
- return [f"{_['versionNumber']}.{_['versionMinorNumber']}"
444
- for _ in self.all_versions['data']]
449
+ out = []
450
+ for _ in self.all_versions['data']:
451
+ if _.get('versionNumber'):
452
+ out.append(f"{_['versionNumber']}.{_['versionMinorNumber']}")
453
+ else:
454
+ out.append(_['versionState'])
455
+ #return [f"{_['versionNumber']}.{_['versionMinorNumber']}"
456
+ # for _ in self.all_versions['data']]
457
+ return out
445
458
 
446
459
  @property
447
460
  def files(self)->list:
@@ -904,7 +917,8 @@ class ReadmeCreator:
904
917
  'Country':'Country(ies)',
905
918
  'State':'State(s)',
906
919
  'City':'City(ies)',
907
- 'Geographic Unit':'Geographic unit(s)'}
920
+ 'Geographic Unit':'Geographic unit(s)',
921
+ 'State(s)ment' : 'Statement'}
908
922
  for k, v in fixthese.items():
909
923
  wordsp = wordsp.replace(k, v)
910
924
  return wordsp.strip()
@@ -1250,6 +1264,14 @@ class FileAnalysis(dict):
1250
1264
  self.update(outmeta)
1251
1265
  return
1252
1266
 
1267
+ def get_encoding(self, fpath):
1268
+ '''
1269
+ Return the encoding of a file so that pandas
1270
+ won't crash. Hopefully.
1271
+
1272
+ fpath : str
1273
+ file path
1274
+ '''
1253
1275
 
1254
1276
  def generic_metadata(self, ext)->None:
1255
1277
  '''
@@ -1265,14 +1287,20 @@ class FileAnalysis(dict):
1265
1287
  # data = pd.read_csv(self.__whichfile, sep='\t')
1266
1288
  #else:
1267
1289
  # data = pd.read_csv(self.__whichfile)
1268
-
1290
+ encme = {'.tsv': {'sep': '\t'},
1291
+ '.csv': {}}
1292
+ if ext.lower() in encme:
1293
+ with open(self.__whichfile, 'rb') as f:
1294
+ encoding = {'encoding':'utf-8'}
1295
+ encoding.update({'encoding':cn.detect(f.read()).get('encoding', 'utf-8')})
1296
+ encme[ext.lower()].update(encoding)
1269
1297
  lookuptable ={'.tsv': {'func': pd.read_csv,
1270
- 'kwargs' : {'sep':'\t'}},
1271
- '.csv': {'func' : pd.read_csv},
1272
- '.rda': {'func' : pyreadr.read_r},
1298
+ 'kwargs' : encme['.tsv']},
1299
+ '.csv': {'func' : pd.read_csv, 'kwargs' : encme['.csv']},
1300
+ '.rda': {'func' : pyreadr.read_r},
1273
1301
  '.rdata':{'func' : pyreadr.read_r}}
1274
1302
  data = lookuptable[ext]['func'](self.__whichfile,
1275
- **lookuptable[ext].get('kwargs', {}))
1303
+ **lookuptable[ext].get('kwargs', {}))
1276
1304
  if ext in ['.rda', '.rdata']:
1277
1305
  data = data[None] #why pyreadr why
1278
1306
  outmeta = {}
@@ -207,7 +207,7 @@ def main():
207
207
  else:
208
208
  try:
209
209
  all_studies = [dvc.StudyMetadata(url=args.url, pid=args.pid, key=args.key)]
210
- except dataverse_utils.collections.MetadataError as e:
210
+ except (KeyError, dataverse_utils.collections.MetadataError) as e:
211
211
  print(e, file=sys.stderr)
212
212
  sys.exit()
213
213
  #if 'all' in [x.lower() for x in args.fields] and args.collection:
@@ -36,7 +36,7 @@ def parse() -> argparse.ArgumentParser():
36
36
  'Defaults to "borealisdata.ca"'))
37
37
  parser.add_argument('-p', '--pid',
38
38
  help=('Persistent ID of study (ie, doi or hdl). '
39
- 'format: doi: doi:12.2345/PRE/ZYX9876'),
39
+ 'eg: doi:12.2345/PRE/ZYX9876'),
40
40
  type=str,
41
41
  required=True)
42
42
  parser.add_argument('-k', '--key', required=True,