briard 2.4.1 → 2.6.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (81) hide show
  1. checksums.yaml +4 -4
  2. data/.github/workflows/codeql-analysis.yml +72 -0
  3. data/.github/workflows/rubocop.yml +50 -0
  4. data/.rubocop.yml +144 -620
  5. data/.rubocop_todo.yml +76 -0
  6. data/CHANGELOG.md +22 -0
  7. data/Gemfile +2 -0
  8. data/Gemfile.lock +43 -6
  9. data/Rakefile +1 -1
  10. data/{bolognese.gemspec → briard.gemspec} +46 -38
  11. data/lib/briard/array.rb +2 -2
  12. data/lib/briard/author_utils.rb +79 -71
  13. data/lib/briard/cli.rb +12 -13
  14. data/lib/briard/crossref_utils.rb +73 -61
  15. data/lib/briard/datacite_utils.rb +132 -106
  16. data/lib/briard/doi_utils.rb +10 -10
  17. data/lib/briard/metadata.rb +96 -106
  18. data/lib/briard/metadata_utils.rb +87 -78
  19. data/lib/briard/readers/bibtex_reader.rb +65 -65
  20. data/lib/briard/readers/cff_reader.rb +88 -70
  21. data/lib/briard/readers/citeproc_reader.rb +90 -84
  22. data/lib/briard/readers/codemeta_reader.rb +68 -50
  23. data/lib/briard/readers/crosscite_reader.rb +2 -2
  24. data/lib/briard/readers/crossref_reader.rb +249 -210
  25. data/lib/briard/readers/datacite_json_reader.rb +3 -3
  26. data/lib/briard/readers/datacite_reader.rb +225 -189
  27. data/lib/briard/readers/npm_reader.rb +49 -42
  28. data/lib/briard/readers/ris_reader.rb +82 -80
  29. data/lib/briard/readers/schema_org_reader.rb +182 -159
  30. data/lib/briard/string.rb +1 -1
  31. data/lib/briard/utils.rb +4 -4
  32. data/lib/briard/version.rb +3 -1
  33. data/lib/briard/whitelist_scrubber.rb +11 -4
  34. data/lib/briard/writers/bibtex_writer.rb +14 -8
  35. data/lib/briard/writers/cff_writer.rb +33 -26
  36. data/lib/briard/writers/codemeta_writer.rb +19 -15
  37. data/lib/briard/writers/csv_writer.rb +6 -4
  38. data/lib/briard/writers/datacite_json_writer.rb +8 -2
  39. data/lib/briard/writers/jats_writer.rb +33 -28
  40. data/lib/briard/writers/rdf_xml_writer.rb +1 -1
  41. data/lib/briard/writers/ris_writer.rb +30 -18
  42. data/lib/briard/writers/turtle_writer.rb +1 -1
  43. data/lib/briard.rb +6 -6
  44. data/rubocop.sarif +0 -0
  45. data/spec/array_spec.rb +5 -5
  46. data/spec/author_utils_spec.rb +151 -132
  47. data/spec/datacite_utils_spec.rb +135 -83
  48. data/spec/doi_utils_spec.rb +168 -164
  49. data/spec/find_from_format_spec.rb +69 -69
  50. data/spec/fixtures/vcr_cassettes/Briard_Metadata/sanitize/onlies_keep_specific_tags.yml +65 -0
  51. data/spec/fixtures/vcr_cassettes/Briard_Metadata/sanitize/removes_a_tags.yml +65 -0
  52. data/spec/metadata_spec.rb +91 -90
  53. data/spec/readers/bibtex_reader_spec.rb +43 -38
  54. data/spec/readers/cff_reader_spec.rb +165 -153
  55. data/spec/readers/citeproc_reader_spec.rb +45 -40
  56. data/spec/readers/codemeta_reader_spec.rb +128 -115
  57. data/spec/readers/crosscite_reader_spec.rb +34 -24
  58. data/spec/readers/crossref_reader_spec.rb +1098 -939
  59. data/spec/readers/datacite_json_reader_spec.rb +53 -40
  60. data/spec/readers/datacite_reader_spec.rb +1541 -1337
  61. data/spec/readers/npm_reader_spec.rb +48 -43
  62. data/spec/readers/ris_reader_spec.rb +53 -47
  63. data/spec/readers/schema_org_reader_spec.rb +329 -267
  64. data/spec/spec_helper.rb +6 -5
  65. data/spec/utils_spec.rb +371 -347
  66. data/spec/writers/bibtex_writer_spec.rb +143 -143
  67. data/spec/writers/cff_writer_spec.rb +96 -90
  68. data/spec/writers/citation_writer_spec.rb +34 -33
  69. data/spec/writers/citeproc_writer_spec.rb +226 -224
  70. data/spec/writers/codemeta_writer_spec.rb +18 -16
  71. data/spec/writers/crosscite_writer_spec.rb +91 -73
  72. data/spec/writers/crossref_writer_spec.rb +99 -91
  73. data/spec/writers/csv_writer_spec.rb +70 -70
  74. data/spec/writers/datacite_json_writer_spec.rb +78 -68
  75. data/spec/writers/datacite_writer_spec.rb +417 -322
  76. data/spec/writers/jats_writer_spec.rb +177 -161
  77. data/spec/writers/rdf_xml_writer_spec.rb +68 -63
  78. data/spec/writers/ris_writer_spec.rb +162 -162
  79. data/spec/writers/turtle_writer_spec.rb +47 -47
  80. metadata +250 -160
  81. data/.github/workflows/release.yml +0 -47
@@ -3,39 +3,41 @@
3
3
  module Briard
4
4
  module Readers
5
5
  module NpmReader
6
- def get_npm(id: nil, **options)
7
- return { "string" => nil, "state" => "not_found" } unless id.present?
6
+ def get_npm(id: nil, **_options)
7
+ return { 'string' => nil, 'state' => 'not_found' } unless id.present?
8
+
8
9
  id = normalize_id(id)
9
- response = Maremma.get(id, accept: "json", raw: true)
10
- string = response.body.fetch("data", nil)
10
+ response = Maremma.get(id, accept: 'json', raw: true)
11
+ string = response.body.fetch('data', nil)
11
12
 
12
- { "string" => string }
13
+ { 'string' => string }
13
14
  end
14
15
 
15
16
  def read_npm(string: nil, **options)
16
17
  if string.present?
17
18
  errors = jsonlint(string)
18
- return { "errors" => errors } if errors.present?
19
+ return { 'errors' => errors } if errors.present?
19
20
  end
20
21
 
21
- read_options = ActiveSupport::HashWithIndifferentAccess.new(options.except(:doi, :id, :url, :sandbox, :validate, :ra))
22
+ read_options = ActiveSupport::HashWithIndifferentAccess.new(options.except(:doi, :id, :url,
23
+ :sandbox, :validate, :ra))
22
24
 
23
25
  meta = string.present? ? Maremma.from_json(string) : {}
24
26
 
25
27
  types = {
26
- "resourceTypeGeneral" => "Software",
27
- "reourceType" => "NPM Package",
28
- "schemaOrg" => "SoftwareSourceCode",
29
- "citeproc" => "article",
30
- "bibtex" => "misc",
31
- "ris" => "GEN"
28
+ 'resourceTypeGeneral' => 'Software',
29
+ 'reourceType' => 'NPM Package',
30
+ 'schemaOrg' => 'SoftwareSourceCode',
31
+ 'citeproc' => 'article',
32
+ 'bibtex' => 'misc',
33
+ 'ris' => 'GEN'
32
34
  }.compact
33
35
 
34
- creators = if meta.fetch("author", nil).present?
35
- get_authors(Array.wrap(meta.fetch("author", nil)))
36
- else
37
- [{ "nameType" => "Organizational", "name" => ":(unav)" }]
38
- end
36
+ creators = if meta.fetch('author', nil).present?
37
+ get_authors(Array.wrap(meta.fetch('author', nil)))
38
+ else
39
+ [{ 'nameType' => 'Organizational', 'name' => ':(unav)' }]
40
+ end
39
41
  # contributors = get_authors(from_citeproc(Array.wrap(meta.fetch("editor", nil))))
40
42
  # dates = if date = get_date_from_date_parts(meta.fetch("issued", nil))
41
43
  # if Date.edtf(date).present?
@@ -44,8 +46,8 @@ module Briard
44
46
  # end
45
47
  # end
46
48
  # publication_year = get_date_from_date_parts(meta.fetch("issued", nil)).to_s[0..3]
47
- rights_list = if meta.fetch("license", nil)
48
- [{ "rights" => meta.fetch("license") }.compact]
49
+ rights_list = if meta.fetch('license', nil)
50
+ [{ 'rights' => meta.fetch('license') }.compact]
49
51
  end
50
52
  # related_identifiers = if meta.fetch("container-title", nil).present? && meta.fetch("ISSN", nil).present?
51
53
  # [{ "type" => "Periodical",
@@ -83,31 +85,36 @@ module Briard
83
85
 
84
86
  # id = Array.wrap(identifiers).first.to_h.fetch("identifier", nil)
85
87
  # doi = Array.wrap(identifiers).find { |r| r["identifierType"] == "DOI" }.to_h.fetch("identifier", nil)
86
-
88
+
87
89
  # state = id.present? || read_options.present? ? "findable" : "not_found"
88
- subjects = Array.wrap(meta.fetch("keywords", nil)).map do |s|
89
- { "subject" => s }
90
+ subjects = Array.wrap(meta.fetch('keywords', nil)).map do |s|
91
+ { 'subject' => s }
90
92
  end
91
93
 
92
- {
93
- #"id" => id,
94
- #"identifiers" => identifiers,
95
- "types" => types,
96
- #"doi" => doi_from_url(doi),
97
- #"url" => normalize_id(meta.fetch("URL", nil)),
98
- "titles" => [{ "title" => meta.fetch("name", nil) }],
99
- "creators" => creators,
100
- #"contributors" => contributors,
101
- #"container" => container,
102
- #"publisher" => meta.fetch("publisher", nil),
103
- #"related_identifiers" => related_identifiers,
104
- #"dates" => dates,
105
- #"publication_year" => publication_year,
106
- "descriptions" => meta.fetch("description", nil).present? ? [{ "description" => sanitize(meta.fetch("description")), "descriptionType" => "Abstract" }] : [],
107
- "rights_list" => rights_list,
108
- "version_info" => meta.fetch("version", nil),
109
- "subjects" => subjects
110
- #"state" => state
94
+ {
95
+ # "id" => id,
96
+ # "identifiers" => identifiers,
97
+ 'types' => types,
98
+ # "doi" => doi_from_url(doi),
99
+ # "url" => normalize_id(meta.fetch("URL", nil)),
100
+ 'titles' => [{ 'title' => meta.fetch('name', nil) }],
101
+ 'creators' => creators,
102
+ # "contributors" => contributors,
103
+ # "container" => container,
104
+ # "publisher" => meta.fetch("publisher", nil),
105
+ # "related_identifiers" => related_identifiers,
106
+ # "dates" => dates,
107
+ # "publication_year" => publication_year,
108
+ 'descriptions' => if meta.fetch('description', nil).present?
109
+ [{ 'description' => sanitize(meta.fetch('description')),
110
+ 'descriptionType' => 'Abstract' }]
111
+ else
112
+ []
113
+ end,
114
+ 'rights_list' => rights_list,
115
+ 'version_info' => meta.fetch('version', nil),
116
+ 'subjects' => subjects
117
+ # "state" => state
111
118
  }.merge(read_options)
112
119
  end
113
120
  end
@@ -4,110 +4,112 @@ module Briard
4
4
  module Readers
5
5
  module RisReader
6
6
  RIS_TO_SO_TRANSLATIONS = {
7
- "BLOG" => "BlogPosting",
8
- "GEN" => "CreativeWork",
9
- "CTLG" => "DataCatalog",
10
- "DATA" => "Dataset",
11
- "FIGURE" => "ImageObject",
12
- "THES" => "Thesis",
13
- "MPCT" => "Movie",
14
- "JOUR" => "ScholarlyArticle",
15
- "COMP" => "SoftwareSourceCode",
16
- "VIDEO" => "VideoObject",
17
- "ELEC" => "WebPage"
18
- }
7
+ 'BLOG' => 'BlogPosting',
8
+ 'GEN' => 'CreativeWork',
9
+ 'CTLG' => 'DataCatalog',
10
+ 'DATA' => 'Dataset',
11
+ 'FIGURE' => 'ImageObject',
12
+ 'THES' => 'Thesis',
13
+ 'MPCT' => 'Movie',
14
+ 'JOUR' => 'ScholarlyArticle',
15
+ 'COMP' => 'SoftwareSourceCode',
16
+ 'VIDEO' => 'VideoObject',
17
+ 'ELEC' => 'WebPage'
18
+ }.freeze
19
19
 
20
- RIS_TO_CP_TRANSLATIONS = {
21
- "JOUR" => "article-journal"
22
- }
20
+ RIS_TO_CP_TRANSLATIONS = { 'JOUR' => 'article-journal' }.freeze
23
21
 
24
22
  RIS_TO_BIB_TRANSLATIONS = {
25
- "JOUR" => "article",
26
- "BOOK" => "book",
27
- "CHAP" => "inbook",
28
- "CPAPER" => "inproceedings",
29
- "GEN" => "misc",
30
- "THES" => "phdthesis",
31
- "CONF" => "proceedings",
32
- "RPRT" => "techreport",
33
- "UNPD" => "unpublished"
34
- }
23
+ 'JOUR' => 'article',
24
+ 'BOOK' => 'book',
25
+ 'CHAP' => 'inbook',
26
+ 'CPAPER' => 'inproceedings',
27
+ 'GEN' => 'misc',
28
+ 'THES' => 'phdthesis',
29
+ 'CONF' => 'proceedings',
30
+ 'RPRT' => 'techreport',
31
+ 'UNPD' => 'unpublished'
32
+ }.freeze
35
33
 
36
34
  def read_ris(string: nil, **options)
37
- read_options = ActiveSupport::HashWithIndifferentAccess.new(options.except(:doi, :id, :url, :sandbox, :validate, :ra))
35
+ read_options = ActiveSupport::HashWithIndifferentAccess.new(options.except(:doi, :id, :url,
36
+ :sandbox, :validate, :ra))
38
37
 
39
38
  meta = ris_meta(string: string)
40
39
 
41
- ris_type = meta.fetch("TY", nil) || "GEN"
42
- schema_org = RIS_TO_SO_TRANSLATIONS[ris_type] || "CreativeWork"
40
+ ris_type = meta.fetch('TY', nil) || 'GEN'
41
+ schema_org = RIS_TO_SO_TRANSLATIONS[ris_type] || 'CreativeWork'
43
42
  types = {
44
- "resourceTypeGeneral" => Metadata::RIS_TO_DC_TRANSLATIONS[ris_type],
45
- "schemaOrg" => schema_org,
46
- "citeproc" => RIS_TO_CP_TRANSLATIONS[schema_org] || "misc",
47
- "ris" => ris_type
43
+ 'resourceTypeGeneral' => Metadata::RIS_TO_DC_TRANSLATIONS[ris_type],
44
+ 'schemaOrg' => schema_org,
45
+ 'citeproc' => RIS_TO_CP_TRANSLATIONS[schema_org] || 'misc',
46
+ 'ris' => ris_type
48
47
  }.compact
49
48
 
50
- id = normalize_doi(options[:doi] || meta.fetch("DO", nil))
49
+ id = normalize_doi(options[:doi] || meta.fetch('DO', nil))
51
50
 
52
- author = Array.wrap(meta.fetch("AU", nil)).map { |a| { "creatorName" => a } }
53
- date_parts = meta.fetch("PY", nil).to_s.split("/")
54
- created_date_parts = meta.fetch("Y1", nil).to_s.split("/")
51
+ author = Array.wrap(meta.fetch('AU', nil)).map { |a| { 'creatorName' => a } }
52
+ date_parts = meta.fetch('PY', nil).to_s.split('/')
53
+ created_date_parts = meta.fetch('Y1', nil).to_s.split('/')
55
54
  dates = []
56
- dates << { "date" => get_date_from_parts(*date_parts), "dateType" => "Issued" } if meta.fetch("PY", nil).present?
57
- dates << { "date" => get_date_from_parts(*created_date_parts), "dateType" => "Created" } if meta.fetch("Y1", nil).present?
58
- publication_year = get_date_from_parts(*date_parts).to_s[0..3]
59
- related_identifiers = if meta.fetch("T2", nil).present? && meta.fetch("SN", nil).present?
60
- [{ "type" => "Periodical",
61
- "id" => meta.fetch("SN", nil),
62
- "relatedIdentifierType" => "ISSN",
63
- "relationType" => "IsPartOf",
64
- "title" => meta.fetch("T2", nil), }.compact]
65
- else
66
- []
55
+ if meta.fetch('PY', nil).present?
56
+ dates << { 'date' => get_date_from_parts(*date_parts), 'dateType' => 'Issued' }
67
57
  end
68
- container = if meta.fetch("T2", nil).present?
69
- { "type" => "Journal",
70
- "title" => meta.fetch("T2", nil),
71
- "identifier" => meta.fetch("SN", nil),
72
- "volume" => meta.fetch("VL", nil),
73
- "issue" => meta.fetch("IS", nil),
74
- "firstPage" => meta.fetch("SP", nil),
75
- "lastPage" => meta.fetch("EP", nil) }.compact
76
- else
77
- nil
58
+ if meta.fetch('Y1', nil).present?
59
+ dates << { 'date' => get_date_from_parts(*created_date_parts), 'dateType' => 'Created' }
78
60
  end
79
- state = meta.fetch("DO", nil).present? || read_options.present? ? "findable" : "not_found"
80
- subjects = Array.wrap(meta.fetch("KW", nil)).reduce([]) do |sum, subject|
61
+ publication_year = get_date_from_parts(*date_parts).to_s[0..3]
62
+ related_identifiers = if meta.fetch('T2', nil).present? && meta.fetch('SN', nil).present?
63
+ [{ 'type' => 'Periodical',
64
+ 'id' => meta.fetch('SN', nil),
65
+ 'relatedIdentifierType' => 'ISSN',
66
+ 'relationType' => 'IsPartOf',
67
+ 'title' => meta.fetch('T2', nil) }.compact]
68
+ else
69
+ []
70
+ end
71
+ container = if meta.fetch('T2', nil).present?
72
+ { 'type' => 'Journal',
73
+ 'title' => meta.fetch('T2', nil),
74
+ 'identifier' => meta.fetch('SN', nil),
75
+ 'volume' => meta.fetch('VL', nil),
76
+ 'issue' => meta.fetch('IS', nil),
77
+ 'firstPage' => meta.fetch('SP', nil),
78
+ 'lastPage' => meta.fetch('EP', nil) }.compact
79
+ end
80
+ state = meta.fetch('DO', nil).present? || read_options.present? ? 'findable' : 'not_found'
81
+ subjects = Array.wrap(meta.fetch('KW', nil)).reduce([]) do |sum, subject|
81
82
  sum += name_to_fos(subject)
82
83
 
83
84
  sum
84
85
  end
85
86
 
86
- { "id" => id,
87
- "types" => types,
88
- "doi" => doi_from_url(id),
89
- "url" => meta.fetch("UR", nil),
90
- "titles" => meta.fetch("T1", nil).present? ? [{ "title" => meta.fetch("T1", nil) }] : nil,
91
- "creators" => get_authors(author),
92
- "publisher" => meta.fetch("PB", "(:unav)"),
93
- "container" => container,
94
- "related_identifiers" => related_identifiers,
95
- "dates" => dates,
96
- "publication_year" => publication_year,
97
- "descriptions" => meta.fetch("AB", nil).present? ? [{ "description" => sanitize(meta.fetch("AB")), "descriptionType" => "Abstract" }] : nil,
98
- "subjects" => subjects,
99
- "language" => meta.fetch("LA", nil),
100
- "state" => state
101
- }.merge(read_options)
87
+ { 'id' => id,
88
+ 'types' => types,
89
+ 'doi' => doi_from_url(id),
90
+ 'url' => meta.fetch('UR', nil),
91
+ 'titles' => meta.fetch('T1', nil).present? ? [{ 'title' => meta.fetch('T1', nil) }] : nil,
92
+ 'creators' => get_authors(author),
93
+ 'publisher' => meta.fetch('PB', '(:unav)'),
94
+ 'container' => container,
95
+ 'related_identifiers' => related_identifiers,
96
+ 'dates' => dates,
97
+ 'publication_year' => publication_year,
98
+ 'descriptions' => if meta.fetch('AB', nil).present?
99
+ [{ 'description' => sanitize(meta.fetch('AB')),
100
+ 'descriptionType' => 'Abstract' }]
101
+ end,
102
+ 'subjects' => subjects,
103
+ 'language' => meta.fetch('LA', nil),
104
+ 'state' => state }.merge(read_options)
102
105
  end
103
106
 
104
107
  def ris_meta(string: nil)
105
- h = Hash.new { |h,k| h[k] = [] }
106
- string.split("\n").reduce(h) do |sum, line|
107
- k, v = line.split("-",2)
108
+ h = Hash.new { |h, k| h[k] = [] }
109
+ string.split("\n").each_with_object(h) do |line, _sum|
110
+ k, v = line.split('-', 2)
108
111
  h[k.strip] << v.to_s.strip
109
- sum
110
- end.map { |k,v| [k, v.unwrap] }.to_h.compact
112
+ end.transform_values(&:unwrap).compact
111
113
  end
112
114
  end
113
115
  end