briard 2.4.1 → 2.6.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.github/workflows/codeql-analysis.yml +72 -0
- data/.github/workflows/rubocop.yml +50 -0
- data/.rubocop.yml +144 -620
- data/.rubocop_todo.yml +76 -0
- data/CHANGELOG.md +22 -0
- data/Gemfile +2 -0
- data/Gemfile.lock +43 -6
- data/Rakefile +1 -1
- data/{bolognese.gemspec → briard.gemspec} +46 -38
- data/lib/briard/array.rb +2 -2
- data/lib/briard/author_utils.rb +79 -71
- data/lib/briard/cli.rb +12 -13
- data/lib/briard/crossref_utils.rb +73 -61
- data/lib/briard/datacite_utils.rb +132 -106
- data/lib/briard/doi_utils.rb +10 -10
- data/lib/briard/metadata.rb +96 -106
- data/lib/briard/metadata_utils.rb +87 -78
- data/lib/briard/readers/bibtex_reader.rb +65 -65
- data/lib/briard/readers/cff_reader.rb +88 -70
- data/lib/briard/readers/citeproc_reader.rb +90 -84
- data/lib/briard/readers/codemeta_reader.rb +68 -50
- data/lib/briard/readers/crosscite_reader.rb +2 -2
- data/lib/briard/readers/crossref_reader.rb +249 -210
- data/lib/briard/readers/datacite_json_reader.rb +3 -3
- data/lib/briard/readers/datacite_reader.rb +225 -189
- data/lib/briard/readers/npm_reader.rb +49 -42
- data/lib/briard/readers/ris_reader.rb +82 -80
- data/lib/briard/readers/schema_org_reader.rb +182 -159
- data/lib/briard/string.rb +1 -1
- data/lib/briard/utils.rb +4 -4
- data/lib/briard/version.rb +3 -1
- data/lib/briard/whitelist_scrubber.rb +11 -4
- data/lib/briard/writers/bibtex_writer.rb +14 -8
- data/lib/briard/writers/cff_writer.rb +33 -26
- data/lib/briard/writers/codemeta_writer.rb +19 -15
- data/lib/briard/writers/csv_writer.rb +6 -4
- data/lib/briard/writers/datacite_json_writer.rb +8 -2
- data/lib/briard/writers/jats_writer.rb +33 -28
- data/lib/briard/writers/rdf_xml_writer.rb +1 -1
- data/lib/briard/writers/ris_writer.rb +30 -18
- data/lib/briard/writers/turtle_writer.rb +1 -1
- data/lib/briard.rb +6 -6
- data/rubocop.sarif +0 -0
- data/spec/array_spec.rb +5 -5
- data/spec/author_utils_spec.rb +151 -132
- data/spec/datacite_utils_spec.rb +135 -83
- data/spec/doi_utils_spec.rb +168 -164
- data/spec/find_from_format_spec.rb +69 -69
- data/spec/fixtures/vcr_cassettes/Briard_Metadata/sanitize/onlies_keep_specific_tags.yml +65 -0
- data/spec/fixtures/vcr_cassettes/Briard_Metadata/sanitize/removes_a_tags.yml +65 -0
- data/spec/metadata_spec.rb +91 -90
- data/spec/readers/bibtex_reader_spec.rb +43 -38
- data/spec/readers/cff_reader_spec.rb +165 -153
- data/spec/readers/citeproc_reader_spec.rb +45 -40
- data/spec/readers/codemeta_reader_spec.rb +128 -115
- data/spec/readers/crosscite_reader_spec.rb +34 -24
- data/spec/readers/crossref_reader_spec.rb +1098 -939
- data/spec/readers/datacite_json_reader_spec.rb +53 -40
- data/spec/readers/datacite_reader_spec.rb +1541 -1337
- data/spec/readers/npm_reader_spec.rb +48 -43
- data/spec/readers/ris_reader_spec.rb +53 -47
- data/spec/readers/schema_org_reader_spec.rb +329 -267
- data/spec/spec_helper.rb +6 -5
- data/spec/utils_spec.rb +371 -347
- data/spec/writers/bibtex_writer_spec.rb +143 -143
- data/spec/writers/cff_writer_spec.rb +96 -90
- data/spec/writers/citation_writer_spec.rb +34 -33
- data/spec/writers/citeproc_writer_spec.rb +226 -224
- data/spec/writers/codemeta_writer_spec.rb +18 -16
- data/spec/writers/crosscite_writer_spec.rb +91 -73
- data/spec/writers/crossref_writer_spec.rb +99 -91
- data/spec/writers/csv_writer_spec.rb +70 -70
- data/spec/writers/datacite_json_writer_spec.rb +78 -68
- data/spec/writers/datacite_writer_spec.rb +417 -322
- data/spec/writers/jats_writer_spec.rb +177 -161
- data/spec/writers/rdf_xml_writer_spec.rb +68 -63
- data/spec/writers/ris_writer_spec.rb +162 -162
- data/spec/writers/turtle_writer_spec.rb +47 -47
- metadata +250 -160
- data/.github/workflows/release.yml +0 -47
@@ -3,39 +3,41 @@
|
|
3
3
|
module Briard
|
4
4
|
module Readers
|
5
5
|
module NpmReader
|
6
|
-
def get_npm(id: nil, **
|
7
|
-
return {
|
6
|
+
def get_npm(id: nil, **_options)
|
7
|
+
return { 'string' => nil, 'state' => 'not_found' } unless id.present?
|
8
|
+
|
8
9
|
id = normalize_id(id)
|
9
|
-
response = Maremma.get(id, accept:
|
10
|
-
string = response.body.fetch(
|
10
|
+
response = Maremma.get(id, accept: 'json', raw: true)
|
11
|
+
string = response.body.fetch('data', nil)
|
11
12
|
|
12
|
-
{
|
13
|
+
{ 'string' => string }
|
13
14
|
end
|
14
15
|
|
15
16
|
def read_npm(string: nil, **options)
|
16
17
|
if string.present?
|
17
18
|
errors = jsonlint(string)
|
18
|
-
return {
|
19
|
+
return { 'errors' => errors } if errors.present?
|
19
20
|
end
|
20
21
|
|
21
|
-
read_options = ActiveSupport::HashWithIndifferentAccess.new(options.except(:doi, :id, :url,
|
22
|
+
read_options = ActiveSupport::HashWithIndifferentAccess.new(options.except(:doi, :id, :url,
|
23
|
+
:sandbox, :validate, :ra))
|
22
24
|
|
23
25
|
meta = string.present? ? Maremma.from_json(string) : {}
|
24
26
|
|
25
27
|
types = {
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
28
|
+
'resourceTypeGeneral' => 'Software',
|
29
|
+
'reourceType' => 'NPM Package',
|
30
|
+
'schemaOrg' => 'SoftwareSourceCode',
|
31
|
+
'citeproc' => 'article',
|
32
|
+
'bibtex' => 'misc',
|
33
|
+
'ris' => 'GEN'
|
32
34
|
}.compact
|
33
35
|
|
34
|
-
creators = if meta.fetch(
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
36
|
+
creators = if meta.fetch('author', nil).present?
|
37
|
+
get_authors(Array.wrap(meta.fetch('author', nil)))
|
38
|
+
else
|
39
|
+
[{ 'nameType' => 'Organizational', 'name' => ':(unav)' }]
|
40
|
+
end
|
39
41
|
# contributors = get_authors(from_citeproc(Array.wrap(meta.fetch("editor", nil))))
|
40
42
|
# dates = if date = get_date_from_date_parts(meta.fetch("issued", nil))
|
41
43
|
# if Date.edtf(date).present?
|
@@ -44,8 +46,8 @@ module Briard
|
|
44
46
|
# end
|
45
47
|
# end
|
46
48
|
# publication_year = get_date_from_date_parts(meta.fetch("issued", nil)).to_s[0..3]
|
47
|
-
rights_list = if meta.fetch(
|
48
|
-
[{
|
49
|
+
rights_list = if meta.fetch('license', nil)
|
50
|
+
[{ 'rights' => meta.fetch('license') }.compact]
|
49
51
|
end
|
50
52
|
# related_identifiers = if meta.fetch("container-title", nil).present? && meta.fetch("ISSN", nil).present?
|
51
53
|
# [{ "type" => "Periodical",
|
@@ -83,31 +85,36 @@ module Briard
|
|
83
85
|
|
84
86
|
# id = Array.wrap(identifiers).first.to_h.fetch("identifier", nil)
|
85
87
|
# doi = Array.wrap(identifiers).find { |r| r["identifierType"] == "DOI" }.to_h.fetch("identifier", nil)
|
86
|
-
|
88
|
+
|
87
89
|
# state = id.present? || read_options.present? ? "findable" : "not_found"
|
88
|
-
subjects = Array.wrap(meta.fetch(
|
89
|
-
{
|
90
|
+
subjects = Array.wrap(meta.fetch('keywords', nil)).map do |s|
|
91
|
+
{ 'subject' => s }
|
90
92
|
end
|
91
93
|
|
92
|
-
{
|
93
|
-
#"id" => id,
|
94
|
-
#"identifiers" => identifiers,
|
95
|
-
|
96
|
-
#"doi" => doi_from_url(doi),
|
97
|
-
#"url" => normalize_id(meta.fetch("URL", nil)),
|
98
|
-
|
99
|
-
|
100
|
-
#"contributors" => contributors,
|
101
|
-
#"container" => container,
|
102
|
-
#"publisher" => meta.fetch("publisher", nil),
|
103
|
-
#"related_identifiers" => related_identifiers,
|
104
|
-
#"dates" => dates,
|
105
|
-
#"publication_year" => publication_year,
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
94
|
+
{
|
95
|
+
# "id" => id,
|
96
|
+
# "identifiers" => identifiers,
|
97
|
+
'types' => types,
|
98
|
+
# "doi" => doi_from_url(doi),
|
99
|
+
# "url" => normalize_id(meta.fetch("URL", nil)),
|
100
|
+
'titles' => [{ 'title' => meta.fetch('name', nil) }],
|
101
|
+
'creators' => creators,
|
102
|
+
# "contributors" => contributors,
|
103
|
+
# "container" => container,
|
104
|
+
# "publisher" => meta.fetch("publisher", nil),
|
105
|
+
# "related_identifiers" => related_identifiers,
|
106
|
+
# "dates" => dates,
|
107
|
+
# "publication_year" => publication_year,
|
108
|
+
'descriptions' => if meta.fetch('description', nil).present?
|
109
|
+
[{ 'description' => sanitize(meta.fetch('description')),
|
110
|
+
'descriptionType' => 'Abstract' }]
|
111
|
+
else
|
112
|
+
[]
|
113
|
+
end,
|
114
|
+
'rights_list' => rights_list,
|
115
|
+
'version_info' => meta.fetch('version', nil),
|
116
|
+
'subjects' => subjects
|
117
|
+
# "state" => state
|
111
118
|
}.merge(read_options)
|
112
119
|
end
|
113
120
|
end
|
@@ -4,110 +4,112 @@ module Briard
|
|
4
4
|
module Readers
|
5
5
|
module RisReader
|
6
6
|
RIS_TO_SO_TRANSLATIONS = {
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
}
|
7
|
+
'BLOG' => 'BlogPosting',
|
8
|
+
'GEN' => 'CreativeWork',
|
9
|
+
'CTLG' => 'DataCatalog',
|
10
|
+
'DATA' => 'Dataset',
|
11
|
+
'FIGURE' => 'ImageObject',
|
12
|
+
'THES' => 'Thesis',
|
13
|
+
'MPCT' => 'Movie',
|
14
|
+
'JOUR' => 'ScholarlyArticle',
|
15
|
+
'COMP' => 'SoftwareSourceCode',
|
16
|
+
'VIDEO' => 'VideoObject',
|
17
|
+
'ELEC' => 'WebPage'
|
18
|
+
}.freeze
|
19
19
|
|
20
|
-
RIS_TO_CP_TRANSLATIONS = {
|
21
|
-
"JOUR" => "article-journal"
|
22
|
-
}
|
20
|
+
RIS_TO_CP_TRANSLATIONS = { 'JOUR' => 'article-journal' }.freeze
|
23
21
|
|
24
22
|
RIS_TO_BIB_TRANSLATIONS = {
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
}
|
23
|
+
'JOUR' => 'article',
|
24
|
+
'BOOK' => 'book',
|
25
|
+
'CHAP' => 'inbook',
|
26
|
+
'CPAPER' => 'inproceedings',
|
27
|
+
'GEN' => 'misc',
|
28
|
+
'THES' => 'phdthesis',
|
29
|
+
'CONF' => 'proceedings',
|
30
|
+
'RPRT' => 'techreport',
|
31
|
+
'UNPD' => 'unpublished'
|
32
|
+
}.freeze
|
35
33
|
|
36
34
|
def read_ris(string: nil, **options)
|
37
|
-
read_options = ActiveSupport::HashWithIndifferentAccess.new(options.except(:doi, :id, :url,
|
35
|
+
read_options = ActiveSupport::HashWithIndifferentAccess.new(options.except(:doi, :id, :url,
|
36
|
+
:sandbox, :validate, :ra))
|
38
37
|
|
39
38
|
meta = ris_meta(string: string)
|
40
39
|
|
41
|
-
ris_type = meta.fetch(
|
42
|
-
schema_org = RIS_TO_SO_TRANSLATIONS[ris_type] ||
|
40
|
+
ris_type = meta.fetch('TY', nil) || 'GEN'
|
41
|
+
schema_org = RIS_TO_SO_TRANSLATIONS[ris_type] || 'CreativeWork'
|
43
42
|
types = {
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
43
|
+
'resourceTypeGeneral' => Metadata::RIS_TO_DC_TRANSLATIONS[ris_type],
|
44
|
+
'schemaOrg' => schema_org,
|
45
|
+
'citeproc' => RIS_TO_CP_TRANSLATIONS[schema_org] || 'misc',
|
46
|
+
'ris' => ris_type
|
48
47
|
}.compact
|
49
48
|
|
50
|
-
id = normalize_doi(options[:doi] || meta.fetch(
|
49
|
+
id = normalize_doi(options[:doi] || meta.fetch('DO', nil))
|
51
50
|
|
52
|
-
author = Array.wrap(meta.fetch(
|
53
|
-
date_parts = meta.fetch(
|
54
|
-
created_date_parts = meta.fetch(
|
51
|
+
author = Array.wrap(meta.fetch('AU', nil)).map { |a| { 'creatorName' => a } }
|
52
|
+
date_parts = meta.fetch('PY', nil).to_s.split('/')
|
53
|
+
created_date_parts = meta.fetch('Y1', nil).to_s.split('/')
|
55
54
|
dates = []
|
56
|
-
|
57
|
-
|
58
|
-
publication_year = get_date_from_parts(*date_parts).to_s[0..3]
|
59
|
-
related_identifiers = if meta.fetch("T2", nil).present? && meta.fetch("SN", nil).present?
|
60
|
-
[{ "type" => "Periodical",
|
61
|
-
"id" => meta.fetch("SN", nil),
|
62
|
-
"relatedIdentifierType" => "ISSN",
|
63
|
-
"relationType" => "IsPartOf",
|
64
|
-
"title" => meta.fetch("T2", nil), }.compact]
|
65
|
-
else
|
66
|
-
[]
|
55
|
+
if meta.fetch('PY', nil).present?
|
56
|
+
dates << { 'date' => get_date_from_parts(*date_parts), 'dateType' => 'Issued' }
|
67
57
|
end
|
68
|
-
|
69
|
-
{
|
70
|
-
"title" => meta.fetch("T2", nil),
|
71
|
-
"identifier" => meta.fetch("SN", nil),
|
72
|
-
"volume" => meta.fetch("VL", nil),
|
73
|
-
"issue" => meta.fetch("IS", nil),
|
74
|
-
"firstPage" => meta.fetch("SP", nil),
|
75
|
-
"lastPage" => meta.fetch("EP", nil) }.compact
|
76
|
-
else
|
77
|
-
nil
|
58
|
+
if meta.fetch('Y1', nil).present?
|
59
|
+
dates << { 'date' => get_date_from_parts(*created_date_parts), 'dateType' => 'Created' }
|
78
60
|
end
|
79
|
-
|
80
|
-
|
61
|
+
publication_year = get_date_from_parts(*date_parts).to_s[0..3]
|
62
|
+
related_identifiers = if meta.fetch('T2', nil).present? && meta.fetch('SN', nil).present?
|
63
|
+
[{ 'type' => 'Periodical',
|
64
|
+
'id' => meta.fetch('SN', nil),
|
65
|
+
'relatedIdentifierType' => 'ISSN',
|
66
|
+
'relationType' => 'IsPartOf',
|
67
|
+
'title' => meta.fetch('T2', nil) }.compact]
|
68
|
+
else
|
69
|
+
[]
|
70
|
+
end
|
71
|
+
container = if meta.fetch('T2', nil).present?
|
72
|
+
{ 'type' => 'Journal',
|
73
|
+
'title' => meta.fetch('T2', nil),
|
74
|
+
'identifier' => meta.fetch('SN', nil),
|
75
|
+
'volume' => meta.fetch('VL', nil),
|
76
|
+
'issue' => meta.fetch('IS', nil),
|
77
|
+
'firstPage' => meta.fetch('SP', nil),
|
78
|
+
'lastPage' => meta.fetch('EP', nil) }.compact
|
79
|
+
end
|
80
|
+
state = meta.fetch('DO', nil).present? || read_options.present? ? 'findable' : 'not_found'
|
81
|
+
subjects = Array.wrap(meta.fetch('KW', nil)).reduce([]) do |sum, subject|
|
81
82
|
sum += name_to_fos(subject)
|
82
83
|
|
83
84
|
sum
|
84
85
|
end
|
85
86
|
|
86
|
-
{
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
87
|
+
{ 'id' => id,
|
88
|
+
'types' => types,
|
89
|
+
'doi' => doi_from_url(id),
|
90
|
+
'url' => meta.fetch('UR', nil),
|
91
|
+
'titles' => meta.fetch('T1', nil).present? ? [{ 'title' => meta.fetch('T1', nil) }] : nil,
|
92
|
+
'creators' => get_authors(author),
|
93
|
+
'publisher' => meta.fetch('PB', '(:unav)'),
|
94
|
+
'container' => container,
|
95
|
+
'related_identifiers' => related_identifiers,
|
96
|
+
'dates' => dates,
|
97
|
+
'publication_year' => publication_year,
|
98
|
+
'descriptions' => if meta.fetch('AB', nil).present?
|
99
|
+
[{ 'description' => sanitize(meta.fetch('AB')),
|
100
|
+
'descriptionType' => 'Abstract' }]
|
101
|
+
end,
|
102
|
+
'subjects' => subjects,
|
103
|
+
'language' => meta.fetch('LA', nil),
|
104
|
+
'state' => state }.merge(read_options)
|
102
105
|
end
|
103
106
|
|
104
107
|
def ris_meta(string: nil)
|
105
|
-
h = Hash.new { |h,k| h[k] = [] }
|
106
|
-
string.split("\n").
|
107
|
-
k, v = line.split(
|
108
|
+
h = Hash.new { |h, k| h[k] = [] }
|
109
|
+
string.split("\n").each_with_object(h) do |line, _sum|
|
110
|
+
k, v = line.split('-', 2)
|
108
111
|
h[k.strip] << v.to_s.strip
|
109
|
-
|
110
|
-
end.map { |k,v| [k, v.unwrap] }.to_h.compact
|
112
|
+
end.transform_values(&:unwrap).compact
|
111
113
|
end
|
112
114
|
end
|
113
115
|
end
|