uc3-dmp-citation 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/README.md +3 -0
- data/lib/uc3-dmp-citation/version.rb +5 -0
- data/lib/uc3-dmp-citation.rb +106 -0
- metadata +104 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: 6f168bafa37250b52f74453f1e9f044fd5ba07eab51632b650207514d7c9decd
|
4
|
+
data.tar.gz: b1f37ab91666d6356f29b6d5da6242947310df4fe5f7e337d75310b6ea78ec7c
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: d6fbdb819e93454d608a3f281e0be0839e2b7785b8072035b3a36192c5ae2be8fe79a6433174f37bdbaec8f99b836bce0a993d8189e966bd78f40a1a22f174dc
|
7
|
+
data.tar.gz: c56061f1ce9e204c97d05ddd03d385b0dcc4044e4f3bfc26e3f4ed436e26550059e8d44c0ba5b64e14ad941f4ffae74fb61d6bdaab6aaaa35cddbfc3a974b8c2
|
data/lib/uc3-dmp-citation.rb
ADDED
@@ -0,0 +1,106 @@
|
|
1
|
+
# rubocop:disable Naming/FileName
|
2
|
+
# frozen_string_literal: true
|
3
|
+
|
4
|
+
require 'bibtex'
|
5
|
+
require 'citeproc'
|
6
|
+
require 'csl/styles'
|
7
|
+
|
8
|
+
require 'uc3-dmp-external-api'
|
9
|
+
|
10
|
+
module Uc3DmpCitation
  # Error class for citation failures. It is defined for callers; nothing in this
  # file raises it.
  class CiterError < StandardError; end

  # Fetches the BibTeX record for a DOI and renders it as an HTML citation.
  class Citer
    DEFAULT_CITATION_STYLE = 'chicago-author-date'
    DEFAULT_DOI_URL = 'http://doi.org'
    DEFAULT_WORK_TYPE = 'Dataset'

    DOI_REGEX = %r{[0-9]{2}\.[0-9]{4,}/[a-zA-Z0-9/_.-]+}

    # Header used to ask the DOI resolver for BibTeX (DOI content negotiation).
    BIBTEX_HEADERS = { Accept: 'application/x-bibtex' }.freeze

    MSG_BIBTEX_FAILURE = 'Unable to fetch Bibtex for the specified DOI.'
    MSG_UNABLE_TO_UPDATE = 'Unable to update the citations on the DMP ID.'

    # Fetch the BibTeX for the DOI and convert it into an HTML citation.
    #
    # doi       - the DOI, e.g. 'doi:10.1234/abc', '10.1234/abc' or a full http(s) URL
    # logger    - optional logger; only used when it responds to :debug
    # work_type - optional label (e.g. 'dataset') that gets added to the citation. When nil
    #             it is derived from the BibTeX metadata
    # style     - the CSL style name, defaults to DEFAULT_CITATION_STYLE
    #
    # Returns the citation String, or nil when the DOI is not usable or the resolver
    # returned nothing.
    def fetch_citation(doi:, logger: nil, work_type: nil, style: DEFAULT_CITATION_STYLE)
      uri = _doi_to_uri(doi: doi)
      return nil if uri.nil?

      logger.debug(message: "Fetching BibTeX from: #{uri}") if logger.respond_to?(:debug)
      resp = Uc3DmpExternalApi::Client.call(
        url: uri, method: :get, additional_headers: BIBTEX_HEADERS, logger: logger
      )
      return nil if resp.nil? || resp.to_s.strip.empty?

      bibtex = BibTeX.parse(_cleanse_bibtex(text: resp))
      work_type = _determine_work_type(bibtex: bibtex) if work_type.nil?
      _bibtex_to_citation(uri: uri, bibtex: bibtex, work_type: work_type, style: style || DEFAULT_CITATION_STYLE)
    end

    private

    # Will convert 'doi:10.1234/abcdefg' to 'http://doi.org/10.1234/abcdefg'.
    # Values that already start with 'http' are returned as-is (minus surrounding whitespace).
    # Returns nil when the value is nil or does not contain anything that looks like a DOI.
    def _doi_to_uri(doi:)
      val = doi.to_s.strip
      return nil unless val.match?(DOI_REGEX)

      val.start_with?('http') ? val : "#{DEFAULT_DOI_URL}/#{val.gsub('doi:', '')}"
    end

    # If no :work_type was specified we can try to derive it from the BibTeX metadata.
    # Returns 'article' when the first entry has a journal, otherwise an empty String.
    def _determine_work_type(bibtex:)
      entry = bibtex&.data&.first
      return '' if entry.nil?

      entry.journal.nil? ? '' : 'article'
    end

    # Strip the LaTeX-ish artifacts out of the raw BibTeX so that it can be parsed and
    # rendered cleanly. Returns nil when there is no text to work with.
    def _cleanse_bibtex(text:)
      return nil if text.nil?

      # Make sure we're working with UTF8. Work on a copy so the caller's (possibly frozen)
      # string is left alone, and drop invalid bytes so the gsubs below cannot raise
      utf8 = text.to_s.dup.force_encoding('UTF-8').scrub('')
      return nil if utf8.strip.empty?

      # Remove any encoded HTML (e.g. "Regular text $\\lt$strong$\\gt$Bold text$\\lt$/strong$\\gt$")
      utf8 = utf8.gsub(%r{\$?\\\$?(less|lt|Lt)\$/?[a-zA-Z]+\$?\\\$?(greater|gt|Gt)\$}, '')
      # Replace any special dash, semicolon and quote characters with a minus sign or single/double quote
      # NOTE(review): the semicolon macros are replaced with a colon - confirm that is intended
      utf8 = utf8.gsub(%r{\$?\\(T|t)ext[a-zA-Z]+dash\$?}, '-').gsub(%r{\{(T|t)ext[a-zA-Z]+dash\}}, '-')
                 .gsub(%r{\$?\\(M|m)athsemicolon\$?}, ':').gsub(%r{\{(M|m)(ath)?semicolon\}}, ':')
                 .gsub(%r{\$?\\(T|t)extquotesingle\$?}, "'").gsub(%r{\{(T|t)extquotesingle\}}, "'")
                 .gsub(%r{\$?\\(T|t)extquotedouble\$?}, '"').gsub(%r{\{(T|t)extquotedouble\}}, '"')
      # Remove any remaining `\v` entries which attempt to construct an accented character
      utf8.gsub(%r{\\v}, '')
    end

    # Convert the BibTeX item to a citation.
    # Returns nil when the uri is blank, the BibTeX has no entries or nothing was rendered.
    def _bibtex_to_citation(uri:, bibtex:, work_type: DEFAULT_WORK_TYPE, style: DEFAULT_CITATION_STYLE)
      return nil unless uri.is_a?(String) && !uri.strip.empty?

      entry = bibtex&.data&.first
      return nil if entry.nil?

      processor = CiteProc::Processor.new(style: style, format: 'html')
      processor.import(bibtex.to_citeproc)
      rendered = processor.render(:bibliography, id: entry.id)
      return nil unless rendered.is_a?(Array) && rendered.any?

      # The CiteProc renderer has trouble with some things so fix them here
      #   - For some reason words in all caps in the title get wrapped in curl brackets
      citation = rendered.first.delete('{}')
      citation = _label_work_type(citation: citation, work_type: work_type)
      _link_urls(citation: citation, uri: uri)
    end

    # Insert the work type (e.g. '[Dataset].') directly after the title.
    # This supports the :apa and :chicago-author-date styles
    def _label_work_type(citation:, work_type:)
      label = work_type.to_s.tr('_', ' ').capitalize
      return citation if label.strip.empty?

      # Block form so that the label is never interpreted as a backreference
      citation.gsub(/\.”\s+/) { ".” [#{label}]. " }
              .gsub(%r{</i>\.\s+}) { "</i>. [#{label}]. " }
    end

    # Convert each http(s) URL in the citation into a link that points at the :uri.
    # The trailing period is never a part of the link! It is only re-added after the
    # link when the matched URL had swallowed the sentence's period.
    def _link_urls(citation:, uri:)
      target = uri.strip.sub(/\.+\z/, '')
      link = format('<a href="%<url>s" target="_blank">%<url>s</a>', url: target)

      citation.gsub(URI::DEFAULT_PARSER.make_regexp) do |url|
        next url unless url.start_with?('http')

        url.end_with?('.') ? "#{link}." : link
      end
    end
  end
end
|
106
|
+
# rubocop:enable Naming/FileName
|
metadata
ADDED
@@ -0,0 +1,104 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: uc3-dmp-citation
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.1
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Brian Riley
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2023-08-17 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: byebug
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - '='
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: 11.1.3
|
20
|
+
type: :development
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - '='
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: 11.1.3
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: rspec
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - '='
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: 3.9.0
|
34
|
+
type: :development
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - '='
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: 3.9.0
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: rubocop
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - '='
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: 1.50.2
|
48
|
+
type: :development
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - '='
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: 1.50.2
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: rubocop-rspec
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - '='
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: 2.20.0
|
62
|
+
type: :development
|
63
|
+
prerelease: false
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - '='
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: 2.20.0
|
69
|
+
description: Helper for working with DOI citations
|
70
|
+
email:
|
71
|
+
- brian.riley@ucop.edu
|
72
|
+
executables: []
|
73
|
+
extensions: []
|
74
|
+
extra_rdoc_files: []
|
75
|
+
files:
|
76
|
+
- README.md
|
77
|
+
- lib/uc3-dmp-citation.rb
|
78
|
+
- lib/uc3-dmp-citation/version.rb
|
79
|
+
homepage: https://github.com/CDLUC3/dmp-hub-cfn/blob/main/src/sam/gems/uc3-dmp-citation
|
80
|
+
licenses:
|
81
|
+
- MIT
|
82
|
+
metadata:
|
83
|
+
rubygems_mfa_required: 'false'
|
84
|
+
post_install_message:
|
85
|
+
rdoc_options: []
|
86
|
+
require_paths:
|
87
|
+
- lib
|
88
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
89
|
+
requirements:
|
90
|
+
- - ">="
|
91
|
+
- !ruby/object:Gem::Version
|
92
|
+
version: '2.7'
|
93
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
94
|
+
requirements:
|
95
|
+
- - ">="
|
96
|
+
- !ruby/object:Gem::Version
|
97
|
+
version: '0'
|
98
|
+
requirements: []
|
99
|
+
rubygems_version: 3.1.6
|
100
|
+
signing_key:
|
101
|
+
specification_version: 4
|
102
|
+
summary: DMPTool gem that provides support for fetching BibTex for a DOI and converting
|
103
|
+
it into a citation
|
104
|
+
test_files: []
|