uc3-dmp-citation 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: 6f168bafa37250b52f74453f1e9f044fd5ba07eab51632b650207514d7c9decd
4
+ data.tar.gz: b1f37ab91666d6356f29b6d5da6242947310df4fe5f7e337d75310b6ea78ec7c
5
+ SHA512:
6
+ metadata.gz: d6fbdb819e93454d608a3f281e0be0839e2b7785b8072035b3a36192c5ae2be8fe79a6433174f37bdbaec8f99b836bce0a993d8189e966bd78f40a1a22f174dc
7
+ data.tar.gz: c56061f1ce9e204c97d05ddd03d385b0dcc4044e4f3bfc26e3f4ed436e26550059e8d44c0ba5b64e14ad941f4ffae74fb61d6bdaab6aaaa35cddbfc3a974b8c2
data/README.md ADDED
@@ -0,0 +1,3 @@
1
+ # uc3-dmp-citation
2
+
3
+ Helper gem that fetches the BibTeX for a DOI and converts it into a citation
@@ -0,0 +1,5 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Uc3DmpCitation
4
+ VERSION = '0.0.1' # Release version string of this gem
5
+ end
@@ -0,0 +1,106 @@
1
+ # rubocop:disable Naming/FileName
2
+ # frozen_string_literal: true
3
+
4
+ require 'bibtex'
5
+ require 'citeproc'
6
+ require 'csl/styles'
7
+
8
+ require 'uc3-dmp-external-api'
9
+
10
module Uc3DmpCitation
  # Error class declared for citation failures. Nothing in this file raises it.
  class CiterError < StandardError; end

  # Turns a DOI into an HTML citation: the DOI is converted to a URL, the BibTeX
  # for that URL is requested through Uc3DmpExternalApi::Client, cleaned up,
  # parsed with BibTeX and rendered with CiteProc.
  class Citer
    DEFAULT_CITATION_STYLE = 'chicago-author-date'
    DEFAULT_DOI_URL = 'http://doi.org'
    DEFAULT_WORK_TYPE = 'Dataset'

    DOI_REGEX = %r{[0-9]{2}\.[0-9]{4,}/[a-zA-Z0-9/_.-]+}

    MSG_BIBTEX_FAILURE = 'Unable to fetch Bibtex for the specified DOI.'
    MSG_UNABLE_TO_UPDATE = 'Unable to update the citations on the DMP ID.'

    # Request header asking the DOI resolver for BibTeX (DOI content negotiation).
    # NOTE(review): the original code referenced an undefined `headers`; this media
    # type is the conventional one for BibTeX - confirm against the resolver in use.
    BIBTEX_HEADERS = { Accept: 'application/x-bibtex' }.freeze

    # Fetch the BibTeX for a DOI and render it as an HTML citation.
    #
    # @param doi [String, nil] a bare DOI, a 'doi:' prefixed DOI or a full http(s) URL
    # @param logger [#debug, nil] optional logger; only used if it responds to :debug
    # @param work_type [String, nil] label inserted into the citation (e.g. 'dataset');
    #   when nil it is derived from the BibTeX metadata
    # @param style [String, nil] CSL style name; nil falls back to DEFAULT_CITATION_STYLE
    # @return [String, nil] the HTML citation, or nil when the DOI is unusable, the
    #   response is empty or nothing could be rendered
    def fetch_citation(doi:, logger: nil, work_type: nil, style: DEFAULT_CITATION_STYLE)
      uri = _doi_to_uri(doi: doi)
      return nil if uri.nil?

      logger.debug(message: "Fetching BibTeX from: #{uri}") if logger.respond_to?(:debug)
      resp = Uc3DmpExternalApi::Client.call(
        url: uri, method: :get, additional_headers: BIBTEX_HEADERS, logger: logger
      )
      return nil if resp.nil? || resp.to_s.strip.empty?

      bibtex = BibTeX.parse(_cleanse_bibtex(text: resp))
      work_type = _determine_work_type(bibtex: bibtex) if work_type.nil?
      _bibtex_to_citation(uri: uri, work_type: work_type, bibtex: bibtex,
                          style: style || DEFAULT_CITATION_STYLE)
    end

    private

    # Will convert 'doi:10.1234/abcdefg' to 'http://doi.org/10.1234/abcdefg'.
    # Values that already start with 'http' are returned as they are (minus
    # surrounding whitespace).
    #
    # @param doi [String, nil]
    # @return [String, nil] nil when the value does not contain something matching DOI_REGEX
    def _doi_to_uri(doi:)
      candidate = doi.to_s.strip
      return nil unless candidate.match?(DOI_REGEX)
      return candidate if candidate.start_with?('http')

      # Drop the 'doi:' prefix regardless of its case (e.g. 'DOI:10.1234/abc')
      "#{DEFAULT_DOI_URL}/#{candidate.gsub(/doi:/i, '').strip}"
    end

    # If no :work_type was specified we can try to derive it from the BibTeX metadata.
    #
    # @param bibtex [#data, nil] parsed BibTeX whose #data is a list of entries
    # @return [String] 'article' when the first entry has a journal, otherwise ''
    def _determine_work_type(bibtex:)
      entry = bibtex&.data&.first
      return '' if entry.nil?

      # The first element is not guaranteed to expose #journal, so check before calling it
      return 'article' if entry.respond_to?(:journal) && !entry.journal.nil?

      ''
    end

    # Normalize raw BibTeX text before it is handed to the parser.
    #
    # @param text [#to_s, nil] the raw response body
    # @return [String, nil] the cleaned UTF-8 text, or nil when there is no content
    def _cleanse_bibtex(text:)
      return nil if text.nil?

      # Work on a copy so a frozen or caller-owned string is never mutated, and replace
      # invalid byte sequences so the substitutions below cannot raise on them
      utf8 = text.to_s.dup.force_encoding('UTF-8').scrub
      return nil if utf8.strip.empty?

      # Remove any encoded HTML (e.g. "Regular text $\\lt$strong$\\gt$Bold text$\\lt$/strong$\\gt$")
      utf8 = utf8.gsub(%r{\$?\\\$?(less|lt|Lt)\$/?[a-zA-Z]+\$?\\\$?(greater|gt|Gt)\$}, '')
      # Replace any special dash, semicolon and quote characters with a minus sign or single/double quote
      utf8 = utf8.gsub(%r{\$?\\(T|t)ext[a-zA-Z]+dash\$?}, '-').gsub(%r{\{(T|t)ext[a-zA-Z]+dash\}}, '-')
                 .gsub(%r{\$?\\(M|m)athsemicolon\$?}, ':').gsub(%r{\{(M|m)semicolon\}}, ':')
                 .gsub(%r{\$?\\(T|t)extquotesingle\$?}, "'").gsub(%r{\{(T|t)extquotesingle\}}, "'")
                 .gsub(%r{\$?\\(T|t)extquotedouble\$?}, '"').gsub(%r{\{(T|t)extquotedouble\}}, '"')
      # Remove any remaining `\v` entries which attempt to construct an accented character
      utf8.gsub(%r{\\v}, '')
    end

    # Convert the BibTeX item to a citation.
    #
    # @param uri [String] the URL used for the link in the citation
    # @param bibtex [#data, #to_citeproc, nil] the parsed BibTeX
    # @param work_type [String, nil] label to insert after the title; blank means no label
    # @param style [String] CSL style name passed to CiteProc
    # @return [String, nil] the HTML citation, or nil when there is nothing to render
    def _bibtex_to_citation(uri:, bibtex:, work_type: DEFAULT_WORK_TYPE, style: DEFAULT_CITATION_STYLE)
      return nil unless uri.is_a?(String) && uri.strip != ''

      entry = bibtex&.data&.first
      return nil if entry.nil?

      cp = CiteProc::Processor.new(style: style, format: 'html')
      cp.import(bibtex.to_citeproc)
      rendered = cp.render(:bibliography, id: entry.id)
      return nil unless rendered.is_a?(Array) && rendered.any?

      # The CiteProc renderer has trouble with some things so fix them here
      #  - For some reason words in all caps in the title get wrapped in curly brackets
      citation = rendered.first.delete('{}')
      citation = _append_work_type(citation: citation, work_type: work_type)
      _linkify(citation: citation, uri: uri)
    end

    # Insert "[Work type]. " after the title. A title is recognized either by a closing
    # curly quote or by a closing </i> tag followed by a period.
    # This supports the :apa and :chicago-author-date styles.
    #
    # @param citation [String]
    # @param work_type [String, nil] underscores become spaces and the result is capitalized
    # @return [String] the citation, unchanged when work_type is blank
    def _append_work_type(citation:, work_type:)
      return citation if work_type.nil? || work_type.to_s.strip.empty?

      label = "[#{work_type.to_s.tr('_', ' ').capitalize}]"
      # Block form so that backslashes in the label are never treated as back-references
      citation.gsub(/\.”\s+/) { ".” #{label}. " }
              .gsub(%r{</i>\.\s+}) { "</i>. #{label}. " }
    end

    # Convert the URL into a link. Ensure that the trailing period is not a part of
    # the link! Every http(s) URL found in the citation is replaced by a link to +uri+
    # followed by exactly one period; text matched with another scheme is left alone.
    #
    # @param citation [String]
    # @param uri [String]
    # @return [String]
    def _linkify(citation:, uri:)
      anchor = %(<a href="#{uri}" target="_blank">#{uri}</a>.)
      # URI.regexp is obsolete; the parser's make_regexp yields the same pattern
      citation.gsub(URI::DEFAULT_PARSER.make_regexp) do |url|
        url.start_with?('http') ? anchor : url
      end
    end
  end
end
106
+ # rubocop:enable Naming/FileName
metadata ADDED
@@ -0,0 +1,104 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: uc3-dmp-citation
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ platform: ruby
6
+ authors:
7
+ - Brian Riley
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2023-08-17 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: byebug
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - '='
18
+ - !ruby/object:Gem::Version
19
+ version: 11.1.3
20
+ type: :development
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - '='
25
+ - !ruby/object:Gem::Version
26
+ version: 11.1.3
27
+ - !ruby/object:Gem::Dependency
28
+ name: rspec
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - '='
32
+ - !ruby/object:Gem::Version
33
+ version: 3.9.0
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - '='
39
+ - !ruby/object:Gem::Version
40
+ version: 3.9.0
41
+ - !ruby/object:Gem::Dependency
42
+ name: rubocop
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - '='
46
+ - !ruby/object:Gem::Version
47
+ version: 1.50.2
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - '='
53
+ - !ruby/object:Gem::Version
54
+ version: 1.50.2
55
+ - !ruby/object:Gem::Dependency
56
+ name: rubocop-rspec
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - '='
60
+ - !ruby/object:Gem::Version
61
+ version: 2.20.0
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - '='
67
+ - !ruby/object:Gem::Version
68
+ version: 2.20.0
69
+ description: Helper for working with DOI citations
70
+ email:
71
+ - brian.riley@ucop.edu
72
+ executables: []
73
+ extensions: []
74
+ extra_rdoc_files: []
75
+ files:
76
+ - README.md
77
+ - lib/uc3-dmp-citation.rb
78
+ - lib/uc3-dmp-citation/version.rb
79
+ homepage: https://github.com/CDLUC3/dmp-hub-cfn/blob/main/src/sam/gems/uc3-dmp-citation
80
+ licenses:
81
+ - MIT
82
+ metadata:
83
+ rubygems_mfa_required: 'false'
84
+ post_install_message:
85
+ rdoc_options: []
86
+ require_paths:
87
+ - lib
88
+ required_ruby_version: !ruby/object:Gem::Requirement
89
+ requirements:
90
+ - - ">="
91
+ - !ruby/object:Gem::Version
92
+ version: '2.7'
93
+ required_rubygems_version: !ruby/object:Gem::Requirement
94
+ requirements:
95
+ - - ">="
96
+ - !ruby/object:Gem::Version
97
+ version: '0'
98
+ requirements: []
99
+ rubygems_version: 3.1.6
100
+ signing_key:
101
+ specification_version: 4
102
+ summary: DMPTool gem that provides support for fetching BibTex for a DOI and converting
103
+ it into a citation
104
+ test_files: []