uc3-dmp-citation 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: 6f168bafa37250b52f74453f1e9f044fd5ba07eab51632b650207514d7c9decd
4
+ data.tar.gz: b1f37ab91666d6356f29b6d5da6242947310df4fe5f7e337d75310b6ea78ec7c
5
+ SHA512:
6
+ metadata.gz: d6fbdb819e93454d608a3f281e0be0839e2b7785b8072035b3a36192c5ae2be8fe79a6433174f37bdbaec8f99b836bce0a993d8189e966bd78f40a1a22f174dc
7
+ data.tar.gz: c56061f1ce9e204c97d05ddd03d385b0dcc4044e4f3bfc26e3f4ed436e26550059e8d44c0ba5b64e14ad941f4ffae74fb61d6bdaab6aaaa35cddbfc3a974b8c2
data/README.md ADDED
@@ -0,0 +1,3 @@
1
+ # uc3-dmp-citation
2
+
3
+ Helper for fetching the BibTeX for a DOI and converting it into a citation
@@ -0,0 +1,5 @@
1
+ # frozen_string_literal: true
2
+
3
# Top-level namespace of the uc3-dmp-citation gem.
module Uc3DmpCitation
  # Version string of this gem release.
  VERSION = '0.0.1'
end
@@ -0,0 +1,106 @@
1
+ # rubocop:disable Naming/FileName
2
+ # frozen_string_literal: true
3
+
4
+ require 'bibtex'
5
+ require 'citeproc'
6
+ require 'csl/styles'
7
+
8
+ require 'uc3-dmp-external-api'
9
+
10
module Uc3DmpCitation
  # Error class reserved for citation failures (not raised by the code in this file).
  class CiterError < StandardError; end

  # Fetches the BibTeX record for a DOI and renders it as an HTML citation.
  class Citer
    DEFAULT_CITATION_STYLE = 'chicago-author-date'
    DEFAULT_DOI_URL = 'http://doi.org'
    DEFAULT_WORK_TYPE = 'Dataset'

    DOI_REGEX = %r{[0-9]{2}\.[0-9]{4,}/[a-zA-Z0-9/_.-]+}

    # Matches http(s) URLs inside a rendered citation. The character class is the
    # set of characters permitted in a URI, so a trailing period is part of the match.
    HTTP_URL_REGEX = %r{https?://[\w\-.~:/?#\[\]@!$&'()*+,;=%]+}

    # Ask the DOI resolver for BibTeX rather than its default landing page.
    BIBTEX_HEADERS = { Accept: 'application/x-bibtex' }.freeze

    MSG_BIBTEX_FAILURE = 'Unable to fetch Bibtex for the specified DOI.'
    MSG_UNABLE_TO_UPDATE = 'Unable to update the citations on the DMP ID.'

    # Fetch the BibTeX for the DOI and convert it into an HTML citation.
    #
    # @param doi [String] a DOI such as 'doi:10.1234/abc', '10.1234/abc' or a full http(s) DOI URL
    # @param logger [#debug, nil] optional logger; only used when it responds to :debug
    # @param work_type [String, nil] label inserted into the citation (e.g. 'dataset');
    #   when nil it is derived from the BibTeX metadata
    # @return [String, nil] the citation, or nil when the DOI is unusable or nothing was returned
    def fetch_citation(doi:, logger: nil, work_type: nil)
      uri = _doi_to_uri(doi: doi)
      return nil if uri.nil?

      logger.debug(message: "Fetching BibTeX from: #{uri}") if logger.respond_to?(:debug)
      # NOTE(review): assumes the client returns the response body as text - confirm.
      # A copy of the headers is passed so the client cannot modify the shared constant.
      resp = Uc3DmpExternalApi::Client.call(
        url: uri, method: :get, additional_headers: BIBTEX_HEADERS.dup, logger: logger
      )
      return nil if resp.nil? || resp.to_s.strip.empty?

      bibtex = BibTeX.parse(_cleanse_bibtex(text: resp))
      work_type = _determine_work_type(bibtex: bibtex) if work_type.nil?
      _bibtex_to_citation(uri: uri, work_type: work_type, bibtex: bibtex)
    end

    private

    # Will convert 'doi:10.1234/abcdefg' to 'http://doi.org/10.1234/abcdefg'.
    # Values that already start with 'http' are returned as-is (minus surrounding whitespace).
    #
    # @param doi [String, nil]
    # @return [String, nil] nil when the value does not contain something shaped like a DOI
    def _doi_to_uri(doi:)
      candidate = doi.to_s.strip
      return nil unless candidate.match?(DOI_REGEX)
      return candidate if candidate.start_with?('http')

      # Strip a leading 'doi:' prefix regardless of case
      "#{DEFAULT_DOI_URL}/#{candidate.sub(/\Adoi:\s*/i, '')}"
    end

    # If no :work_type was specified we can try to derive it from the BibTeX metadata.
    #
    # @param bibtex [#data, nil] the parsed BibTeX
    # @return [String] 'article' when the first entry has a journal, otherwise ''
    def _determine_work_type(bibtex:)
      return '' if bibtex.nil? || bibtex.data.nil? || bibtex.data.first.nil?

      entry = bibtex.data.first
      # Not every parsed element exposes a journal, so check before asking for it
      return '' unless entry.respond_to?(:journal)

      entry.journal.nil? ? '' : 'article'
    end

    # Normalize the raw BibTeX text so that it can be parsed and rendered.
    #
    # @param text [#to_s, nil] the raw response body
    # @return [String, nil] the cleansed text, or nil when there is nothing to cleanse
    def _cleanse_bibtex(text:)
      return nil if text.nil?

      # Make sure we're working with UTF8. Work on a copy so the caller's string is not
      # modified (and frozen strings are accepted), and replace any invalid bytes so the
      # substitutions below cannot fail on them.
      utf8 = text.to_s.dup.force_encoding('UTF-8').scrub
      return nil if utf8.strip.empty?

      # Remove any encoded HTML (e.g. "Regular text $\\lt$strong$\\gt$Bold text$\\lt$/strong$\\gt$")
      utf8 = utf8.gsub(%r{\$?\\\$?(less|lt|Lt)\$/?[a-zA-Z]+\$?\\\$?(greater|gt|Gt)\$}, '')
      # Replace any special dash, semicolon and quote characters with a minus sign or single/double quote
      # NOTE(review): the semicolon macros are replaced with ':' and the braced form matches
      # '{msemicolon}' rather than '{mathsemicolon}'; kept as found - confirm this is intended.
      utf8 = utf8.gsub(%r{\$?\\(T|t)ext[a-zA-Z]+dash\$?}, '-').gsub(%r{\{(T|t)ext[a-zA-Z]+dash\}}, '-')
                 .gsub(%r{\$?\\(M|m)athsemicolon\$?}, ':').gsub(%r{\{(M|m)semicolon\}}, ':')
                 .gsub(%r{\$?\\(T|t)extquotesingle\$?}, "'").gsub(%r{\{(T|t)extquotesingle\}}, "'")
                 .gsub(%r{\$?\\(T|t)extquotedouble\$?}, '"').gsub(%r{\{(T|t)extquotedouble\}}, '"')
      # Remove any remaining `\v` entries which attempt to construct an accented character
      utf8.gsub(%r{\\v}, '')
    end

    # Convert the BibTeX item to a citation.
    #
    # @param uri [String] the DOI URL that the citation should link to
    # @param bibtex [#data, #to_citeproc] the parsed BibTeX
    # @param work_type [String, nil] label to insert after the title; skipped when blank
    # @param style [String] the name of the citation style to render with
    # @return [String, nil] the HTML citation, or nil when it could not be rendered
    def _bibtex_to_citation(uri:, bibtex:, work_type: DEFAULT_WORK_TYPE, style: DEFAULT_CITATION_STYLE)
      return nil unless uri.is_a?(String) && !uri.strip.empty?
      return nil if bibtex.nil? || bibtex.data.nil? || bibtex.data.first.nil?

      processor = CiteProc::Processor.new(style: style, format: 'html')
      processor.import(bibtex.to_citeproc)
      rendered = processor.render(:bibliography, id: bibtex.data.first.id)
      return nil unless rendered.is_a?(Array) && rendered.any?

      # The CiteProc renderer has trouble with some things so fix them here
      #   - For some reason words in all caps in the title get wrapped in curl brackets
      citation = rendered.first.delete('{}')
      citation = _append_work_type(citation: citation, work_type: work_type)
      _linkify(citation: citation, uri: uri)
    end

    # Insert the work type, e.g. "[Dataset].", after the title portion of the citation.
    # This supports the :apa and :chicago-author-date styles.
    def _append_work_type(citation:, work_type:)
      label = work_type.to_s.strip
      return citation if label.empty?

      tag = "[#{label.tr('_', ' ').capitalize}]"
      # Block form so that nothing in the label is treated as a back-reference
      citation.gsub(/\.”\s+/) { ".” #{tag}. " }
              .gsub(%r{</i>\.\s+}) { "</i>. #{tag}. " }
    end

    # Convert each http(s) URL in the citation into a link to the DOI :uri. Ensure that
    # the trailing period is not a part of the link! A period is only written after the
    # link when the matched URL ended with one.
    def _linkify(citation:, uri:)
      link = %(<a href="#{uri}" target="_blank">#{uri}</a>)
      citation.gsub(HTTP_URL_REGEX) { |url| url.end_with?('.') ? "#{link}." : link }
    end
  end
end
106
+ # rubocop:enable Naming/FileName
metadata ADDED
@@ -0,0 +1,104 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: uc3-dmp-citation
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ platform: ruby
6
+ authors:
7
+ - Brian Riley
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2023-08-17 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: byebug
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - '='
18
+ - !ruby/object:Gem::Version
19
+ version: 11.1.3
20
+ type: :development
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - '='
25
+ - !ruby/object:Gem::Version
26
+ version: 11.1.3
27
+ - !ruby/object:Gem::Dependency
28
+ name: rspec
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - '='
32
+ - !ruby/object:Gem::Version
33
+ version: 3.9.0
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - '='
39
+ - !ruby/object:Gem::Version
40
+ version: 3.9.0
41
+ - !ruby/object:Gem::Dependency
42
+ name: rubocop
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - '='
46
+ - !ruby/object:Gem::Version
47
+ version: 1.50.2
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - '='
53
+ - !ruby/object:Gem::Version
54
+ version: 1.50.2
55
+ - !ruby/object:Gem::Dependency
56
+ name: rubocop-rspec
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - '='
60
+ - !ruby/object:Gem::Version
61
+ version: 2.20.0
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - '='
67
+ - !ruby/object:Gem::Version
68
+ version: 2.20.0
69
+ description: Helper for working with DOI citations
70
+ email:
71
+ - brian.riley@ucop.edu
72
+ executables: []
73
+ extensions: []
74
+ extra_rdoc_files: []
75
+ files:
76
+ - README.md
77
+ - lib/uc3-dmp-citation.rb
78
+ - lib/uc3-dmp-citation/version.rb
79
+ homepage: https://github.com/CDLUC3/dmp-hub-cfn/blob/main/src/sam/gems/uc3-dmp-citation
80
+ licenses:
81
+ - MIT
82
+ metadata:
83
+ rubygems_mfa_required: 'false'
84
+ post_install_message:
85
+ rdoc_options: []
86
+ require_paths:
87
+ - lib
88
+ required_ruby_version: !ruby/object:Gem::Requirement
89
+ requirements:
90
+ - - ">="
91
+ - !ruby/object:Gem::Version
92
+ version: '2.7'
93
+ required_rubygems_version: !ruby/object:Gem::Requirement
94
+ requirements:
95
+ - - ">="
96
+ - !ruby/object:Gem::Version
97
+ version: '0'
98
+ requirements: []
99
+ rubygems_version: 3.1.6
100
+ signing_key:
101
+ specification_version: 4
102
+ summary: DMPTool gem that provides support for fetching BibTex for a DOI and converting
103
+ it into a citation
104
+ test_files: []