citero 1.0.0.alpha
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/Gemfile +2 -0
- data/README.md +27 -0
- data/Rakefile +7 -0
- data/lib/citero.rb +123 -0
- data/lib/citero/csf.rb +31 -0
- data/lib/citero/inputs.rb +7 -0
- data/lib/citero/inputs/openurl.rb +272 -0
- data/lib/citero/inputs/pnx.rb +137 -0
- data/lib/citero/inputs/readers.rb +7 -0
- data/lib/citero/inputs/readers/pnx_reader.rb +151 -0
- data/lib/citero/outputs.rb +9 -0
- data/lib/citero/outputs/bibtex.rb +174 -0
- data/lib/citero/outputs/easybib.rb +203 -0
- data/lib/citero/outputs/openurl.rb +199 -0
- data/lib/citero/outputs/refworks_tagged.rb +52 -0
- data/lib/citero/outputs/ris.rb +209 -0
- data/lib/citero/utils.rb +5 -0
- data/lib/citero/utils/name_formatter.rb +56 -0
- data/lib/citero/version.rb +3 -0
- metadata +159 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: a420fcb5e2a2984a8075c248070be78ba1523017
|
4
|
+
data.tar.gz: 8728f7f6805f7713e84e4586606a31f7c5af7bb9
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 0c5fd7a6553dd5be250cbf512f5f9fca8e5ac5c6c60d184797913992d9cc4cdb49fba005bdc1b25f37de2aae93f43391544d1abbec917a8c416ce4190ccff599
|
7
|
+
data.tar.gz: b8663ea856531cc639cda15902992d0caeb7b2a6855be5595a72607a939d5ba21fad9909677ec4fcf047dbccc0f293624713151768d3539cb387af516f2515f2
|
data/Gemfile
ADDED
data/README.md
ADDED
@@ -0,0 +1,27 @@
|
|
1
|
+
Citero-ruby
|
2
|
+
==========
|
3
|
+
Ruby repository for the [Citero](https://github.com/NYULibraries/citero) project.
|
4
|
+
|
5
|
+
Citero is a program that allows for mapping of data inputs from various systems into one normalized metadata schema
|
6
|
+
tentatively known as *Citero Standard Form*, or *CSF*. From the normalized schema, *CSF*, it can produce another output
|
7
|
+
format for use by another system.
|
8
|
+
|
9
|
+
Citero-ruby is a complete rewrite of the Java project in Ruby.
|
10
|
+
|
11
|
+
Currently Supported Formats/Systems
|
12
|
+
===================================
|
13
|
+
|
14
|
+
How to install
|
15
|
+
==============
|
16
|
+
|
17
|
+
How to run
|
18
|
+
==========
|
19
|
+
|
20
|
+
API Considerations
|
21
|
+
==========
|
22
|
+
|
23
|
+
Exceptions
|
24
|
+
==========
|
25
|
+
|
26
|
+
CSF
|
27
|
+
=====
|
data/Rakefile
ADDED
data/lib/citero.rb
ADDED
@@ -0,0 +1,123 @@
|
|
1
|
+
# Entry point of the gem: converts citation data between formats.
#
# Calls are chained on the module itself, for example
#
#   Citero.map(raw).from_pnx.to_ris
#
# +map+ stores the raw input, +from_<format>+ records the input format and
# +to_<format>+ runs the conversion and returns the rendered output. The
# +from_<format>+ / +to_<format>+ shortcuts are resolved dynamically for every
# entry of +from_formats+ / +to_formats+.
#
# NOTE: the pending input and the chosen formats are stored in instance
# variables of the module, so they persist between calls.
module Citero
  require_relative 'citero/version'
  require_relative 'citero/csf'
  require_relative 'citero/inputs'
  require_relative 'citero/outputs'
  require_relative 'citero/utils'

  # @return [Array<Symbol>] formats that can be read.
  def self.from_formats
    [:csf, :openurl, :pnx]
  end

  # @return [Array<Symbol>] formats that can be produced.
  def self.to_formats
    [:csf, :ris, :openurl, :bibtex, :easybib, :refworks_tagged]
  end

  # @return [Array] citation styles; currently always empty.
  def self.citation_styles
    []
  end

  # Stores the raw input for the conversion chain.
  #
  # @param input the data to convert (a CSF hash or string, or whatever the
  #   selected input reader accepts)
  # @return [Module] Citero itself, so calls can be chained
  def self.map(input)
    @input = input
    self
  end

  # Records the format of the stored input.
  #
  # @param format [String, Symbol] one of +from_formats+
  # @return [Module] Citero itself, so calls can be chained
  def self.from_(format)
    @from_format = format.to_sym
    self
  end

  # Returns the CSF representation of the stored input.
  #
  # @return the +csf+ of the selected input reader, or nil when no input
  #   format has been chosen yet
  # @raise [ArgumentError] when the chosen input format is not supported
  def self.csf
    return nil unless @from_format
    build_source.csf
  end

  # Converts the stored input to the requested output format.
  #
  # @param format [String, Symbol] one of +to_formats+
  # @return [String, nil] the rendered output; nil when +format+ is not one of
  #   +to_formats+
  # @raise [ArgumentError] when the input format is missing or unsupported
  def self.to_(format)
    @to_format = format.to_sym
    from = build_source

    case @to_format
    when :ris
      Citero::Outputs::Ris.new(from).to_ris
    when :openurl
      Citero::Outputs::OpenUrl.new(from).to_openurl
    when :bibtex
      Citero::Outputs::Bibtex.new(from).to_bibtex
    when :easybib
      Citero::Outputs::EasyBib.new(from).to_easybib
    when :refworks_tagged
      Citero::Outputs::RefworksTagged.new(from).to_refworks_tagged
    when :csf
      csf_hash_to_string(from.csf)
    end
  end

  # Parses the textual CSF form ("key: value", one pair per line) into a hash
  # whose values are arrays, so repeated keys accumulate.
  #
  # @param string [String] CSF text
  # @return [Hash{String => Array<String>}]
  def self.csf_string_to_hash(string)
    hash = {}
    string.lines.map(&:strip).each do |line|
      # A blank line carries no key; skipping it avoids a nil => [] entry.
      next if line.empty?
      k, v = line.split(':', 2).map(&:strip)
      hash[k] = [hash[k], v].compact.flatten
    end
    hash
  end

  # Dispatches +from_<format>+ and +to_<format>+ to +from_+ / +to_+ for the
  # supported formats; every other unknown method is passed to +super+.
  def self.method_missing(method_sym, *arguments, &block)
    direction, format = method_sym.to_s.split('_', 2)
    return super unless dynamic_format?(direction, format)
    direction == 'to' ? to_(format) : from_(format)
  end

  # Reports the dynamic +from_<format>+ / +to_<format>+ shortcuts to
  # +respond_to?+ without hiding methods that really exist (e.g. +to_s+).
  def self.respond_to_missing?(method_sym, include_private = false)
    direction, format = method_sym.to_s.split('_', 2)
    dynamic_format?(direction, format) || super
  end

  # Builds the input reader matching the chosen input format.
  def self.build_source
    case @from_format
    when :csf
      data = @input
      data = csf_string_to_hash(data) if data.kind_of?(String)
      Citero::CSF.new(data)
    when :openurl
      Citero::Inputs::OpenUrl.new(@input)
    when :pnx
      Citero::Inputs::Pnx.new(@input)
    else
      raise ArgumentError, "unsupported input format: #{@from_format.inspect}"
    end
  end
  private_class_method :build_source

  # Renders CSF data as "key: value" lines; '.' and ',' inside a value are
  # prefixed with a backslash. Array values produce one line per element.
  def self.csf_hash_to_string(csf)
    lines = []
    csf.each do |key, value|
      values = value.kind_of?(Array) ? value : [value]
      values.each do |item|
        lines << "#{key}: #{item.gsub('.','\.').gsub(',','\,')}\n"
      end
    end
    lines.join.chomp
  end
  private_class_method :csf_hash_to_string

  # True when +direction+ is "from"/"to" and +format+ names a supported format.
  def self.dynamic_format?(direction, format)
    return false if format.nil?
    case direction
    when 'from' then from_formats.include?(format.to_sym)
    when 'to' then to_formats.include?(format.to_sym)
    else false
    end
  end
  private_class_method :dynamic_format?
end
|
data/lib/citero/csf.rb
ADDED
@@ -0,0 +1,31 @@
|
|
1
|
+
module Citero
  # Forwardable lives in the standard library and is not loaded by default.
  require 'forwardable'

  # Citero Standard Form: a Hash-backed container of citation fields.
  #
  # Assigning to a key never overwrites: the new value is appended to whatever
  # is already stored. A key holding exactly one value exposes that value
  # directly; a key holding several exposes them as a flat Array.
  class CSF
    extend Forwardable
    # []= is deliberately not delegated; it is defined below with the
    # accumulate-instead-of-overwrite behaviour.
    def_delegators :@data, :[], :size, :each, :inspect, :to_s
    attr_reader :data
    alias_method :csf, :data

    # @param hash [Hash, nil] optional initial fields, see #load_from_hash
    def initialize(hash = nil)
      @data = {}
      load_from_hash(hash) unless hash.nil?
    end

    # Adds every pair of +hash+ through #[]=, skipping nil values.
    #
    # @param hash [#each_pair] fields to add
    def load_from_hash(hash)
      hash.each_pair do |key, value|
        next if value.nil?
        self[key] = value
      end
    end

    # Appends +value+ to the values already stored under +key+.
    #
    # @param key the field name
    # @param value a single value or an Array of values; nil elements are dropped
    def []=(key, value)
      @data[key] = element_or_list(@data[key], value)
    end

    private

    # Combines the stored and incoming values (stored first), flattening
    # nested arrays and dropping nils. A single survivor is returned bare.
    def element_or_list(existing, incoming)
      combined = [existing, incoming].flatten.compact
      combined.size == 1 ? combined.first : combined
    end
  end
end
|
@@ -0,0 +1,272 @@
|
|
1
|
+
module Citero
|
2
|
+
module Inputs
|
3
|
+
class OpenUrl
|
4
|
+
require 'open-uri'
|
5
|
+
require 'cgi'
|
6
|
+
|
7
|
+
attr_reader :csf, :params
|
8
|
+
|
9
|
+
def initialize(raw_data)
|
10
|
+
@raw_data = raw_data
|
11
|
+
construct_csf
|
12
|
+
end
|
13
|
+
|
14
|
+
private
|
15
|
+
|
16
|
+
def construct_csf
|
17
|
+
@csf = CSF.new
|
18
|
+
url = @raw_data
|
19
|
+
@params = CGI.parse(CGI::unescape(URI.parse(url).query))
|
20
|
+
remove_blanks_from_params
|
21
|
+
|
22
|
+
hash = [
|
23
|
+
{"itemType" => item_type},
|
24
|
+
rft_id,
|
25
|
+
map_one_to_one_fields,
|
26
|
+
isbn,
|
27
|
+
# rfr_id,
|
28
|
+
btitle,
|
29
|
+
atitle,
|
30
|
+
jtitle,
|
31
|
+
stitle,
|
32
|
+
title,
|
33
|
+
date,
|
34
|
+
author,
|
35
|
+
issn,
|
36
|
+
inventor,
|
37
|
+
authors,
|
38
|
+
publisher,
|
39
|
+
thesis_elements,
|
40
|
+
patent_elements,
|
41
|
+
webpage_elements,
|
42
|
+
{"importedFrom" => "OpenURL"}
|
43
|
+
].compact.reduce({}, :merge)
|
44
|
+
|
45
|
+
@csf.load_from_hash(hash)
|
46
|
+
end
|
47
|
+
|
48
|
+
def remove_blanks_from_params
|
49
|
+
@params.each do |key,values|
|
50
|
+
values = values.uniq.compact.reject(&:empty?)
|
51
|
+
if values.empty?
|
52
|
+
@params.delete(key)
|
53
|
+
else
|
54
|
+
@params[key] = values
|
55
|
+
end
|
56
|
+
end
|
57
|
+
end
|
58
|
+
|
59
|
+
|
60
|
+
def get_format(openurl_format)
|
61
|
+
return simple_types[openurl_format] if simple_types.has_key?(openurl_format)
|
62
|
+
return get_book_format(openurl_format)
|
63
|
+
end
|
64
|
+
|
65
|
+
def get_book_format(openurl_format)
|
66
|
+
return 'document' unless openurl_format.eql?('info:ofi/fmt:kev:mtx:book')
|
67
|
+
return book_type[params['rft.genre']] if book_type.has_key?(params['rft.genre'])
|
68
|
+
return 'book'
|
69
|
+
end
|
70
|
+
|
71
|
+
def item_type
|
72
|
+
get_format(params["rft_val_fmt"].first)
|
73
|
+
end
|
74
|
+
|
75
|
+
def book_type
|
76
|
+
{
|
77
|
+
"bookitem" => "bookSection",
|
78
|
+
"conference" => "conferencePaper",
|
79
|
+
"proceeding" => "conferencePaper",
|
80
|
+
"report" => "report",
|
81
|
+
"document" => "document"
|
82
|
+
}
|
83
|
+
end
|
84
|
+
|
85
|
+
def simple_types
|
86
|
+
{
|
87
|
+
"info:ofi/fmt:kev:mtx:journal" => "journalArticle",
|
88
|
+
"info:ofi/fmt:kev:mtx:dissertation" => "thesis",
|
89
|
+
"info:ofi/fmt:kev:mtx:patent" => "patent",
|
90
|
+
"info:ofi/fmt:kev:mtx:dc" => "webpage",
|
91
|
+
"info:ofi/fmt:kev:mtx:audio" => "audioRecording"
|
92
|
+
}
|
93
|
+
end
|
94
|
+
|
95
|
+
def key_mappings
|
96
|
+
{
|
97
|
+
'rft.type' => 'itemType',
|
98
|
+
'rft.description' => 'abstractNote',
|
99
|
+
'rft.rights' => 'rights',
|
100
|
+
'rft.language' => 'language',
|
101
|
+
'rft.subject' => 'tags',
|
102
|
+
'rft.source' => 'publicationTitle',
|
103
|
+
'rft.pub' => 'publisher',
|
104
|
+
'rft.publisher' => 'publisher',
|
105
|
+
'rft.place' => 'place',
|
106
|
+
'rft.edition' => 'edition',
|
107
|
+
'rft.series' => 'series',
|
108
|
+
'rft.volume' => 'volume',
|
109
|
+
'rft.issue' => 'issue',
|
110
|
+
'rft.inventor' => 'inventor',
|
111
|
+
'rft.contributor' => 'contributor',
|
112
|
+
'rft.aucorp' => 'author',
|
113
|
+
'rft.pages' => 'pages',
|
114
|
+
'rft.spage' => 'startPage',
|
115
|
+
'rft.epage' => 'endPage',
|
116
|
+
'rft.tpages' => 'numPage'
|
117
|
+
}
|
118
|
+
end
|
119
|
+
|
120
|
+
def map_one_to_one_fields
|
121
|
+
arr = []
|
122
|
+
key_mappings.each do |k,v|
|
123
|
+
arr << create_sub_hash(v,k)
|
124
|
+
end
|
125
|
+
arr.compact.reduce({}, :merge)
|
126
|
+
end
|
127
|
+
|
128
|
+
|
129
|
+
def rft_id
|
130
|
+
rft_id = params['rft_id'].first
|
131
|
+
return if rft_id.nil?
|
132
|
+
if rft_id.start_with? 'info:doi/' || rft_id.start_with?('urn.isbn/')
|
133
|
+
return { "isbn" => rft_id[9..-1] } unless rft_id[9..-1].empty?
|
134
|
+
end
|
135
|
+
return { "url" => rft_id, "accessDate" => "" } if rft_id.match /^https?:\/\/.*/
|
136
|
+
return {}
|
137
|
+
end
|
138
|
+
|
139
|
+
def rfr_id
|
140
|
+
return { "rfr_id" => params['rfr_id'].first } if params['rfr_id'].first
|
141
|
+
return {}
|
142
|
+
end
|
143
|
+
|
144
|
+
def create_formatted_sub_hash(key, value)
|
145
|
+
return nil unless key and value and !value.empty?
|
146
|
+
return { key => value }
|
147
|
+
end
|
148
|
+
|
149
|
+
def create_sub_hash(key,value)
|
150
|
+
create_formatted_sub_hash(key, params[value])
|
151
|
+
end
|
152
|
+
|
153
|
+
def btitle
|
154
|
+
key = 'title' if ['book','report'].include?(item_type)
|
155
|
+
key = 'publicationTitle' if ['bookSection','conferencePaper'].include?(item_type)
|
156
|
+
create_sub_hash(key, 'rft.btitle')
|
157
|
+
end
|
158
|
+
|
159
|
+
def atitle
|
160
|
+
key = 'title' if ['journalArticle','bookSection','conferencePaper'].include?(item_type)
|
161
|
+
create_sub_hash(key, 'rft.atitle')
|
162
|
+
end
|
163
|
+
|
164
|
+
def jtitle
|
165
|
+
key = 'publicationTitle' if ['journalArticle'].include?(item_type)
|
166
|
+
create_sub_hash(key, 'rft.jtitle')
|
167
|
+
end
|
168
|
+
|
169
|
+
def stitle
|
170
|
+
key = 'journalAbbreviation' if ['journalArticle'].include?(item_type)
|
171
|
+
create_sub_hash(key, 'rft.stitle')
|
172
|
+
end
|
173
|
+
|
174
|
+
def title
|
175
|
+
key = 'title'
|
176
|
+
key = 'publicationTitle' if ['journalArticle','bookSection','conferencePaper'].include? item_type
|
177
|
+
create_sub_hash(key, 'rft.title')
|
178
|
+
end
|
179
|
+
|
180
|
+
def date
|
181
|
+
key = "date"
|
182
|
+
key = "issueDate" if item_type.eql? "patent"
|
183
|
+
create_sub_hash(key , 'rft.date')
|
184
|
+
end
|
185
|
+
|
186
|
+
def issn
|
187
|
+
issn = [params['rft.issn'], params['rft.eissn']].flatten.compact.uniq.reject(&:empty?)
|
188
|
+
create_formatted_sub_hash("issn", issn)
|
189
|
+
end
|
190
|
+
|
191
|
+
def author
|
192
|
+
first_name = params['rft.aufirst'].first&.strip
|
193
|
+
last_name = params['rft.aulast'].first&.strip
|
194
|
+
|
195
|
+
name = "#{last_name}," if last_name
|
196
|
+
name = "#{name} #{first_name}".strip
|
197
|
+
|
198
|
+
name = nil if name.empty?
|
199
|
+
output_name = name || params['rft.au'] || params['rft.creator']
|
200
|
+
|
201
|
+
if first_name and last_name
|
202
|
+
return create_formatted_sub_hash("author", [output_name,output_name])
|
203
|
+
end
|
204
|
+
create_formatted_sub_hash("author", output_name)
|
205
|
+
end
|
206
|
+
|
207
|
+
def inventor
|
208
|
+
first_name = params['rft.invfirst'].first
|
209
|
+
last_name = params['rft.invlast'].first
|
210
|
+
name = Citero::Utils::NameFormatter.new("#{first_name} #{last_name}")
|
211
|
+
output_name = name.to_standardized || params['rft.inventor']
|
212
|
+
create_formatted_sub_hash("author", output_name)
|
213
|
+
end
|
214
|
+
|
215
|
+
def authors
|
216
|
+
authors = ['rft.au', 'rft.creator', 'rft.addau'].collect{|key| params[key]}.flatten.collect(&:to_s)
|
217
|
+
authors.reject!(&:empty?)
|
218
|
+
create_formatted_sub_hash('author', authors ) unless authors.empty?
|
219
|
+
end
|
220
|
+
|
221
|
+
def isbn
|
222
|
+
create_sub_hash("isbn", 'rft.isbn')
|
223
|
+
end
|
224
|
+
|
225
|
+
def publisher
|
226
|
+
publisher = [params['rft.pub'], params['rft.publisher']].flatten
|
227
|
+
create_formatted_sub_hash("publisher", publisher)
|
228
|
+
end
|
229
|
+
|
230
|
+
|
231
|
+
def thesis_elements
|
232
|
+
hash = []
|
233
|
+
hash << create_sub_hash("publisher", "rft.inst")
|
234
|
+
hash << create_sub_hash("type", "rft.degree")
|
235
|
+
merged = hash.compact.reduce({}, :merge)
|
236
|
+
return nil if merged.empty?
|
237
|
+
merged
|
238
|
+
end
|
239
|
+
|
240
|
+
def patent_elements
|
241
|
+
hash = []
|
242
|
+
hash << create_sub_hash("assignee", "rft.assignee")
|
243
|
+
hash << create_sub_hash("patentNumber", "rft.number")
|
244
|
+
hash << create_sub_hash("date", "rft.appldate")
|
245
|
+
merged = hash.compact.reduce({}, :merge)
|
246
|
+
return nil if merged.empty?
|
247
|
+
merged
|
248
|
+
end
|
249
|
+
|
250
|
+
def webpage_elements
|
251
|
+
hash = []
|
252
|
+
hash << create_sub_hash("abstractNote", "rft.description")
|
253
|
+
hash << create_sub_hash("rights", "rft.rights")
|
254
|
+
hash << create_sub_hash("language", "rft.language")
|
255
|
+
hash << create_sub_hash("tags", "rft.subject")
|
256
|
+
hash << create_sub_hash("itemType", "rft.type")
|
257
|
+
hash << create_sub_hash("publicationTitle", "rft.source")
|
258
|
+
unless params["rft.identifier"].empty?
|
259
|
+
identifier = params["rft.identifier"].first
|
260
|
+
hash << create_formatted_sub_hash("isbn", (identifier - 'isbn').strip) if identifier.start_with? 'isbn'
|
261
|
+
hash << create_formatted_sub_hash("issn", (identifier - 'issn').strip) if identifier.start_with? 'issn'
|
262
|
+
hash << create_formatted_sub_hash("doi", (identifier - 'urn:doi:').strip) if identifier.start_with? 'urn:doi:'
|
263
|
+
hash << create_formatted_sub_hash("url", identifier.strip) if identifier.match /^https?:\/\/.*/
|
264
|
+
end
|
265
|
+
merged = hash.compact.reduce({}, :merge)
|
266
|
+
return nil if merged.empty?
|
267
|
+
merged
|
268
|
+
end
|
269
|
+
|
270
|
+
end
|
271
|
+
end
|
272
|
+
end
|