citero 1.0.0.alpha
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/Gemfile +2 -0
- data/README.md +27 -0
- data/Rakefile +7 -0
- data/lib/citero.rb +123 -0
- data/lib/citero/csf.rb +31 -0
- data/lib/citero/inputs.rb +7 -0
- data/lib/citero/inputs/openurl.rb +272 -0
- data/lib/citero/inputs/pnx.rb +137 -0
- data/lib/citero/inputs/readers.rb +7 -0
- data/lib/citero/inputs/readers/pnx_reader.rb +151 -0
- data/lib/citero/outputs.rb +9 -0
- data/lib/citero/outputs/bibtex.rb +174 -0
- data/lib/citero/outputs/easybib.rb +203 -0
- data/lib/citero/outputs/openurl.rb +199 -0
- data/lib/citero/outputs/refworks_tagged.rb +52 -0
- data/lib/citero/outputs/ris.rb +209 -0
- data/lib/citero/utils.rb +5 -0
- data/lib/citero/utils/name_formatter.rb +56 -0
- data/lib/citero/version.rb +3 -0
- metadata +159 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: a420fcb5e2a2984a8075c248070be78ba1523017
|
4
|
+
data.tar.gz: 8728f7f6805f7713e84e4586606a31f7c5af7bb9
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 0c5fd7a6553dd5be250cbf512f5f9fca8e5ac5c6c60d184797913992d9cc4cdb49fba005bdc1b25f37de2aae93f43391544d1abbec917a8c416ce4190ccff599
|
7
|
+
data.tar.gz: b8663ea856531cc639cda15902992d0caeb7b2a6855be5595a72607a939d5ba21fad9909677ec4fcf047dbccc0f293624713151768d3539cb387af516f2515f2
|
data/Gemfile
ADDED
data/README.md
ADDED
@@ -0,0 +1,27 @@
|
|
1
|
+
Citero-ruby
|
2
|
+
==========
|
3
|
+
Ruby repository for the [Citero](https://github.com/NYULibraries/citero) project.
|
4
|
+
|
5
|
+
Citero is a program that allows for mapping of data inputs from various systems into one normalized metadata schema
|
6
|
+
tentatively known as *Citero Standard Form*, or *CSF*. From the normalized schema, *CSF*, it can produce another output
|
7
|
+
format for use by another system.
|
8
|
+
|
9
|
+
Citero-ruby is a complete rewrite of the Java project in Ruby.
|
10
|
+
|
11
|
+
Currently Supported Formats/Systems
|
12
|
+
===================================
|
13
|
+
|
14
|
+
How to install
|
15
|
+
==============
|
16
|
+
|
17
|
+
How to run
|
18
|
+
==========
|
19
|
+
|
20
|
+
API Considerations
|
21
|
+
==========
|
22
|
+
|
23
|
+
Exceptions
|
24
|
+
==========
|
25
|
+
|
26
|
+
CSF
|
27
|
+
=====
|
data/Rakefile
ADDED
data/lib/citero.rb
ADDED
@@ -0,0 +1,123 @@
|
|
1
|
+
# Top-level entry point of the gem: converts citation metadata from one format
# to another by way of the intermediate Citero Standard Form (CSF).
#
# Calls are chained on the module itself, e.g.
#
#   Citero.map(data).from_pnx.to_ris
#
# +from_<format>+ and +to_<format>+ are resolved dynamically for every name
# listed in +from_formats+ / +to_formats+.
#
# The chain keeps its state (+@input+, +@from_format+, +@to_format+) in
# instance variables of the module object, so that state is shared by every
# caller and persists from one chain to the next.
module Citero
  require_relative 'citero/version'
  require_relative 'citero/csf'
  require_relative 'citero/inputs'
  require_relative 'citero/outputs'
  require_relative 'citero/utils'

  # Formats that can be read (usable as +from_<format>+).
  def self.from_formats
    [:csf, :openurl, :pnx]
  end

  # Formats that can be written (usable as +to_<format>+).
  def self.to_formats
    [:csf, :ris, :openurl, :bibtex, :easybib, :refworks_tagged]
  end

  # Supported citation styles; currently none.
  def self.citation_styles
    []
  end

  # Stores the raw input for the conversion chain and returns the module so
  # that +from_...+ / +to_...+ can be chained.
  def self.map(input)
    @input = input
    self
  end

  # Records the format of the stored input (String or Symbol) and returns the
  # module for chaining. The format is validated only when it is used.
  def self.from_(format)
    @from_format = format.to_sym
    self
  end

  # Returns the CSF representation of the stored input, or nil when no input
  # format has been chosen yet.
  #
  # Raises ArgumentError when the chosen input format is not supported.
  def self.csf
    return nil unless @from_format
    source_record.csf
  end

  # Converts the stored input to +format+ (String or Symbol) and returns the
  # result as a String.
  #
  # Returns nil when +format+ is not one of +to_formats+.
  # Raises ArgumentError when the input format is missing or unsupported.
  def self.to_(format)
    @to_format = format.to_sym
    from = source_record

    case @to_format
    when :ris
      Citero::Outputs::Ris.new(from).to_ris
    when :openurl
      Citero::Outputs::OpenUrl.new(from).to_openurl
    when :bibtex
      Citero::Outputs::Bibtex.new(from).to_bibtex
    when :easybib
      Citero::Outputs::EasyBib.new(from).to_easybib
    when :refworks_tagged
      Citero::Outputs::RefworksTagged.new(from).to_refworks_tagged
    when :csf
      csf_hash_to_string(from.csf)
    end
  end

  # NOTE: a bare `private` has no effect on `def self.` methods, so the
  # original marker was a no-op. The methods below stay callable exactly as
  # before; only the helpers introduced here are hidden, at the end of the module.

  # Parses CSF text ("key: value", one pair per line) into a Hash that maps
  # each key to the Array of its values, in order of appearance. Only the first
  # ':' on a line separates key from value. Blank lines are ignored.
  #
  # NOTE(review): the "\." / "\," escapes written by the :csf output are not
  # undone here - confirm whether a round trip is expected to be lossless.
  def self.csf_string_to_hash(string)
    hash = {}
    string.lines.map(&:strip).each do |line|
      # A blank line would otherwise be stored under a nil key.
      next if line.empty?
      key, value = line.split(':', 2).map(&:strip)
      hash[key] = [hash[key], value].compact.flatten
    end
    hash
  end

  # Resolves the dynamic +from_<format>+ and +to_<format>+ calls; everything
  # else falls through to the default NoMethodError.
  def self.method_missing(method_sym, *arguments, &block)
    prefix, format = dynamic_format(method_sym)
    case prefix
    when 'from' then from_(format)
    when 'to'   then to_(format)
    else super
    end
  end

  # Makes +respond_to?+ (and +method+) aware of the dynamic methods without
  # hiding real methods such as +to_s+, which overriding +respond_to?+
  # directly used to report as missing.
  def self.respond_to_missing?(method_sym, include_private = false)
    !dynamic_format(method_sym).nil? || super
  end

  # Splits a method name into [prefix, format] when it names a supported
  # dynamic conversion ("from_pnx", "to_ris", ...); returns nil otherwise.
  def self.dynamic_format(method_sym)
    prefix, format = method_sym.to_s.split('_', 2)
    return nil if format.nil? || format.empty?

    formats = case prefix
              when 'from' then from_formats
              when 'to'   then to_formats
              end
    return nil unless formats && formats.include?(format.to_sym)
    [prefix, format]
  end

  # Builds the reader object for the stored input according to @from_format.
  # Raises ArgumentError for a missing or unsupported input format.
  def self.source_record
    case @from_format
    when :csf
      data = @input
      data = csf_string_to_hash(data) if data.kind_of?(String)
      Citero::CSF.new(data)
    when :openurl
      Citero::Inputs::OpenUrl.new(@input)
    when :pnx
      Citero::Inputs::Pnx.new(@input)
    else
      raise ArgumentError, "unsupported input format: #{@from_format.inspect}"
    end
  end

  # Serializes CSF data as "key: value" lines, one line per value, with every
  # '.' and ',' in a value prefixed by a backslash.
  def self.csf_hash_to_string(csf)
    out = ""
    csf.each do |key, value|
      values = value.kind_of?(Array) ? value : [value]
      values.each do |item|
        escaped = item.to_s.gsub(/[.,]/) { |char| "\\#{char}" }
        out << "#{key}: #{escaped}\n"
      end
    end
    out.chomp
  end

  private_class_method :dynamic_format, :source_record, :csf_hash_to_string
end
|
data/lib/citero/csf.rb
ADDED
@@ -0,0 +1,31 @@
|
|
1
|
+
module Citero
  # Forwardable is used below; require it here so this file does not depend
  # on some other library having loaded it first.
  require 'forwardable'

  # Citero Standard Form: a thin wrapper around a Hash of citation fields.
  #
  # Assigning to a key never overwrites: the new value is appended to whatever
  # the key already holds. A key with exactly one value stores that value
  # directly; a key with several values stores a flat Array of them.
  class CSF
    extend Forwardable
    def_delegators :@data, :[], :size, :each, :inspect, :to_s

    # The underlying Hash. Also available as +csf+.
    attr_reader :data
    alias_method :csf, :data

    # hash - optional Hash of initial fields; entries whose value is nil are
    #        skipped.
    def initialize(hash = nil)
      @data = {}
      load_from_hash(hash) unless hash.nil?
    end

    # Adds every non-nil entry of +hash+ using the appending semantics of
    # +[]=+.
    def load_from_hash(hash)
      hash.each_pair do |key, value|
        next if value.nil?
        self[key] = value
      end
    end

    # Appends +value+ (a single element or an Array of elements) to the values
    # already stored under +key+.
    def []=(key, value)
      @data[key] = element_or_list(@data[key], value)
    end

    private

    # Combines the values already stored with the incoming ones, dropping nils
    # and flattening nested Arrays. Returns the bare element when exactly one
    # remains, otherwise the Array (which may be empty).
    def element_or_list(existing, incoming)
      combined = [existing, incoming].flatten.compact
      combined.size == 1 ? combined.first : combined
    end
  end
end
|
@@ -0,0 +1,272 @@
|
|
1
|
+
module Citero
|
2
|
+
module Inputs
|
3
|
+
class OpenUrl
|
4
|
+
require 'open-uri'
|
5
|
+
require 'cgi'
|
6
|
+
|
7
|
+
attr_reader :csf, :params
|
8
|
+
|
9
|
+
def initialize(raw_data)
|
10
|
+
@raw_data = raw_data
|
11
|
+
construct_csf
|
12
|
+
end
|
13
|
+
|
14
|
+
private
|
15
|
+
|
16
|
+
def construct_csf
|
17
|
+
@csf = CSF.new
|
18
|
+
url = @raw_data
|
19
|
+
@params = CGI.parse(CGI::unescape(URI.parse(url).query))
|
20
|
+
remove_blanks_from_params
|
21
|
+
|
22
|
+
hash = [
|
23
|
+
{"itemType" => item_type},
|
24
|
+
rft_id,
|
25
|
+
map_one_to_one_fields,
|
26
|
+
isbn,
|
27
|
+
# rfr_id,
|
28
|
+
btitle,
|
29
|
+
atitle,
|
30
|
+
jtitle,
|
31
|
+
stitle,
|
32
|
+
title,
|
33
|
+
date,
|
34
|
+
author,
|
35
|
+
issn,
|
36
|
+
inventor,
|
37
|
+
authors,
|
38
|
+
publisher,
|
39
|
+
thesis_elements,
|
40
|
+
patent_elements,
|
41
|
+
webpage_elements,
|
42
|
+
{"importedFrom" => "OpenURL"}
|
43
|
+
].compact.reduce({}, :merge)
|
44
|
+
|
45
|
+
@csf.load_from_hash(hash)
|
46
|
+
end
|
47
|
+
|
48
|
+
def remove_blanks_from_params
|
49
|
+
@params.each do |key,values|
|
50
|
+
values = values.uniq.compact.reject(&:empty?)
|
51
|
+
if values.empty?
|
52
|
+
@params.delete(key)
|
53
|
+
else
|
54
|
+
@params[key] = values
|
55
|
+
end
|
56
|
+
end
|
57
|
+
end
|
58
|
+
|
59
|
+
|
60
|
+
def get_format(openurl_format)
|
61
|
+
return simple_types[openurl_format] if simple_types.has_key?(openurl_format)
|
62
|
+
return get_book_format(openurl_format)
|
63
|
+
end
|
64
|
+
|
65
|
+
def get_book_format(openurl_format)
|
66
|
+
return 'document' unless openurl_format.eql?('info:ofi/fmt:kev:mtx:book')
|
67
|
+
return book_type[params['rft.genre']] if book_type.has_key?(params['rft.genre'])
|
68
|
+
return 'book'
|
69
|
+
end
|
70
|
+
|
71
|
+
def item_type
|
72
|
+
get_format(params["rft_val_fmt"].first)
|
73
|
+
end
|
74
|
+
|
75
|
+
def book_type
|
76
|
+
{
|
77
|
+
"bookitem" => "bookSection",
|
78
|
+
"conference" => "conferencePaper",
|
79
|
+
"proceeding" => "conferencePaper",
|
80
|
+
"report" => "report",
|
81
|
+
"document" => "document"
|
82
|
+
}
|
83
|
+
end
|
84
|
+
|
85
|
+
def simple_types
|
86
|
+
{
|
87
|
+
"info:ofi/fmt:kev:mtx:journal" => "journalArticle",
|
88
|
+
"info:ofi/fmt:kev:mtx:dissertation" => "thesis",
|
89
|
+
"info:ofi/fmt:kev:mtx:patent" => "patent",
|
90
|
+
"info:ofi/fmt:kev:mtx:dc" => "webpage",
|
91
|
+
"info:ofi/fmt:kev:mtx:audio" => "audioRecording"
|
92
|
+
}
|
93
|
+
end
|
94
|
+
|
95
|
+
def key_mappings
|
96
|
+
{
|
97
|
+
'rft.type' => 'itemType',
|
98
|
+
'rft.description' => 'abstractNote',
|
99
|
+
'rft.rights' => 'rights',
|
100
|
+
'rft.language' => 'language',
|
101
|
+
'rft.subject' => 'tags',
|
102
|
+
'rft.source' => 'publicationTitle',
|
103
|
+
'rft.pub' => 'publisher',
|
104
|
+
'rft.publisher' => 'publisher',
|
105
|
+
'rft.place' => 'place',
|
106
|
+
'rft.edition' => 'edition',
|
107
|
+
'rft.series' => 'series',
|
108
|
+
'rft.volume' => 'volume',
|
109
|
+
'rft.issue' => 'issue',
|
110
|
+
'rft.inventor' => 'inventor',
|
111
|
+
'rft.contributor' => 'contributor',
|
112
|
+
'rft.aucorp' => 'author',
|
113
|
+
'rft.pages' => 'pages',
|
114
|
+
'rft.spage' => 'startPage',
|
115
|
+
'rft.epage' => 'endPage',
|
116
|
+
'rft.tpages' => 'numPage'
|
117
|
+
}
|
118
|
+
end
|
119
|
+
|
120
|
+
def map_one_to_one_fields
|
121
|
+
arr = []
|
122
|
+
key_mappings.each do |k,v|
|
123
|
+
arr << create_sub_hash(v,k)
|
124
|
+
end
|
125
|
+
arr.compact.reduce({}, :merge)
|
126
|
+
end
|
127
|
+
|
128
|
+
|
129
|
+
def rft_id
|
130
|
+
rft_id = params['rft_id'].first
|
131
|
+
return if rft_id.nil?
|
132
|
+
if rft_id.start_with? 'info:doi/' || rft_id.start_with?('urn.isbn/')
|
133
|
+
return { "isbn" => rft_id[9..-1] } unless rft_id[9..-1].empty?
|
134
|
+
end
|
135
|
+
return { "url" => rft_id, "accessDate" => "" } if rft_id.match /^https?:\/\/.*/
|
136
|
+
return {}
|
137
|
+
end
|
138
|
+
|
139
|
+
def rfr_id
|
140
|
+
return { "rfr_id" => params['rfr_id'].first } if params['rfr_id'].first
|
141
|
+
return {}
|
142
|
+
end
|
143
|
+
|
144
|
+
def create_formatted_sub_hash(key, value)
|
145
|
+
return nil unless key and value and !value.empty?
|
146
|
+
return { key => value }
|
147
|
+
end
|
148
|
+
|
149
|
+
def create_sub_hash(key,value)
|
150
|
+
create_formatted_sub_hash(key, params[value])
|
151
|
+
end
|
152
|
+
|
153
|
+
def btitle
|
154
|
+
key = 'title' if ['book','report'].include?(item_type)
|
155
|
+
key = 'publicationTitle' if ['bookSection','conferencePaper'].include?(item_type)
|
156
|
+
create_sub_hash(key, 'rft.btitle')
|
157
|
+
end
|
158
|
+
|
159
|
+
def atitle
|
160
|
+
key = 'title' if ['journalArticle','bookSection','conferencePaper'].include?(item_type)
|
161
|
+
create_sub_hash(key, 'rft.atitle')
|
162
|
+
end
|
163
|
+
|
164
|
+
def jtitle
|
165
|
+
key = 'publicationTitle' if ['journalArticle'].include?(item_type)
|
166
|
+
create_sub_hash(key, 'rft.jtitle')
|
167
|
+
end
|
168
|
+
|
169
|
+
def stitle
|
170
|
+
key = 'journalAbbreviation' if ['journalArticle'].include?(item_type)
|
171
|
+
create_sub_hash(key, 'rft.stitle')
|
172
|
+
end
|
173
|
+
|
174
|
+
def title
|
175
|
+
key = 'title'
|
176
|
+
key = 'publicationTitle' if ['journalArticle','bookSection','conferencePaper'].include? item_type
|
177
|
+
create_sub_hash(key, 'rft.title')
|
178
|
+
end
|
179
|
+
|
180
|
+
def date
|
181
|
+
key = "date"
|
182
|
+
key = "issueDate" if item_type.eql? "patent"
|
183
|
+
create_sub_hash(key , 'rft.date')
|
184
|
+
end
|
185
|
+
|
186
|
+
def issn
|
187
|
+
issn = [params['rft.issn'], params['rft.eissn']].flatten.compact.uniq.reject(&:empty?)
|
188
|
+
create_formatted_sub_hash("issn", issn)
|
189
|
+
end
|
190
|
+
|
191
|
+
def author
|
192
|
+
first_name = params['rft.aufirst'].first&.strip
|
193
|
+
last_name = params['rft.aulast'].first&.strip
|
194
|
+
|
195
|
+
name = "#{last_name}," if last_name
|
196
|
+
name = "#{name} #{first_name}".strip
|
197
|
+
|
198
|
+
name = nil if name.empty?
|
199
|
+
output_name = name || params['rft.au'] || params['rft.creator']
|
200
|
+
|
201
|
+
if first_name and last_name
|
202
|
+
return create_formatted_sub_hash("author", [output_name,output_name])
|
203
|
+
end
|
204
|
+
create_formatted_sub_hash("author", output_name)
|
205
|
+
end
|
206
|
+
|
207
|
+
def inventor
|
208
|
+
first_name = params['rft.invfirst'].first
|
209
|
+
last_name = params['rft.invlast'].first
|
210
|
+
name = Citero::Utils::NameFormatter.new("#{first_name} #{last_name}")
|
211
|
+
output_name = name.to_standardized || params['rft.inventor']
|
212
|
+
create_formatted_sub_hash("author", output_name)
|
213
|
+
end
|
214
|
+
|
215
|
+
def authors
|
216
|
+
authors = ['rft.au', 'rft.creator', 'rft.addau'].collect{|key| params[key]}.flatten.collect(&:to_s)
|
217
|
+
authors.reject!(&:empty?)
|
218
|
+
create_formatted_sub_hash('author', authors ) unless authors.empty?
|
219
|
+
end
|
220
|
+
|
221
|
+
def isbn
|
222
|
+
create_sub_hash("isbn", 'rft.isbn')
|
223
|
+
end
|
224
|
+
|
225
|
+
def publisher
|
226
|
+
publisher = [params['rft.pub'], params['rft.publisher']].flatten
|
227
|
+
create_formatted_sub_hash("publisher", publisher)
|
228
|
+
end
|
229
|
+
|
230
|
+
|
231
|
+
def thesis_elements
|
232
|
+
hash = []
|
233
|
+
hash << create_sub_hash("publisher", "rft.inst")
|
234
|
+
hash << create_sub_hash("type", "rft.degree")
|
235
|
+
merged = hash.compact.reduce({}, :merge)
|
236
|
+
return nil if merged.empty?
|
237
|
+
merged
|
238
|
+
end
|
239
|
+
|
240
|
+
def patent_elements
|
241
|
+
hash = []
|
242
|
+
hash << create_sub_hash("assignee", "rft.assignee")
|
243
|
+
hash << create_sub_hash("patentNumber", "rft.number")
|
244
|
+
hash << create_sub_hash("date", "rft.appldate")
|
245
|
+
merged = hash.compact.reduce({}, :merge)
|
246
|
+
return nil if merged.empty?
|
247
|
+
merged
|
248
|
+
end
|
249
|
+
|
250
|
+
def webpage_elements
|
251
|
+
hash = []
|
252
|
+
hash << create_sub_hash("abstractNote", "rft.description")
|
253
|
+
hash << create_sub_hash("rights", "rft.rights")
|
254
|
+
hash << create_sub_hash("language", "rft.language")
|
255
|
+
hash << create_sub_hash("tags", "rft.subject")
|
256
|
+
hash << create_sub_hash("itemType", "rft.type")
|
257
|
+
hash << create_sub_hash("publicationTitle", "rft.source")
|
258
|
+
unless params["rft.identifier"].empty?
|
259
|
+
identifier = params["rft.identifier"].first
|
260
|
+
hash << create_formatted_sub_hash("isbn", (identifier - 'isbn').strip) if identifier.start_with? 'isbn'
|
261
|
+
hash << create_formatted_sub_hash("issn", (identifier - 'issn').strip) if identifier.start_with? 'issn'
|
262
|
+
hash << create_formatted_sub_hash("doi", (identifier - 'urn:doi:').strip) if identifier.start_with? 'urn:doi:'
|
263
|
+
hash << create_formatted_sub_hash("url", identifier.strip) if identifier.match /^https?:\/\/.*/
|
264
|
+
end
|
265
|
+
merged = hash.compact.reduce({}, :merge)
|
266
|
+
return nil if merged.empty?
|
267
|
+
merged
|
268
|
+
end
|
269
|
+
|
270
|
+
end
|
271
|
+
end
|
272
|
+
end
|