citero 1.0.0.alpha

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: a420fcb5e2a2984a8075c248070be78ba1523017
4
+ data.tar.gz: 8728f7f6805f7713e84e4586606a31f7c5af7bb9
5
+ SHA512:
6
+ metadata.gz: 0c5fd7a6553dd5be250cbf512f5f9fca8e5ac5c6c60d184797913992d9cc4cdb49fba005bdc1b25f37de2aae93f43391544d1abbec917a8c416ce4190ccff599
7
+ data.tar.gz: b8663ea856531cc639cda15902992d0caeb7b2a6855be5595a72607a939d5ba21fad9909677ec4fcf047dbccc0f293624713151768d3539cb387af516f2515f2
data/Gemfile ADDED
@@ -0,0 +1,2 @@
1
+ source 'https://rubygems.org'
2
+ gemspec
@@ -0,0 +1,27 @@
1
+ Citero-ruby
2
+ ==========
3
+ Ruby repository for the [Citero](https://github.com/NYULibraries/citero) project.
4
+
5
+ Citero is a program that allows for mapping of data inputs from various systems into one normalized metadata schema
6
+ tentatively known as *Citero Standard Form*, or *CSF*. From the normalized schema, *CSF*, it can produce another output
7
+ format for use by another system.
8
+
9
+ Citero-ruby is a complete rewrite of the Java project in Ruby.
10
+
11
+ Currently Supported Formats/Systems
12
+ ===================================
13
+
14
+ How to install
15
+ ==============
16
+
17
+ How to run
18
+ ==========
19
+
20
+ API Considerations
21
+ ==========
22
+
23
+ Exceptions
24
+ ==========
25
+
26
+ CSF
27
+ =====
@@ -0,0 +1,7 @@
1
# Defines the :spec task and makes it the default rake task.
# If either library cannot be loaded, the LoadError is swallowed on purpose
# and no tasks are defined.
begin
  %w[rspec/core/rake_task bundler/gem_tasks].each { |library| require library }
  RSpec::Core::RakeTask.new(:spec)
  task default: :spec
rescue LoadError
  # Best effort: the test and release tooling is optional.
end
@@ -0,0 +1,123 @@
1
# Entry point for converting bibliographic data between formats.
#
# Usage is a chained call on the module itself:
#
#   Citero.map(data).from_openurl.to_ris
#
# The pending input and formats are stored in module-level instance variables,
# so every caller shares the same conversion state.
module Citero
  require_relative 'citero/version'
  require_relative 'citero/csf'
  require_relative 'citero/inputs'
  require_relative 'citero/outputs'
  require_relative 'citero/utils'

  # @return [Array<Symbol>] formats accepted by the dynamic `from_<format>` calls
  def self.from_formats
    [:csf, :openurl, :pnx]
  end

  # @return [Array<Symbol>] formats accepted by the dynamic `to_<format>` calls
  def self.to_formats
    [:csf, :ris, :openurl, :bibtex, :easybib, :refworks_tagged]
  end

  # @return [Array] currently always empty
  def self.citation_styles
    []
  end

  # Stores the raw input for a later conversion.
  #
  # @param input [Object] raw data in one of the from_formats
  # @return [Module] self, so calls can be chained
  def self.map(input)
    @input = input
    self
  end

  # Records the format of the stored input.
  #
  # @param format [#to_sym] name of the input format
  # @return [Module] self, so calls can be chained
  def self.from_(format)
    @from_format = format.to_sym
    self
  end

  # Returns the `csf` of the reader built for the stored input.
  #
  # @return [Object, nil] nil when no input format has been set
  # @raise [ArgumentError] when the input format is not supported
  def self.csf
    return nil unless @from_format
    source_for(@from_format, @input).csf
  end

  # Converts the stored input into the requested output format.
  #
  # @param format [#to_sym] name of the output format
  # @return [Object, nil] the converted output; nil for an unknown output format
  # @raise [ArgumentError] when the input format is missing or not supported
  def self.to_(format)
    @to_format = format.to_sym
    from = source_for(@from_format, @input)

    case @to_format
    when :ris then Citero::Outputs::Ris.new(from).to_ris
    when :openurl then Citero::Outputs::OpenUrl.new(from).to_openurl
    when :bibtex then Citero::Outputs::Bibtex.new(from).to_bibtex
    when :easybib then Citero::Outputs::EasyBib.new(from).to_easybib
    when :refworks_tagged then Citero::Outputs::RefworksTagged.new(from).to_refworks_tagged
    when :csf then csf_lines(from.csf)
    end
  end

  # Parses "key: value" lines into a hash of arrays; repeated keys accumulate.
  # Lines without a key (blank lines, or lines starting with ':') are skipped
  # so they cannot create a nil or empty key.
  #
  # @param string [String] one "key: value" pair per line
  # @return [Hash{String => Array<String>}]
  def self.csf_string_to_hash(string)
    string.lines.each_with_object({}) do |line, hash|
      key, value = line.strip.split(':', 2).map(&:strip)
      next if key.nil? || key.empty?
      hash[key] = [hash[key], value].compact.flatten
    end
  end

  # Dispatches `from_<format>` and `to_<format>` calls for the supported formats.
  def self.method_missing(method_sym, *arguments, &block)
    return super unless respond_to_missing?(method_sym)
    prefix, format = method_sym.to_s.split('_', 2)
    prefix == 'to' ? to_(format) : from_(format)
  end

  # Reports the dynamic `from_<format>` / `to_<format>` methods as supported.
  # Methods that are actually defined never reach this hook, so `respond_to?`
  # stays correct for them.
  def self.respond_to_missing?(method_sym, include_private = false)
    prefix, format = method_sym.to_s.split('_', 2)
    return super if format.nil? || format.empty?
    case prefix
    when 'from' then from_formats.include?(format.to_sym)
    when 'to' then to_formats.include?(format.to_sym)
    else super
    end
  end

  # Builds the reader object for the given input format.
  def self.source_for(format, input)
    case format
    when :csf
      data = input.kind_of?(String) ? csf_string_to_hash(input) : input
      Citero::CSF.new(data)
    when :openurl
      Citero::Inputs::OpenUrl.new(input)
    when :pnx
      Citero::Inputs::Pnx.new(input)
    else
      raise ArgumentError, "unsupported input format: #{format.inspect}"
    end
  end

  # Renders a CSF as "key: value" lines, one per value, with '.' and ','
  # backslash-escaped in the values.
  def self.csf_lines(csf)
    lines = []
    csf.each do |key, value|
      values = value.kind_of?(Array) ? value : [value]
      values.each do |single|
        lines << "#{key}: #{single.gsub('.','\.').gsub(',','\,')}\n"
      end
    end
    lines.join.chomp
  end

  private_class_method :source_for, :csf_lines
end
@@ -0,0 +1,31 @@
1
module Citero
  # Key/value container backed by a Hash.
  #
  # Assigning to a key never replaces what is already stored: the new value is
  # appended. A key holding one value stores it bare; a key holding several
  # stores them as a flat Array.
  class CSF
    # Forwardable is standard library but is not loaded by default.
    require 'forwardable'
    extend Forwardable
    def_delegators :@data, :[], :size, :each, :inspect, :to_s
    attr_reader :data
    alias_method :csf, :data

    # @param hash [Hash, nil] optional initial content
    def initialize(hash = nil)
      @data = {}
      load_from_hash(hash) unless hash.nil?
    end

    # Adds every pair of +hash+ whose value is not nil.
    #
    # @param hash [#each_pair]
    def load_from_hash(hash)
      hash.each_pair do |key, value|
        next if value.nil?
        self[key] = value
      end
    end

    # Appends +value+ to whatever is already stored under +key+.
    def []=(key, value)
      @data[key] = element_or_list(@data[key], value)
    end

    private

    # Combines the stored and incoming values, dropping nils and flattening
    # nested arrays. Returns the bare element when exactly one remains.
    def element_or_list(old_value, new_value)
      combined = [old_value, new_value].flatten.compact
      combined.size == 1 ? combined.first : combined
    end
  end
end
@@ -0,0 +1,7 @@
1
+ module Citero
2
+ module Inputs
3
+ require_relative 'inputs/readers'
4
+ require_relative 'inputs/pnx'
5
+ require_relative 'inputs/openurl'
6
+ end
7
+ end
@@ -0,0 +1,272 @@
1
+ module Citero
2
+ module Inputs
3
+ class OpenUrl
4
+ require 'open-uri'
5
+ require 'cgi'
6
+
7
+ attr_reader :csf, :params
8
+
9
+ def initialize(raw_data)
10
+ @raw_data = raw_data
11
+ construct_csf
12
+ end
13
+
14
+ private
15
+
16
+ def construct_csf
17
+ @csf = CSF.new
18
+ url = @raw_data
19
+ @params = CGI.parse(CGI::unescape(URI.parse(url).query))
20
+ remove_blanks_from_params
21
+
22
+ hash = [
23
+ {"itemType" => item_type},
24
+ rft_id,
25
+ map_one_to_one_fields,
26
+ isbn,
27
+ # rfr_id,
28
+ btitle,
29
+ atitle,
30
+ jtitle,
31
+ stitle,
32
+ title,
33
+ date,
34
+ author,
35
+ issn,
36
+ inventor,
37
+ authors,
38
+ publisher,
39
+ thesis_elements,
40
+ patent_elements,
41
+ webpage_elements,
42
+ {"importedFrom" => "OpenURL"}
43
+ ].compact.reduce({}, :merge)
44
+
45
+ @csf.load_from_hash(hash)
46
+ end
47
+
48
+ def remove_blanks_from_params
49
+ @params.each do |key,values|
50
+ values = values.uniq.compact.reject(&:empty?)
51
+ if values.empty?
52
+ @params.delete(key)
53
+ else
54
+ @params[key] = values
55
+ end
56
+ end
57
+ end
58
+
59
+
60
+ def get_format(openurl_format)
61
+ return simple_types[openurl_format] if simple_types.has_key?(openurl_format)
62
+ return get_book_format(openurl_format)
63
+ end
64
+
65
+ def get_book_format(openurl_format)
66
+ return 'document' unless openurl_format.eql?('info:ofi/fmt:kev:mtx:book')
67
+ return book_type[params['rft.genre']] if book_type.has_key?(params['rft.genre'])
68
+ return 'book'
69
+ end
70
+
71
+ def item_type
72
+ get_format(params["rft_val_fmt"].first)
73
+ end
74
+
75
+ def book_type
76
+ {
77
+ "bookitem" => "bookSection",
78
+ "conference" => "conferencePaper",
79
+ "proceeding" => "conferencePaper",
80
+ "report" => "report",
81
+ "document" => "document"
82
+ }
83
+ end
84
+
85
+ def simple_types
86
+ {
87
+ "info:ofi/fmt:kev:mtx:journal" => "journalArticle",
88
+ "info:ofi/fmt:kev:mtx:dissertation" => "thesis",
89
+ "info:ofi/fmt:kev:mtx:patent" => "patent",
90
+ "info:ofi/fmt:kev:mtx:dc" => "webpage",
91
+ "info:ofi/fmt:kev:mtx:audio" => "audioRecording"
92
+ }
93
+ end
94
+
95
+ def key_mappings
96
+ {
97
+ 'rft.type' => 'itemType',
98
+ 'rft.description' => 'abstractNote',
99
+ 'rft.rights' => 'rights',
100
+ 'rft.language' => 'language',
101
+ 'rft.subject' => 'tags',
102
+ 'rft.source' => 'publicationTitle',
103
+ 'rft.pub' => 'publisher',
104
+ 'rft.publisher' => 'publisher',
105
+ 'rft.place' => 'place',
106
+ 'rft.edition' => 'edition',
107
+ 'rft.series' => 'series',
108
+ 'rft.volume' => 'volume',
109
+ 'rft.issue' => 'issue',
110
+ 'rft.inventor' => 'inventor',
111
+ 'rft.contributor' => 'contributor',
112
+ 'rft.aucorp' => 'author',
113
+ 'rft.pages' => 'pages',
114
+ 'rft.spage' => 'startPage',
115
+ 'rft.epage' => 'endPage',
116
+ 'rft.tpages' => 'numPage'
117
+ }
118
+ end
119
+
120
+ def map_one_to_one_fields
121
+ arr = []
122
+ key_mappings.each do |k,v|
123
+ arr << create_sub_hash(v,k)
124
+ end
125
+ arr.compact.reduce({}, :merge)
126
+ end
127
+
128
+
129
+ def rft_id
130
+ rft_id = params['rft_id'].first
131
+ return if rft_id.nil?
132
+ if rft_id.start_with? 'info:doi/' || rft_id.start_with?('urn.isbn/')
133
+ return { "isbn" => rft_id[9..-1] } unless rft_id[9..-1].empty?
134
+ end
135
+ return { "url" => rft_id, "accessDate" => "" } if rft_id.match /^https?:\/\/.*/
136
+ return {}
137
+ end
138
+
139
+ def rfr_id
140
+ return { "rfr_id" => params['rfr_id'].first } if params['rfr_id'].first
141
+ return {}
142
+ end
143
+
144
+ def create_formatted_sub_hash(key, value)
145
+ return nil unless key and value and !value.empty?
146
+ return { key => value }
147
+ end
148
+
149
+ def create_sub_hash(key,value)
150
+ create_formatted_sub_hash(key, params[value])
151
+ end
152
+
153
+ def btitle
154
+ key = 'title' if ['book','report'].include?(item_type)
155
+ key = 'publicationTitle' if ['bookSection','conferencePaper'].include?(item_type)
156
+ create_sub_hash(key, 'rft.btitle')
157
+ end
158
+
159
+ def atitle
160
+ key = 'title' if ['journalArticle','bookSection','conferencePaper'].include?(item_type)
161
+ create_sub_hash(key, 'rft.atitle')
162
+ end
163
+
164
+ def jtitle
165
+ key = 'publicationTitle' if ['journalArticle'].include?(item_type)
166
+ create_sub_hash(key, 'rft.jtitle')
167
+ end
168
+
169
+ def stitle
170
+ key = 'journalAbbreviation' if ['journalArticle'].include?(item_type)
171
+ create_sub_hash(key, 'rft.stitle')
172
+ end
173
+
174
+ def title
175
+ key = 'title'
176
+ key = 'publicationTitle' if ['journalArticle','bookSection','conferencePaper'].include? item_type
177
+ create_sub_hash(key, 'rft.title')
178
+ end
179
+
180
+ def date
181
+ key = "date"
182
+ key = "issueDate" if item_type.eql? "patent"
183
+ create_sub_hash(key , 'rft.date')
184
+ end
185
+
186
+ def issn
187
+ issn = [params['rft.issn'], params['rft.eissn']].flatten.compact.uniq.reject(&:empty?)
188
+ create_formatted_sub_hash("issn", issn)
189
+ end
190
+
191
+ def author
192
+ first_name = params['rft.aufirst'].first&.strip
193
+ last_name = params['rft.aulast'].first&.strip
194
+
195
+ name = "#{last_name}," if last_name
196
+ name = "#{name} #{first_name}".strip
197
+
198
+ name = nil if name.empty?
199
+ output_name = name || params['rft.au'] || params['rft.creator']
200
+
201
+ if first_name and last_name
202
+ return create_formatted_sub_hash("author", [output_name,output_name])
203
+ end
204
+ create_formatted_sub_hash("author", output_name)
205
+ end
206
+
207
+ def inventor
208
+ first_name = params['rft.invfirst'].first
209
+ last_name = params['rft.invlast'].first
210
+ name = Citero::Utils::NameFormatter.new("#{first_name} #{last_name}")
211
+ output_name = name.to_standardized || params['rft.inventor']
212
+ create_formatted_sub_hash("author", output_name)
213
+ end
214
+
215
+ def authors
216
+ authors = ['rft.au', 'rft.creator', 'rft.addau'].collect{|key| params[key]}.flatten.collect(&:to_s)
217
+ authors.reject!(&:empty?)
218
+ create_formatted_sub_hash('author', authors ) unless authors.empty?
219
+ end
220
+
221
+ def isbn
222
+ create_sub_hash("isbn", 'rft.isbn')
223
+ end
224
+
225
+ def publisher
226
+ publisher = [params['rft.pub'], params['rft.publisher']].flatten
227
+ create_formatted_sub_hash("publisher", publisher)
228
+ end
229
+
230
+
231
+ def thesis_elements
232
+ hash = []
233
+ hash << create_sub_hash("publisher", "rft.inst")
234
+ hash << create_sub_hash("type", "rft.degree")
235
+ merged = hash.compact.reduce({}, :merge)
236
+ return nil if merged.empty?
237
+ merged
238
+ end
239
+
240
+ def patent_elements
241
+ hash = []
242
+ hash << create_sub_hash("assignee", "rft.assignee")
243
+ hash << create_sub_hash("patentNumber", "rft.number")
244
+ hash << create_sub_hash("date", "rft.appldate")
245
+ merged = hash.compact.reduce({}, :merge)
246
+ return nil if merged.empty?
247
+ merged
248
+ end
249
+
250
+ def webpage_elements
251
+ hash = []
252
+ hash << create_sub_hash("abstractNote", "rft.description")
253
+ hash << create_sub_hash("rights", "rft.rights")
254
+ hash << create_sub_hash("language", "rft.language")
255
+ hash << create_sub_hash("tags", "rft.subject")
256
+ hash << create_sub_hash("itemType", "rft.type")
257
+ hash << create_sub_hash("publicationTitle", "rft.source")
258
+ unless params["rft.identifier"].empty?
259
+ identifier = params["rft.identifier"].first
260
+ hash << create_formatted_sub_hash("isbn", (identifier - 'isbn').strip) if identifier.start_with? 'isbn'
261
+ hash << create_formatted_sub_hash("issn", (identifier - 'issn').strip) if identifier.start_with? 'issn'
262
+ hash << create_formatted_sub_hash("doi", (identifier - 'urn:doi:').strip) if identifier.start_with? 'urn:doi:'
263
+ hash << create_formatted_sub_hash("url", identifier.strip) if identifier.match /^https?:\/\/.*/
264
+ end
265
+ merged = hash.compact.reduce({}, :merge)
266
+ return nil if merged.empty?
267
+ merged
268
+ end
269
+
270
+ end
271
+ end
272
+ end