citero 1.0.0.alpha

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: a420fcb5e2a2984a8075c248070be78ba1523017
4
+ data.tar.gz: 8728f7f6805f7713e84e4586606a31f7c5af7bb9
5
+ SHA512:
6
+ metadata.gz: 0c5fd7a6553dd5be250cbf512f5f9fca8e5ac5c6c60d184797913992d9cc4cdb49fba005bdc1b25f37de2aae93f43391544d1abbec917a8c416ce4190ccff599
7
+ data.tar.gz: b8663ea856531cc639cda15902992d0caeb7b2a6855be5595a72607a939d5ba21fad9909677ec4fcf047dbccc0f293624713151768d3539cb387af516f2515f2
data/Gemfile ADDED
@@ -0,0 +1,2 @@
1
# Resolve every dependency from the public RubyGems index.
source "https://rubygems.org"

# Take the dependency list from the gemspec so it is declared in one place.
gemspec
@@ -0,0 +1,27 @@
1
+ Citero-ruby
2
+ ==========
3
+ Ruby repository for the [Citero](https://github.com/NYULibraries/citero) project.
4
+
5
+ Citero is a program that allows for mapping of data inputs from various systems into one normalized metadata schema
6
+ tentatively known as *Citero Standard Form*, or *CSF*. From the normalized schema, *CSF*, it can produce another output
7
+ format for use by another system.
8
+
9
+ Citero-ruby is a complete rewrite of the Java project in Ruby.
10
+
11
+ Currently Supported Formats/Systems
12
+ ===================================
13
+
14
+ How to install
15
+ ==============
16
+
17
+ How to run
18
+ ==========
19
+
20
+ API Considerations
21
+ ==========
22
+
23
+ Exceptions
24
+ ==========
25
+
26
+ CSF
27
+ =====
@@ -0,0 +1,7 @@
1
# Defines the `spec` task (RSpec), makes it the default task, and loads
# Bundler's gem tasks. Any LoadError raised while setting this up is
# swallowed, so no tasks are registered in that case.
begin
  require "rspec/core/rake_task"
  require "bundler/gem_tasks"

  RSpec::Core::RakeTask.new(:spec)
  task default: :spec
rescue LoadError
  # Deliberately ignored: the tasks above are simply left undefined.
end
@@ -0,0 +1,123 @@
1
# Top-level API for translating bibliographic data between formats.
#
# A conversion is written as a chain on the module itself:
#
#   Citero.map(raw).from_openurl.to_ris
#
# `map` stores the raw input, `from_<format>` records the source format and
# `to_<format>` performs the conversion and returns the rendered output.
# The chain state is kept in module-level instance variables, so it is shared
# by every caller of the module.
module Citero
  require_relative 'citero/version'
  require_relative 'citero/csf'
  require_relative 'citero/inputs'
  require_relative 'citero/outputs'
  require_relative 'citero/utils'

  # @return [Array<Symbol>] source formats accepted by `from_`.
  def self.from_formats
    [:csf, :openurl, :pnx]
  end

  # @return [Array<Symbol>] output formats advertised for `to_<format>`.
  def self.to_formats
    [:csf, :ris, :openurl, :bibtex, :easybib, :refworks_tagged]
  end

  # @return [Array] supported citation styles (currently none).
  def self.citation_styles
    []
  end

  # Stores the raw input for the conversion chain.
  #
  # @param input [Object] raw record; its expected type depends on the source format
  # @return [Module] self, for chaining
  def self.map(input)
    @input = input
    self
  end

  # Records the source format of the stored input.
  #
  # @param format [#to_sym] one of `from_formats`
  # @return [Module] self, for chaining
  def self.from_(format)
    @from_format = format.to_sym
    self
  end

  # Parses the stored input and returns its CSF representation.
  #
  # @return [Object, nil] the parsed record's `csf`, or nil when no source
  #   format has been set yet
  # @raise [ArgumentError] if the source format is not supported
  def self.csf
    return nil unless @from_format
    build_source.csf
  end

  # Converts the stored input to the requested output format.
  #
  # @param format [#to_sym] requested output format
  # @return [Object, nil] the rendered output (a String for :csf); nil when
  #   the output format is not one of the handled ones
  # @raise [ArgumentError] if the source format is missing or unsupported
  def self.to_(format)
    @to_format = format.to_sym
    from = build_source

    case @to_format
    when :ris
      Citero::Outputs::Ris.new(from).to_ris
    when :openurl
      Citero::Outputs::OpenUrl.new(from).to_openurl
    when :bibtex
      Citero::Outputs::Bibtex.new(from).to_bibtex
    when :easybib
      Citero::Outputs::EasyBib.new(from).to_easybib
    when :refworks_tagged
      Citero::Outputs::RefworksTagged.new(from).to_refworks_tagged
    when :csf
      render_csf(from.csf)
    end
  end

  # Parses "key: value" lines into a Hash whose values are Arrays, so that a
  # key repeated on several lines accumulates all of its values.
  #
  # Blank lines are skipped; previously each blank line produced a `nil` key
  # mapped to an empty array.
  #
  # @param string [String] CSF text, one "key: value" pair per line
  # @return [Hash{String => Array<String>}]
  def self.csf_string_to_hash(string)
    string.lines.map(&:strip).each_with_object({}) do |line, hash|
      next if line.empty?
      key, value = line.split(':', 2).map(&:strip)
      hash[key] = [hash[key], value].compact.flatten
    end
  end

  # Routes `from_<format>` / `to_<format>` calls to `from_` / `to_`.
  # Anything else goes to the default handler, which raises NoMethodError.
  def self.method_missing(method_sym, *arguments, &block)
    return super unless respond_to_missing?(method_sym, false)

    direction, format = method_sym.to_s.split('_', 2)
    direction == 'to' ? to_(format) : from_(format)
  end

  # Reports the dynamic `from_<format>` / `to_<format>` methods.
  #
  # This hook is used instead of overriding `respond_to?` so that methods that
  # really exist but merely start with "to_" or "from_" (`to_s`, `to_formats`,
  # `from_formats`, `from_`) are still answered by the default lookup.
  def self.respond_to_missing?(method_sym, include_private = false)
    direction, format = method_sym.to_s.split('_', 2)
    formats = { 'from' => from_formats, 'to' => to_formats }[direction]
    return true if formats && format && formats.include?(format.to_sym)
    super
  end

  # Wraps the stored input in the reader that matches the source format.
  def self.build_source
    case @from_format
    when :csf
      data = @input
      data = csf_string_to_hash(data) if data.kind_of?(String)
      Citero::CSF.new(data)
    when :openurl
      Citero::Inputs::OpenUrl.new(@input)
    when :pnx
      Citero::Inputs::Pnx.new(@input)
    else
      raise ArgumentError, "unsupported source format: #{@from_format.inspect}"
    end
  end

  # Renders a CSF record as "key: value" lines, one line per value, with
  # every "." and "," in a value prefixed by a backslash.
  # NOTE(review): csf_string_to_hash does not reverse this escaping — confirm
  # whether a CSF round trip is expected to be lossless.
  def self.render_csf(record)
    lines = []
    record.each do |key, value|
      values = value.kind_of?(Array) ? value : [value]
      values.each do |item|
        lines << "#{key}: #{item.gsub('.', '\.').gsub(',', '\,')}\n"
      end
    end
    lines.join.chomp
  end

  private_class_method :build_source, :render_csf
end
@@ -0,0 +1,31 @@
1
# Forwardable lives in the standard library but is not loaded by default.
require 'forwardable'

module Citero
  # Container for one record: a Hash-backed store in which assigning to a key
  # that already has a value accumulates the values instead of replacing them.
  class CSF
    extend Forwardable
    # `[]=` is intentionally not delegated: it is defined below with
    # accumulate-on-assign behaviour.
    def_delegators :@data, :[], :size, :each, :inspect, :to_s
    attr_reader :data
    alias_method :csf, :data

    # @param hash [Hash, nil] optional initial key/value pairs
    def initialize(hash = nil)
      @data = {}
      load_from_hash(hash) unless hash.nil?
    end

    # Adds every pair of +hash+ through `[]=`, skipping pairs whose value is nil.
    #
    # @param hash [#each_pair] pairs to add
    def load_from_hash(hash)
      hash.each_pair do |key, value|
        next if value.nil?
        self[key] = value
      end
    end

    # Stores +value+ under +key+. When the key already holds something, the
    # existing and new values are combined into one flat Array.
    def []=(key, value)
      @data[key] = element_or_list(@data[key], value)
    end

    private

    # Flattens the existing and the incoming value together and drops nils.
    # A single surviving element is returned bare, anything else as an Array
    # (an empty Array when nothing survives).
    def element_or_list(old_value, new_value)
      merged = [old_value, new_value].flatten.compact
      merged.size == 1 ? merged.first : merged
    end
  end
end
@@ -0,0 +1,7 @@
1
module Citero
  # Namespace for the input readers; loading this file loads each of them.
  module Inputs
    # Load order is kept as listed: readers, then pnx, then openurl.
    %w[readers pnx openurl].each do |reader|
      require_relative "inputs/#{reader}"
    end
  end
end
@@ -0,0 +1,272 @@
1
module Citero
  module Inputs
    # Reads an OpenURL and exposes its contents as a CSF record.
    #
    # The query string of the URL is parsed into `params` (every value is an
    # Array of Strings) and the recognised `rft.*` / `rft_id` keys are mapped
    # onto CSF fields, available through `csf`.
    class OpenUrl
      require 'open-uri'
      require 'cgi'

      # `rft.genre` value => CSF item type, for the "book" metadata format.
      BOOK_TYPES = {
        "bookitem"   => "bookSection",
        "conference" => "conferencePaper",
        "proceeding" => "conferencePaper",
        "report"     => "report",
        "document"   => "document"
      }.freeze

      # `rft_val_fmt` value => CSF item type, for formats that need no genre.
      SIMPLE_TYPES = {
        "info:ofi/fmt:kev:mtx:journal"      => "journalArticle",
        "info:ofi/fmt:kev:mtx:dissertation" => "thesis",
        "info:ofi/fmt:kev:mtx:patent"       => "patent",
        "info:ofi/fmt:kev:mtx:dc"           => "webpage",
        "info:ofi/fmt:kev:mtx:audio"        => "audioRecording"
      }.freeze

      # OpenURL parameter => CSF field, for values that are copied unchanged.
      KEY_MAPPINGS = {
        'rft.type'        => 'itemType',
        'rft.description' => 'abstractNote',
        'rft.rights'      => 'rights',
        'rft.language'    => 'language',
        'rft.subject'     => 'tags',
        'rft.source'      => 'publicationTitle',
        'rft.pub'         => 'publisher',
        'rft.publisher'   => 'publisher',
        'rft.place'       => 'place',
        'rft.edition'     => 'edition',
        'rft.series'      => 'series',
        'rft.volume'      => 'volume',
        'rft.issue'       => 'issue',
        'rft.inventor'    => 'inventor',
        'rft.contributor' => 'contributor',
        'rft.aucorp'      => 'author',
        'rft.pages'       => 'pages',
        'rft.spage'       => 'startPage',
        'rft.epage'       => 'endPage',
        'rft.tpages'      => 'numPage'
      }.freeze

      attr_reader :csf, :params

      # @param raw_data [String] the OpenURL; it must parse as a URI
      def initialize(raw_data)
        @raw_data = raw_data
        construct_csf
      end

      private

      # Parses the URL and fills @csf. The partial hashes are merged in the
      # order listed, so a later entry overrides an earlier one that sets the
      # same key.
      def construct_csf
        @csf = CSF.new
        url = @raw_data
        # NOTE(review): the query is unescaped here and CGI.parse unescapes
        # each key and value again, so encoded "&", "=", "+" or "%" inside a
        # value will not survive. Left as is because existing callers may
        # rely on it — confirm before changing.
        @params = CGI.parse(CGI::unescape(URI.parse(url).query))
        remove_blanks_from_params

        hash = [
          {"itemType" => item_type},
          rft_id,
          map_one_to_one_fields,
          isbn,
          # rfr_id,
          btitle,
          atitle,
          jtitle,
          stitle,
          title,
          date,
          author,
          issn,
          inventor,
          authors,
          publisher,
          thesis_elements,
          patent_elements,
          webpage_elements,
          {"importedFrom" => "OpenURL"}
        ].compact.reduce({}, :merge)

        @csf.load_from_hash(hash)
      end

      # De-duplicates each parameter's values and drops empty ones; parameters
      # left without any value are removed. Iterates over a snapshot of the
      # keys so the hash is not modified while it is being traversed.
      def remove_blanks_from_params
        @params.keys.each do |key|
          values = @params[key].uniq.compact.reject(&:empty?)
          if values.empty?
            @params.delete(key)
          else
            @params[key] = values
          end
        end
      end

      # Maps an OpenURL metadata format identifier to a CSF item type.
      def get_format(openurl_format)
        simple_types.fetch(openurl_format) { get_book_format(openurl_format) }
      end

      # Item type for the "book" format, refined by `rft.genre`. Any format
      # other than "book" yields 'document'.
      def get_book_format(openurl_format)
        return 'document' unless openurl_format.eql?('info:ofi/fmt:kev:mtx:book')
        # params values are Arrays, so the genre has to be unwrapped before it
        # is used as a lookup key; looking up the Array itself never matched.
        book_type.fetch(params['rft.genre'].first, 'book')
      end

      def item_type
        get_format(params["rft_val_fmt"].first)
      end

      def book_type
        BOOK_TYPES
      end

      def simple_types
        SIMPLE_TYPES
      end

      def key_mappings
        KEY_MAPPINGS
      end

      # Copies every parameter listed in key_mappings to its CSF field.
      def map_one_to_one_fields
        key_mappings
          .map { |param, field| create_sub_hash(field, param) }
          .compact
          .reduce({}, :merge)
      end

      # Interprets `rft_id` as an identifier or as a web address.
      #
      # @return [Hash, nil] nil when the parameter is absent; {} when its
      #   value is not recognised
      def rft_id
        rft_id = params['rft_id'].first
        return if rft_id.nil?
        # Both prefixes are nine characters long, hence the [9..-1] slice.
        # Passing them as separate arguments tests each one; the previous
        # `start_with? 'a' || ...` form only ever tested the first.
        # NOTE(review): a DOI is stored under "isbn" here, and the second
        # prefix reads 'urn.isbn/' rather than 'urn:isbn:' — confirm both.
        if rft_id.start_with?('info:doi/', 'urn.isbn/')
          return { "isbn" => rft_id[9..-1] } unless rft_id[9..-1].empty?
        end
        return { "url" => rft_id, "accessDate" => "" } if rft_id =~ %r{^https?://}
        {}
      end

      # Currently unused: its entry in construct_csf is commented out.
      def rfr_id
        return { "rfr_id" => params['rfr_id'].first } if params['rfr_id'].first
        {}
      end

      # @return [Hash, nil] { key => value }, or nil when the key is missing
      #   or the value is nil or empty
      def create_formatted_sub_hash(key, value)
        return nil unless key && value && !value.empty?
        { key => value }
      end

      # Like create_formatted_sub_hash, taking the value from params[value].
      def create_sub_hash(key, value)
        create_formatted_sub_hash(key, params[value])
      end

      def btitle
        key = 'title' if ['book','report'].include?(item_type)
        key = 'publicationTitle' if ['bookSection','conferencePaper'].include?(item_type)
        create_sub_hash(key, 'rft.btitle')
      end

      def atitle
        key = 'title' if ['journalArticle','bookSection','conferencePaper'].include?(item_type)
        create_sub_hash(key, 'rft.atitle')
      end

      def jtitle
        key = 'publicationTitle' if ['journalArticle'].include?(item_type)
        create_sub_hash(key, 'rft.jtitle')
      end

      def stitle
        key = 'journalAbbreviation' if ['journalArticle'].include?(item_type)
        create_sub_hash(key, 'rft.stitle')
      end

      def title
        key = 'title'
        key = 'publicationTitle' if ['journalArticle','bookSection','conferencePaper'].include?(item_type)
        create_sub_hash(key, 'rft.title')
      end

      def date
        key = "date"
        key = "issueDate" if item_type.eql?("patent")
        create_sub_hash(key, 'rft.date')
      end

      def issn
        issn = [params['rft.issn'], params['rft.eissn']].flatten.compact.uniq.reject(&:empty?)
        create_formatted_sub_hash("issn", issn)
      end

      # Builds "Last, First" from rft.aulast / rft.aufirst, falling back to
      # the rft.au and then rft.creator values when neither part is present.
      def author
        first_name = params['rft.aufirst'].first&.strip
        last_name = params['rft.aulast'].first&.strip

        name = "#{last_name}," if last_name
        name = "#{name} #{first_name}".strip

        name = nil if name.empty?
        output_name = name || params['rft.au'] || params['rft.creator']

        # NOTE(review): with both name parts present the same name is emitted
        # twice — confirm this is intended.
        if first_name && last_name
          return create_formatted_sub_hash("author", [output_name, output_name])
        end
        create_formatted_sub_hash("author", output_name)
      end

      # NOTE(review): the inventor is stored under "author" — confirm.
      def inventor
        first_name = params['rft.invfirst'].first
        last_name = params['rft.invlast'].first
        name = Citero::Utils::NameFormatter.new("#{first_name} #{last_name}")
        output_name = name.to_standardized || params['rft.inventor']
        create_formatted_sub_hash("author", output_name)
      end

      def authors
        authors = ['rft.au', 'rft.creator', 'rft.addau'].flat_map { |key| params[key] }.map(&:to_s)
        authors.reject!(&:empty?)
        create_formatted_sub_hash('author', authors) unless authors.empty?
      end

      def isbn
        create_sub_hash("isbn", 'rft.isbn')
      end

      def publisher
        publisher = [params['rft.pub'], params['rft.publisher']].flatten
        create_formatted_sub_hash("publisher", publisher)
      end

      def thesis_elements
        merge_sub_hashes([
          create_sub_hash("publisher", "rft.inst"),
          create_sub_hash("type", "rft.degree")
        ])
      end

      def patent_elements
        merge_sub_hashes([
          create_sub_hash("assignee", "rft.assignee"),
          create_sub_hash("patentNumber", "rft.number"),
          create_sub_hash("date", "rft.appldate")
        ])
      end

      def webpage_elements
        parts = [
          create_sub_hash("abstractNote", "rft.description"),
          create_sub_hash("rights", "rft.rights"),
          create_sub_hash("language", "rft.language"),
          create_sub_hash("tags", "rft.subject"),
          create_sub_hash("itemType", "rft.type"),
          create_sub_hash("publicationTitle", "rft.source")
        ]
        unless params["rft.identifier"].empty?
          identifier = params["rft.identifier"].first
          # String has no `-` operator, so the prefix is removed by slicing.
          parts << create_formatted_sub_hash("isbn", without_prefix(identifier, 'isbn')) if identifier.start_with?('isbn')
          parts << create_formatted_sub_hash("issn", without_prefix(identifier, 'issn')) if identifier.start_with?('issn')
          parts << create_formatted_sub_hash("doi", without_prefix(identifier, 'urn:doi:')) if identifier.start_with?('urn:doi:')
          parts << create_formatted_sub_hash("url", identifier.strip) if identifier =~ %r{^https?://}
        end
        merge_sub_hashes(parts)
      end

      # Merges the non-nil partial hashes; nil when nothing is left.
      def merge_sub_hashes(parts)
        merged = parts.compact.reduce({}, :merge)
        merged.empty? ? nil : merged
      end

      # Returns +text+ with its leading +prefix+ cut off and the remainder
      # stripped. The caller has already checked that +text+ starts with it.
      def without_prefix(text, prefix)
        text[prefix.length..-1].strip
      end
    end
  end
end