citero 1.0.0.alpha
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/Gemfile +2 -0
- data/README.md +27 -0
- data/Rakefile +7 -0
- data/lib/citero.rb +123 -0
- data/lib/citero/csf.rb +31 -0
- data/lib/citero/inputs.rb +7 -0
- data/lib/citero/inputs/openurl.rb +272 -0
- data/lib/citero/inputs/pnx.rb +137 -0
- data/lib/citero/inputs/readers.rb +7 -0
- data/lib/citero/inputs/readers/pnx_reader.rb +151 -0
- data/lib/citero/outputs.rb +9 -0
- data/lib/citero/outputs/bibtex.rb +174 -0
- data/lib/citero/outputs/easybib.rb +203 -0
- data/lib/citero/outputs/openurl.rb +199 -0
- data/lib/citero/outputs/refworks_tagged.rb +52 -0
- data/lib/citero/outputs/ris.rb +209 -0
- data/lib/citero/utils.rb +5 -0
- data/lib/citero/utils/name_formatter.rb +56 -0
- data/lib/citero/version.rb +3 -0
- metadata +159 -0
@@ -0,0 +1,137 @@
|
|
1
|
+
module Citero
  module Inputs
    # Builds a CSF object from a raw PNX record.
    #
    # The raw record is handed to Citero::Inputs::Readers::PnxReader; the
    # values read from it are collected into a plain Hash, which is finally
    # loaded into a new CSF instance exposed through #csf.
    class Pnx
      # PNX display type (lower-cased) => CSF item type. Anything not listed
      # here is reported as 'document'.
      ITEM_TYPE_CONVERSIONS = {
        "audio"   => "audioRecording",
        "video"   => "videoRecording",
        "article" => "journalArticle",
        "books"   => "book",
        "book"    => "book",
        "report"  => "report",
        "webpage" => "webpage",
        "journal" => "journal",
        "map"     => "map",
        "thesis"  => "thesis"
      }.freeze

      # CSF field name => name of the reader method that supplies its value.
      QUALIFIED_METHOD_NAMES = {
        "title"           => "title",
        "publicationDate" => "publication_date",
        "journalTitle"    => "journal_title",
        "date"            => "date",
        "language"        => "language",
        "edition"         => "edition",
        "tags"            => "tags",
        "callNumber"      => "call_number",
        "pnxRecordId"     => "pnx_record_id",
        "description"     => "description",
        "notes"           => "notes"
      }.freeze

      # The CSF object populated from the PNX record.
      attr_reader :csf

      # @param raw_data the PNX payload, passed unchanged to PnxReader.new
      def initialize(raw_data)
        @pnx_reader = Citero::Inputs::Readers::PnxReader.new(raw_data)
        construct_csf
      end

      private

      # Populates @hash field by field and loads it into a fresh CSF.
      # Does nothing but return the existing object if @csf is already set.
      def construct_csf
        return @csf unless @csf.nil?
        @csf = CSF.new
        @hash = {}
        add_item_type
        parse_and_add_creators
        parse_and_add_publisher
        pages
        add_identifiers
        add_all_other_fields
        @hash['importedFrom'] = 'PNX'
        @csf.load_from_hash(@hash)
      end

      # Lookup table used by #get_item_type.
      def item_type_conversion_hash
        ITEM_TYPE_CONVERSIONS
      end

      # @param raw_type [String] type string as found in the record
      # @return [String] the mapped CSF item type, or 'document' when unmapped
      def get_item_type(raw_type)
        item_type_conversion_hash.fetch(raw_type.downcase, 'document')
      end

      # Stores the CSF item type; a missing type is treated as ''.
      def add_item_type
        @hash["itemType"] = get_item_type(@pnx_reader.type || '')
      end

      # Chooses which reader values become authors and which contributors:
      # * creator present  -> creator => author, contributor => contributor
      # * creator missing  -> contributor => author, no contributors
      # * both blank       -> addau => author
      def parse_and_add_creators
        creator     = @pnx_reader.creator
        contributor = @pnx_reader.contributor

        authors      = creator || contributor
        contributors = creator.nil? ? nil : contributor
        authors      = @pnx_reader.addau if creator.to_s.empty? && contributor.to_s.empty?

        add_creators(authors, "author")
        add_creators(contributors, "contributor")
      end

      # Splits a ';'-separated name list and appends the stripped names to
      # @hash[creator_type]. Nil or empty input leaves the hash untouched.
      def add_creators(creators, creator_type)
        return if creators.nil? || creators.empty?
        names = creators.split(";").map(&:strip)
        return if names.empty?
        @hash[creator_type] = [@hash[creator_type], names].flatten.compact
      end

      # Appends +values+ (scalar or array) to the array stored under +key+.
      def append_values(key, values)
        @hash[key] = [@hash[key], values].flatten.compact
      end

      # When the reader reports an identifier string, each ';'-separated part
      # is reduced to its digits and filed under 'isbn' if the part contains
      # the text "isbn", otherwise under 'issn'. Without an identifier the
      # reader's eissn / issn / isbn values are used when non-empty.
      def add_identifiers
        if @pnx_reader.identifier?
          @pnx_reader.identifier.split(";").each do |id|
            key = id.include?("isbn") ? 'isbn' : 'issn'
            append_values(key, id.scan(/[0-9]+/).to_a.join)
          end
        else
          @hash['eissn'] = @pnx_reader.eissn unless @pnx_reader.eissn.empty?
          append_values('issn', @pnx_reader.issn) unless @pnx_reader.issn.empty?
          append_values('isbn', @pnx_reader.isbn) unless @pnx_reader.isbn.empty?
        end
      end

      # Uses the reader's pub / cop values when either is present. Otherwise
      # falls back to the single publisher string; when that string contains
      # " : " the text before the separator is stored as the place and the
      # text after it as the publisher (previously the publisher half was
      # dropped and nil was stored in its place).
      def parse_and_add_publisher
        if @pnx_reader.pub.empty? && @pnx_reader.cop.empty? && @pnx_reader.publisher
          raw_publisher = @pnx_reader.publisher
          if raw_publisher.include? " : "
            place, publisher = raw_publisher.split(" : ", 2).map(&:strip)
            add_publisher_and_place(publisher, place)
          else
            add_publisher_and_place(raw_publisher)
          end
        else
          add_publisher_and_place(@pnx_reader.pub, @pnx_reader.cop)
        end
      end

      # Stores publisher and place, skipping values that are nil or empty so
      # that empty strings/arrays never reach the CSF hash.
      def add_publisher_and_place(publisher = nil, place = nil)
        @hash['publisher'] = publisher unless publisher.nil? || publisher.empty?
        @hash['place'] = place unless place.nil? || place.empty?
      end

      # Stores the first run of digits found in the reader's pages value as
      # 'numPages'. Brackets are removed before other non-digits are turned
      # into spaces, so digits separated only by brackets stay joined.
      def pages
        return unless @pnx_reader.pages
        raw_pages = @pnx_reader.pages.gsub(/[\(\)\[\]]/, "").gsub(/\D/, " ").strip
        @hash['numPages'] = raw_pages.split(" ").first unless raw_pages.empty?
      end

      # Lookup table used by #add_all_other_fields.
      def qualified_method_names
        QUALIFIED_METHOD_NAMES
      end

      # Copies every field listed in #qualified_method_names for which the
      # reader's "<method>?" predicate is truthy.
      def add_all_other_fields
        qualified_method_names.each do |standard_form, method_name|
          next unless @pnx_reader.public_send("#{method_name}?".to_sym)
          @hash[standard_form] = @pnx_reader.public_send(method_name.to_sym)
        end
      end
    end
  end
end
|
@@ -0,0 +1,151 @@
|
|
1
|
+
require 'ox'

module Citero
  module Inputs
    module Readers
      # Read-only accessors over a PNX XML record parsed with Ox.
      #
      # Every accessor looks up a fixed path with Ox's #locate and caches the
      # result in an instance variable. Calling "<accessor>?" (for example
      # +title?+) is answered through #method_missing and reports whether the
      # accessor returns a non-nil value.
      class PnxReader
        XML_DECLARATION_START = "<?xml"
        XML_DECLARATION = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"

        # @param data [String] PNX XML; an XML declaration is prepended when
        #   the string does not already start with one.
        def initialize(data)
          # NOTE(review): this changes Ox's process-wide default options on
          # every instantiation, not just for this parse.
          Ox.default_options = Ox.default_options.merge({ skip: :skip_none })
          parse_data = data
          parse_data = "#{XML_DECLARATION}#{data}" unless data.start_with?(XML_DECLARATION_START)
          @data = Ox.parse(parse_data)
        end

        # @return [String, nil] text of the first record/display/type element
        def type
          @type ||= get_value_from_pnx("record/display/type")
        end

        # @return [String, nil] text of the first record/display/publisher
        #   element. Cached in its own variable; it used to overwrite the
        #   cache shared with #pub.
        def publisher
          @publisher ||= get_value_from_pnx("record/display/publisher")
        end

        def language
          @language ||= get_value_from_pnx("record/display/language")
        end

        def edition
          @edition ||= get_value_from_pnx("record/display/edition")
        end

        # @return [String, nil] text of the first record/display/format element
        def pages
          @pages ||= get_value_from_pnx("record/display/format")
        end

        def identifier
          @identifier ||= get_value_from_pnx("record/display/identifier")
        end

        def creator
          @creator ||= get_value_from_pnx("record/display/creator")
        end

        def addau
          @addau ||= get_value_from_pnx("record/addata/addau")
        end

        def contributor
          @contributor ||= get_value_from_pnx("record/display/contributor")
        end

        def call_number
          @call_number ||= get_value_from_pnx("record/enrichment/classificationlcc")
        end

        def pnx_record_id
          @pnx_record_id ||= get_value_from_pnx("record/control/recordid")
        end

        # NOTE(review): reads the same path as #pages (record/display/format),
        # while #notes reads record/display/description — confirm this is
        # intended.
        def description
          @description ||= get_value_from_pnx("record/display/format")
        end

        # @return [Array] text of every record/addata/pub element. Uses its
        #   own cache variable so the result no longer depends on whether
        #   #publisher was called first.
        def pub
          @pub ||= get_all_values_from_pnx("record/addata/pub")
        end

        # @return [Array] text of every record/addata/cop element
        def cop
          @place_of_publication ||= get_all_values_from_pnx("record/addata/cop")
        end

        def issn
          @issn ||= get_all_values_from_pnx("record/addata/issn")
        end

        def eissn
          @eissn ||= get_all_values_from_pnx("record/addata/eissn")
        end

        def isbn
          @isbn ||= get_all_values_from_pnx("record/addata/isbn")
        end

        def title
          @title ||= get_all_values_from_pnx("record/display/title")
        end

        def journal_title
          @journal_title ||= get_all_values_from_pnx("record/addata/jtitle")
        end

        # @return [Array] child nodes of every record/addata/date element
        def publication_date
          @publication_date ||= nodes_at("record/addata/date")
        end

        # @return [Array] child nodes of the display creationdate elements
        #   followed by those of the search creationdate elements
        def date
          @date ||= nodes_at("record/display/creationdate", "record/search/creationdate")
        end

        # @return [Array, nil] child nodes of the search subject elements
        #   followed by those of the display subject elements; nil when there
        #   are none, so that +tags?+ is false.
        def tags
          @tags ||= nodes_at("record/search/subject", "record/display/subject")
          @tags unless @tags.empty?
        end

        # @return [Array] for each record/display/description element, its
        #   child nodes; String nodes are kept as is and other nodes
        #   contribute their #value. Now computed once instead of on every call.
        def notes
          @notes ||= @data.locate("record/display/description").collect do |element|
            element = element.nodes until element.is_a?(Array)
            element.collect { |val| val.is_a?(String) ? val : val.value }
          end.flatten
        end

        private

        # Text of the first node found at +path+, or nil when nothing matches.
        def get_value_from_pnx(path)
          @data.locate(path)&.first&.text
        end

        # Text of every node found at +path+.
        def get_all_values_from_pnx(path)
          @data.locate(path).flatten.collect(&:text)
        end

        # Child nodes of everything located at +paths+, in path order.
        def nodes_at(*paths)
          paths.collect { |path| @data.locate(path) }.flatten.collect { |d| d&.nodes }.flatten
        end

        # Answers "<name>?" by calling <name> and checking for non-nil.
        # Anything else is passed on to the default handler.
        def method_missing(method_sym, *arguments, &block)
          return super unless is_attribute_validator?(method_sym)
          !send(method_sym.to_s.chomp('?').to_sym).nil?
        end

        # Keeps #respond_to? in step with #method_missing: a "<name>?" query
        # is supported when <name> itself is. This replaces a private
        # override of #respond_to?, which made public respond_to? calls fail.
        def respond_to_missing?(method_sym, include_private = false)
          if is_attribute_validator?(method_sym)
            respond_to?(method_sym.to_s.chomp('?').to_sym, true)
          else
            super
          end
        end

        # True when the method name ends with '?'.
        def is_attribute_validator?(method_sym)
          method_sym.to_s[-1].eql?('?')
        end
      end
    end
  end
end
|
@@ -0,0 +1,174 @@
|
|
1
|
+
module Citero
  module Outputs
    # Renders a CSF record as a single BibTeX entry.
    #
    # The constructor takes any object responding to #csf; the value it
    # returns is read with #[] using CSF field names as keys.
    class Bibtex
      # CSF creator fields emitted by #creators, in output order.
      CREATOR_KEYS = ['author', 'inventor', 'contributor', 'editor',
                      'seriesEditor', 'translator'].freeze

      # CSF item type => BibTeX entry type; unlisted types become 'misc'.
      EXPORT_TYPE_MAP = {
        "book"             => "book",
        "bookSection"      => "incollection",
        "journalArticle"   => "article",
        "magazineArticle"  => "article",
        "newspaperArticle" => "article",
        "thesis"           => "phdthesis",
        "manuscript"       => "unpublished",
        "patent"           => "patent",
        "letter"           => "misc",
        "interview"        => "misc",
        "film"             => "misc",
        "artwork"          => "misc",
        "webpage"          => "misc",
        "conferencePaper"  => "inproceedings",
        "report"           => "techreport"
      }.freeze

      # CSF field name => BibTeX field name for fields copied verbatim.
      EXPORT_FIELD_MAP = {
        "attachments"     => "file",
        "extra"           => "note",
        "accessDate"      => "urldate",
        "reportNumber"    => "number",
        "seriesNumber"    => "number",
        "patentNumber"    => "number",
        "issue"           => "number",
        "place"           => "address",
        "section"         => "chapter",
        "rights"          => "copyright",
        "isbn"            => "isbn",
        "issn"            => "issn",
        "title"           => "title",
        "date"            => "date",
        # was "iccn", a typo for the BibTeX call-number field
        "callNumber"      => "lccn",
        "archiveLocation" => "location",
        "shortTitle"      => "shorttitle",
        "doi"             => "doi",
        "abstractNote"    => "abstract",
        "country"         => "nationality",
        "edition"         => "edition",
        "language"        => "language",
        "type"            => "type",
        "series"          => "series",
        "volume"          => "volume",
        "assignee"        => "assignee"
      }.freeze

      # @param csf object whose #csf returns the record to export
      def initialize(csf)
        @csf = csf.csf
      end

      # @return [String] the full entry: "@type{key,\n\tfield = value,...\n}"
      def to_bibtex
        "@#{bibtex_type}{#{cite_key}#{bibtex_fields}\n}"
      end

      # @return [String] BibTeX entry type for the record's itemType
      def bibtex_type
        export_type_map[@csf['itemType']] || 'misc'
      end

      # @return [String] author, title and date parts joined with '_'
      def cite_key
        [cite_key_author, cite_key_title, cite_key_date].compact.join('_')
      end

      # @return [String] all non-empty fields, each preceded by ",\n\t"
      def bibtex_fields
        # The leading "" makes the join start with the separator, so the
        # result can be appended directly after the cite key.
        fields = [""]
        fields.concat(creators)
        fields << publisher
        export_field_map.each do |csf_key, bibtex_key|
          fields << map_to_bibtex_value(bibtex_key, csf_key)
        end
        fields << note << tags << publicationTitle
        fields << pages << num_pages << webpage
        fields.compact.join(",\n\t")
      end

      # The CSF 'publisher' value, emitted as 'school' for a thesis,
      # 'institution' for a report and 'publisher' otherwise. (The thesis
      # and report branches used to pass the arguments in the wrong order
      # and therefore read CSF keys 'school' / 'institution'.)
      def publisher
        return map_to_bibtex_value("school", "publisher") if @csf['itemType'].eql?("thesis")
        return map_to_bibtex_value("institution", "publisher") if @csf['itemType'].eql?("report")
        map_to_bibtex_value("publisher", "publisher")
      end

      # TODO gotta concat these tags
      def note
        map_to_bibtex_value('annote', 'note')
      end

      def tags
        map_to_bibtex_value('keywords', 'tags')
      end

      # 'booktitle' for book sections and conference papers, else 'journal'.
      def publicationTitle
        if ['bookSection', 'conferencePaper'].include? @csf['itemType']
          map_to_bibtex_value('booktitle', 'publicationTitle')
        else
          map_to_bibtex_value('journal', 'publicationTitle')
        end
      end

      # @param name [String, Array, nil] one or more names
      # @return [String] standardized names joined with ' and '
      def join_names(name)
        [name].flatten.compact.collect do |single_name|
          Citero::Utils::NameFormatter.new(single_name).to_standardized
        end.join(' and ')
      end

      # @return [Array<String>] one field per creator type that has names;
      #   'seriesEditor' is emitted under the key 'editor'.
      def creators
        CREATOR_KEYS.collect do |creator|
          names = join_names(@csf[creator])
          add_to_bibtex_output(creator.eql?('seriesEditor') ? 'editor' : creator, names)
        end.compact
      end

      # The page range with runs of '-' normalised to '--'. Previously this
      # value was computed and thrown away, so it never reached the output.
      def pages
        add_to_bibtex_output('pages', @csf['pages']&.gsub(/-+/, "--"))
      end

      # The page count, with commas backslash-escaped.
      def num_pages
        add_to_bibtex_output('numPages', @csf['numPages']&.gsub(",", "\\,"))
      end

      def webpage
        # Gotta see what this is supposed to be..
        add_to_bibtex_output('howpublished','website') if @csf['itemType'].eql? 'webpage'
      end

      # True when the value is a bare integer and the key is not one of the
      # fields that must always be braced.
      def expects_number_value?(key,value)
        (/\A[-+]?\d+\z/ === value) && !['numPages','isbn','issn'].include?(key)
      end

      # @param key [String] BibTeX field name
      # @param value [String, Array, nil] value; arrays are joined with ', '
      # @return [String, nil] "key = {value}" (or "key = value" for bare
      #   integers), or nil when the value is nil, blank or an empty array
      def add_to_bibtex_output(key,value)
        value = value.join(', ') if value.is_a?(Array)
        return if value.nil?
        value = value.to_s
        return if value.strip.empty?
        value = "{#{value}}" unless expects_number_value?(key,value)
        "#{key} = #{value}"
      end

      # Formats the CSF value stored under +csf_key+ as BibTeX field +key+.
      def map_to_bibtex_value(key,csf_key)
        add_to_bibtex_output(key, @csf[csf_key])
      end

      # @return [String, nil] lower-cased last name of the (first) name
      def cite_key_author_last_name(name)
        name = name.first if name.is_a? Array
        return if name.nil?
        Citero::Utils::NameFormatter.new(name).last_name&.downcase
      end

      def cite_key_author
        cite_key_author_last_name(@csf['author']) || cite_key_author_last_name(@csf['contributor'])
      end

      # First word of the lower-cased title after stripping the leading
      # words matched by the pattern below.
      def cite_key_title_first_non_stop_word(title)
        title = title.first if title.is_a? Array
        title&.downcase&.gsub(/^((a+|the+|on+)\s)+/,"")&.split(" ")&.first
      end

      def cite_key_title
        cite_key_title_first_non_stop_word(@csf["title"])
      end

      # Digits of the first date value, or "????" when there is no date or
      # it contains no digits. Accepts a single String as well as an Array.
      def cite_key_date
        digits = Array(@csf['date']).first&.gsub(/[^0-9]/,'')
        digits.nil? || digits.empty? ? "????" : digits
      end

      def export_type_map
        EXPORT_TYPE_MAP
      end

      def export_field_map
        EXPORT_FIELD_MAP
      end
    end
  end
end
|