citero 1.0.0.alpha
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/Gemfile +2 -0
- data/README.md +27 -0
- data/Rakefile +7 -0
- data/lib/citero.rb +123 -0
- data/lib/citero/csf.rb +31 -0
- data/lib/citero/inputs.rb +7 -0
- data/lib/citero/inputs/openurl.rb +272 -0
- data/lib/citero/inputs/pnx.rb +137 -0
- data/lib/citero/inputs/readers.rb +7 -0
- data/lib/citero/inputs/readers/pnx_reader.rb +151 -0
- data/lib/citero/outputs.rb +9 -0
- data/lib/citero/outputs/bibtex.rb +174 -0
- data/lib/citero/outputs/easybib.rb +203 -0
- data/lib/citero/outputs/openurl.rb +199 -0
- data/lib/citero/outputs/refworks_tagged.rb +52 -0
- data/lib/citero/outputs/ris.rb +209 -0
- data/lib/citero/utils.rb +5 -0
- data/lib/citero/utils/name_formatter.rb +56 -0
- data/lib/citero/version.rb +3 -0
- metadata +159 -0
@@ -0,0 +1,137 @@
|
|
1
|
+
module Citero
  module Inputs
    # Builds a CSF object from a raw PNX record.
    #
    # The raw data is handed to Citero::Inputs::Readers::PnxReader; the values
    # that reader exposes are collected into a plain hash which is then loaded
    # into a freshly created CSF instance.
    class Pnx
      # The CSF instance populated during initialization.
      attr_reader :csf

      # @param raw_data the raw PNX payload; passed unchanged to PnxReader.new
      def initialize(raw_data)
        @pnx_reader = Citero::Inputs::Readers::PnxReader.new(raw_data)
        construct_csf
      end

      private

      # Populates @hash field by field and loads it into @csf.
      # Does nothing but return the existing CSF when one was already built.
      def construct_csf
        return @csf unless @csf.nil?

        @csf = CSF.new
        @hash = {}
        add_item_type
        parse_and_add_creators
        parse_and_add_publisher
        pages
        add_identifiers
        add_all_other_fields
        @hash['importedFrom'] = 'PNX'
        @csf.load_from_hash(@hash)
      end

      # Lookup table from the (lower-cased) PNX type to the CSF item type.
      def item_type_conversion_hash
        @item_type_conversion_hash ||= {
          "audio" => "audioRecording",
          "video" => "videoRecording",
          "article" => "journalArticle",
          "books" => "book",
          "book" => "book",
          "report" => "report",
          "webpage" => "webpage",
          "journal" => "journal",
          "map" => "map",
          "thesis" => "thesis"
        }
      end

      # @param raw_type [String] type as read from the record (any letter case)
      # @return [String] the mapped item type, or 'document' for unknown types
      def get_item_type(raw_type)
        item_type_conversion_hash.fetch(raw_type.downcase, 'document')
      end

      def add_item_type
        @hash["itemType"] = get_item_type(@pnx_reader.type || '')
      end

      # Chooses which reader values become authors and which contributors:
      # * authors come from creator, falling back to contributor when creator
      #   is nil, and to addau when both creator and contributor are blank;
      # * contributors are only recorded separately when a creator exists.
      def parse_and_add_creators
        creator = @pnx_reader.creator
        contributor = @pnx_reader.contributor

        authors = creator || contributor
        contributors = creator.nil? ? [] : contributor
        authors = @pnx_reader.addau if creator.to_s.empty? && contributor.to_s.empty?

        add_creators(authors, "author")
        add_creators(contributors, "contributor")
      end

      # Splits a ';'-separated name list and appends each stripped name to
      # @hash[creator_type]. Nil or empty input is ignored.
      def add_creators(creators, creator_type)
        return if creators.nil? || creators.empty?

        creators.split(";").each do |name|
          append_value(creator_type, name.strip)
        end
      end

      # Records ISBN/ISSN values. When the reader has an identifier value it is
      # split on ';' and every part is reduced to its digits; otherwise the
      # reader's dedicated eissn/issn/isbn values are used.
      def add_identifiers
        if @pnx_reader.identifier?
          @pnx_reader.identifier.split(";").each do |id|
            digits = id.scan(/[0-9]+/).to_a.join
            # NOTE(review): every part that does not mention "isbn" is filed as
            # an ISSN, whatever it actually is - confirm this is intended.
            append_value(id.include?("isbn") ? 'isbn' : 'issn', digits)
          end
        else
          @hash['eissn'] = @pnx_reader.eissn unless @pnx_reader.eissn.empty?
          append_value('issn', @pnx_reader.issn) unless @pnx_reader.issn.empty?
          append_value('isbn', @pnx_reader.isbn) unless @pnx_reader.isbn.empty?
        end
      end

      # Appends value(s) to the list stored under key, creating it on demand.
      def append_value(key, value)
        @hash[key] = [@hash[key], value].flatten.compact
      end

      # Records publisher and place. The reader's pub/cop values win; only when
      # both are empty is the publisher value used, which may be formatted as
      # "Place : Publisher".
      def parse_and_add_publisher
        pub = @pnx_reader.pub
        cop = @pnx_reader.cop
        display_publisher = @pnx_reader.publisher if pub.empty? && cop.empty?

        if display_publisher
          if display_publisher.include? " : "
            place, name = display_publisher.split(" : ", 2).map(&:strip)
            # Keep both halves: the text before " : " is the place, the rest
            # is the publisher name.
            add_publisher_and_place(name, place)
          else
            add_publisher_and_place(display_publisher)
          end
        else
          add_publisher_and_place(pub, cop)
        end
      end

      # Stores publisher and/or place, skipping nil and empty values so that
      # no blank entries end up in the hash.
      def add_publisher_and_place(publisher = nil, place = nil)
        @hash['publisher'] = publisher unless publisher.nil? || publisher.empty?
        @hash['place'] = place unless place.nil? || place.empty?
      end

      # Stores the first number found in the reader's pages value as numPages.
      def pages
        return unless @pnx_reader.pages

        # Drop brackets, then blank out every non-digit so only numbers remain.
        raw_pages = @pnx_reader.pages.gsub(/[\(\)\[\]]/, "").gsub(/\D/, " ").strip
        @hash['numPages'] = raw_pages.split(" ").first unless raw_pages.empty?
      end

      # Maps CSF field names to the reader method that supplies their value.
      def qualified_method_names
        @qualified_method_names ||= {
          "title" => "title",
          "publicationDate" => "publication_date",
          "journalTitle" => "journal_title",
          "date" => "date",
          "language" => "language",
          "edition" => "edition",
          "tags" => "tags",
          "callNumber" => "call_number",
          "pnxRecordId" => "pnx_record_id",
          "description" => "description",
          "notes" => "notes"
        }
      end

      # Copies every field listed in qualified_method_names for which the
      # reader's "<name>?" check answers true.
      def add_all_other_fields
        qualified_method_names.each do |standard_form, method_name|
          next unless @pnx_reader.send("#{method_name}?".to_sym)

          @hash[standard_form] = @pnx_reader.send(method_name.to_sym)
        end
      end
    end
  end
end
|
@@ -0,0 +1,151 @@
|
|
1
|
+
module Citero
  module Inputs
    module Readers
      # Reads individual values out of a PNX XML document parsed with Ox.
      #
      # Every public reader method looks up a fixed path in the parsed
      # document and memoizes the result. In addition, calling
      # "<reader>?" (for example title?) answers whether that reader
      # returns a non-nil value.
      class PnxReader
        require 'ox'

        XML_DECLARATION_START = "<?xml"
        XML_DECLARATION = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"

        # Parses the given XML text, prepending an XML declaration when the
        # text does not already start with one.
        #
        # @param data [String] raw PNX XML
        def initialize(data)
          # NOTE: this changes Ox's process-wide default options.
          Ox.default_options = Ox.default_options.merge({ skip: :skip_none })
          parse_data = data
          parse_data = "#{XML_DECLARATION}#{data}" unless data.start_with?(XML_DECLARATION_START)
          @data = Ox.parse(parse_data)
        end

        def type
          @type ||= get_value_from_pnx("record/display/type")
        end

        # Text of the first record/display/publisher element, or nil.
        # Cached in its own variable so it can no longer be confused with
        # the values returned by #pub.
        def publisher
          @publisher ||= get_value_from_pnx("record/display/publisher")
        end

        def language
          @language ||= get_value_from_pnx("record/display/language")
        end

        def edition
          @edition ||= get_value_from_pnx("record/display/edition")
        end

        def pages
          @pages ||= get_value_from_pnx("record/display/format")
        end

        def identifier
          @identifier ||= get_value_from_pnx("record/display/identifier")
        end

        def creator
          @creator ||= get_value_from_pnx("record/display/creator")
        end

        def addau
          @addau ||= get_value_from_pnx("record/addata/addau")
        end

        def contributor
          @contributor ||= get_value_from_pnx("record/display/contributor")
        end

        def call_number
          @call_number ||= get_value_from_pnx("record/enrichment/classificationlcc")
        end

        def pnx_record_id
          @pnx_record_id ||= get_value_from_pnx("record/control/recordid")
        end

        # NOTE(review): reads the same path as #pages (record/display/format);
        # confirm that this is the intended source for the description.
        def description
          @description ||= get_value_from_pnx("record/display/format")
        end

        # Texts of all record/addata/pub elements (an Array, possibly empty).
        def pub
          @pub ||= get_all_values_from_pnx("record/addata/pub")
        end

        # Texts of all record/addata/cop elements (an Array, possibly empty).
        def cop
          @place_of_publication ||= get_all_values_from_pnx("record/addata/cop")
        end

        def issn
          @issn ||= get_all_values_from_pnx("record/addata/issn")
        end

        def eissn
          @eissn ||= get_all_values_from_pnx("record/addata/eissn")
        end

        def isbn
          @isbn ||= get_all_values_from_pnx("record/addata/isbn")
        end

        def title
          @title ||= get_all_values_from_pnx("record/display/title")
        end

        def journal_title
          @journal_title ||= get_all_values_from_pnx("record/addata/jtitle")
        end

        # Child nodes of every record/addata/date element, flattened.
        def publication_date
          @publication_date ||= [@data.locate("record/addata/date")].flatten.collect { |d| d&.nodes }.flatten
        end

        # Child nodes of the display and search creationdate elements, flattened.
        def date
          @date ||= [
            @data.locate("record/display/creationdate"),
            @data.locate("record/search/creationdate")
          ].flatten.collect { |d| d&.nodes }.flatten
        end

        # Child nodes of the search and display subject elements.
        #
        # @return [Array, nil] nil when no subject nodes were found
        def tags
          @tags ||= [
            @data.locate("record/search/subject").collect { |element| element&.nodes }.flatten,
            @data.locate("record/display/subject").collect { |element| element&.nodes }.flatten
          ].flatten
          @tags unless @tags.empty?
        end

        # Contents of every record/display/description element. String nodes
        # are kept as they are; any other node contributes its #value.
        # The lookup now runs once instead of on every call.
        def notes
          @notes ||= @data.locate("record/display/description").collect do |element|
            # Descend until we are holding the node list itself.
            element = element.nodes until element.is_a?(Array)
            element.collect { |val| val.is_a?(String) ? val : val.value }
          end.flatten
        end

        private

        # Text of the first element at path, or nil when there is none.
        def get_value_from_pnx(path)
          @data.locate(path)&.first&.text
        end

        # Texts of all elements at path.
        def get_all_values_from_pnx(path)
          @data.locate(path).flatten.collect(&:text)
        end

        # Answers "<reader>?" calls: true when the public reader of that name
        # returns a non-nil value. Anything else is a regular NoMethodError.
        def method_missing(method_sym, *arguments, &block)
          return super unless validates_known_attribute?(method_sym)

          !public_send(method_sym.to_s.chomp('?').to_sym).nil?
        end

        # Keeps respond_to? truthful for the dynamic "<reader>?" checks.
        # This replaces the former private override of respond_to? itself,
        # which made respond_to? uncallable from outside the object.
        def respond_to_missing?(method_sym, include_private = false)
          validates_known_attribute?(method_sym) || super
        end

        # True when the method name ends in '?'.
        def is_attribute_validator?(method_sym)
          method_sym.to_s.end_with?('?')
        end

        # True when the name ends in '?' and the name without it is a public
        # method of this reader.
        def validates_known_attribute?(method_sym)
          is_attribute_validator?(method_sym) && respond_to?(method_sym.to_s.chomp('?'))
        end
      end
    end
  end
end
|
@@ -0,0 +1,174 @@
|
|
1
|
+
module Citero
  module Outputs
    # Renders a CSF record as a single BibTeX entry.
    class Bibtex
      # @param csf an object whose #csf method returns the field store that is
      #   read with string keys (for example 'itemType', 'title', 'author')
      def initialize(csf)
        @csf = csf.csf
      end

      # @return [String] the entry in the form "@type{cite_key,\n\tfield = value,...\n}"
      def to_bibtex
        "@#{bibtex_type}{#{cite_key}#{bibtex_fields}\n}"
      end

      # BibTeX entry type for the record's itemType; 'misc' when unmapped.
      def bibtex_type
        export_type_map[@csf['itemType']] || 'misc'
      end

      # Cite key built from author last name, first significant title word and
      # date digits, joined with '_'. Missing parts are left out.
      def cite_key
        [cite_key_author, cite_key_title, cite_key_date].compact.join('_')
      end

      # All "key = value" entries, each prefixed with ",\n\t".
      def bibtex_fields
        # The leading empty string makes join emit the separator before the
        # first real field as well.
        fields = [""]
        fields.concat(creators)
        fields << publisher
        export_field_map.each do |csf_key, bibtex_key|
          fields << map_to_bibtex_value(bibtex_key, csf_key)
        end
        fields << note
        fields << tags
        fields << publicationTitle
        fields << pages
        fields << webpage
        fields.compact.join(",\n\t")
      end

      # Publisher entry. The record's publisher is written as "school" for a
      # thesis, as "institution" for a report and as "publisher" otherwise.
      def publisher
        return map_to_bibtex_value("school", "publisher") if @csf['itemType'].eql?("thesis")
        return map_to_bibtex_value("institution", "publisher") if @csf['itemType'].eql?("report")

        map_to_bibtex_value("publisher", "publisher")
      end

      # TODO gotta concat these tags
      def note
        map_to_bibtex_value('annote', 'note')
      end

      def tags
        map_to_bibtex_value('keywords', 'tags')
      end

      # publicationTitle is written as "booktitle" for book sections and
      # conference papers, and as "journal" for everything else.
      def publicationTitle
        if ['bookSection', 'conferencePaper'].include? @csf['itemType']
          map_to_bibtex_value('booktitle', 'publicationTitle')
        else
          map_to_bibtex_value('journal', 'publicationTitle')
        end
      end

      # Standardizes one name or a list of names and joins them with ' and '.
      def join_names(name)
        [name].flatten.collect do |single_name|
          Citero::Utils::NameFormatter.new(single_name).to_standardized
        end.join(' and ')
      end

      # @return [Array<String>] one entry per creator role that has names;
      #   seriesEditor names are written under the "editor" key
      def creators
        ['author', 'inventor', 'contributor', 'editor',
         'seriesEditor', 'translator'].collect do |role|
          names = join_names(@csf[role])
          bibtex_key = role.eql?('seriesEditor') ? 'editor' : role
          add_to_bibtex_output(bibtex_key, names)
        end.compact
      end

      # Page range and page count entries. Both are returned (joined with the
      # field separator); previously the page range was computed and dropped.
      #
      # @return [String, nil] nil when neither value is present
      def pages
        entries = [
          add_to_bibtex_output('pages', @csf['pages']&.gsub("-", "--")),
          add_to_bibtex_output('numPages', @csf['numPages']&.gsub(",", "\\,"))
        ].compact
        entries.join(",\n\t") unless entries.empty?
      end

      def webpage
        # Gotta see what this is supposed to be..
        add_to_bibtex_output('howpublished','website') if @csf['itemType'].eql? 'webpage'
      end

      # True when value is a plain integer string that may be written without
      # braces. numPages, isbn and issn are always braced.
      def expects_number_value?(key,value)
        (/\A[-+]?\d+\z/ === value) && !['numPages','isbn','issn'].include?(key)
      end

      # Formats one "key = value" entry.
      #
      # Arrays are joined with ', ' (nil elements ignored); other values are
      # converted with to_s. Returns nil when there is nothing to write, so
      # that empty lists no longer produce "key = {}".
      #
      # @return [String, nil]
      def add_to_bibtex_output(key,value)
        text = value.is_a?(Array) ? value.flatten.compact.join(', ') : value.to_s
        return if text.strip.empty?

        text = "{#{text}}" unless expects_number_value?(key, text)
        "#{key} = #{text}"
      end

      # Formats the record's csf_key value as a BibTeX entry named key.
      def map_to_bibtex_value(key,csf_key)
        add_to_bibtex_output(key, @csf[csf_key])
      end

      # Lower-cased last name of the (first) given name, or nil.
      def cite_key_author_last_name(name)
        name = name.first if name.is_a? Array
        Citero::Utils::NameFormatter.new(name).last_name&.downcase
      end

      def cite_key_author
        cite_key_author_last_name(@csf['author']) || cite_key_author_last_name(@csf['contributor'])
      end

      # First word of the lower-cased title after leading "a"/"the"/"on"
      # words have been removed.
      def cite_key_title_first_non_stop_word(title)
        title = title.first if title.is_a? Array
        title&.downcase&.gsub(/^((a+|the+|on+)\s)+/,"")&.split(" ")&.first
      end

      def cite_key_title
        cite_key_title_first_non_stop_word(@csf["title"])
      end

      # Digits of the first date value; "????" when there is no date or it
      # contains no digits. Accepts a single value as well as a list.
      def cite_key_date
        digits = Array(@csf['date']).first.to_s.gsub(/[^0-9]/,'')
        digits.empty? ? "????" : digits
      end

      # CSF item type => BibTeX entry type.
      def export_type_map
        @export_type_map ||= {
          "book" => "book",
          "bookSection" => "incollection",
          "journalArticle" => "article",
          "magazineArticle" => "article",
          "newspaperArticle" => "article",
          "thesis" => "phdthesis",
          "manuscript" => "unpublished",
          "patent" => "patent",
          "letter" => "misc",
          "interview" => "misc",
          "film" => "misc",
          "artwork" => "misc",
          "webpage" => "misc",
          "conferencePaper" => "inproceedings",
          "report" => "techreport"
        }
      end

      # CSF field name => BibTeX field name.
      def export_field_map
        @export_field_map ||= {
          "attachments" => "file",
          "extra" => "note",
          "accessDate" => "urldate",
          "reportNumber" => "number",
          "seriesNumber" => "number",
          "patentNumber" => "number",
          "issue" => "number",
          "place" => "address",
          "section" => "chapter",
          "rights" => "copyright",
          "isbn" => "isbn",
          "issn" => "issn",
          "title" => "title",
          "date" => "date",
          # Was misspelt "iccn"; the BibTeX field is "lccn".
          "callNumber" => "lccn",
          "archiveLocation" => "location",
          "shortTitle" => "shorttitle",
          "doi" => "doi",
          "abstractNote" => "abstract",
          "country" => "nationality",
          "edition" => "edition",
          "language" => "language",
          "type" => "type",
          "series" => "series",
          "volume" => "volume",
          "assignee" => "assignee"
        }
      end
    end
  end
end
|