citero 1.0.0.alpha

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,137 @@
1
module Citero
  module Inputs
    # Builds a CSF object from a raw PNX record.
    #
    # The raw data is handed to Readers::PnxReader. The values the reader
    # exposes are collected into a hash keyed by CSF field name, and that hash
    # is loaded into a new CSF instance, available afterwards through #csf.
    class Pnx
      # Item type used when the PNX type is missing or not in ITEM_TYPES.
      DEFAULT_ITEM_TYPE = 'document'

      # Lower-cased PNX display type => CSF item type.
      ITEM_TYPES = {
        "audio" => "audioRecording",
        "video" => "videoRecording",
        "article" => "journalArticle",
        "books" => "book",
        "book" => "book",
        "report" => "report",
        "webpage" => "webpage",
        "journal" => "journal",
        "map" => "map",
        "thesis" => "thesis"
      }.freeze

      # CSF field name => reader method that supplies it. Each reader method
      # is paired with a `<name>?` predicate that reports whether it is set.
      READER_FIELDS = {
        "title" => "title",
        "publicationDate" => "publication_date",
        "journalTitle" => "journal_title",
        "date" => "date",
        "language" => "language",
        "edition" => "edition",
        "tags" => "tags",
        "callNumber" => "call_number",
        "pnxRecordId" => "pnx_record_id",
        "description" => "description",
        "notes" => "notes"
      }.freeze

      attr_reader :csf

      # @param raw_data [String] the PNX record, passed as-is to PnxReader
      def initialize(raw_data)
        @pnx_reader = Citero::Inputs::Readers::PnxReader.new(raw_data)
        construct_csf
      end

      private

      # Fills @hash field by field and loads it into a new CSF (done once).
      def construct_csf
        return @csf unless @csf.nil?

        @csf = CSF.new
        @hash = {}
        add_item_type
        parse_and_add_creators
        parse_and_add_publisher
        pages
        add_identifiers
        add_all_other_fields
        @hash['importedFrom'] = 'PNX'
        @csf.load_from_hash(@hash)
      end

      # @param raw_type [String] PNX type, matched case-insensitively
      # @return [String] the CSF item type, or DEFAULT_ITEM_TYPE if unknown
      def get_item_type(raw_type)
        ITEM_TYPES.fetch(raw_type.downcase, DEFAULT_ITEM_TYPE)
      end

      def add_item_type
        @hash["itemType"] = get_item_type(@pnx_reader.type || '')
      end

      # Authors come from the creator, falling back to the contributor; when
      # both are blank the addau value is used instead. The contributor is
      # recorded as a contributor only when a creator is present.
      def parse_and_add_creators
        creator = @pnx_reader.creator
        contributor = @pnx_reader.contributor

        authors = creator || contributor
        contributors = creator.nil? ? [] : contributor
        authors = @pnx_reader.addau if creator.to_s.empty? && contributor.to_s.empty?

        add_creators(authors, "author")
        add_creators(contributors, "contributor")
      end

      # Appends every non-blank, ";"-separated name to the given field.
      def add_creators(creators, creator_type)
        return if creators.nil? || creators.empty?

        names = creators.split(";").map(&:strip).reject(&:empty?)
        append_values(creator_type, names) unless names.empty?
      end

      # Uses the ";"-separated identifier value when the reader has one,
      # otherwise the dedicated eissn/issn/isbn values.
      def add_identifiers
        if @pnx_reader.identifier?
          @pnx_reader.identifier.split(";").each do |id|
            # Anything not labelled "isbn" is filed under issn.
            field = id.include?("isbn") ? 'isbn' : 'issn'
            # NOTE(review): only digits survive, so an "X" check digit is dropped.
            append_values(field, id.scan(/[0-9]+/).join)
          end
        else
          @hash['eissn'] = @pnx_reader.eissn unless @pnx_reader.eissn.empty?
          append_values('issn', @pnx_reader.issn) unless @pnx_reader.issn.empty?
          append_values('isbn', @pnx_reader.isbn) unless @pnx_reader.isbn.empty?
        end
      end

      # Prefers the dedicated pub/cop values. When both are empty, falls back
      # to the display publisher, which may be written as "Place : Publisher".
      def parse_and_add_publisher
        pub = @pnx_reader.pub
        cop = @pnx_reader.cop
        # The display value is only consulted when pub and cop are both empty.
        display = @pnx_reader.publisher if pub.empty? && cop.empty?

        if display.nil?
          add_publisher_and_place(pub, cop)
        elsif display.include?(" : ")
          place, publisher = display.split(" : ", 2).map(&:strip)
          add_publisher_and_place(publisher, place)
        else
          add_publisher_and_place(display)
        end
      end

      # Stores whichever of the two values is present; nil and empty values
      # are skipped so no blank publisher or place is recorded.
      def add_publisher_and_place(publisher = nil, place = nil)
        @hash['publisher'] = publisher unless blank_value?(publisher)
        @hash['place'] = place unless blank_value?(place)
      end

      # Stores the first run of digits in the reader's pages value as numPages.
      def pages
        raw = @pnx_reader.pages
        return unless raw

        digits = raw.gsub(/[\(\)\[\]]/, "").gsub(/\D/, " ").strip
        @hash['numPages'] = digits.split(" ").first unless digits.empty?
      end

      # Copies every READER_FIELDS value whose `<name>?` predicate is true.
      def add_all_other_fields
        READER_FIELDS.each do |csf_field, reader_method|
          next unless @pnx_reader.public_send("#{reader_method}?")

          @hash[csf_field] = @pnx_reader.public_send(reader_method)
        end
      end

      # Adds one value or a list of values to a multi-valued field.
      def append_values(field, values)
        @hash[field] = [@hash[field], values].flatten.compact
      end

      def blank_value?(value)
        value.nil? || value.empty?
      end
    end
  end
end
@@ -0,0 +1,7 @@
1
module Citero
  module Inputs
    # Namespace for the reader classes; it is opened here so that it exists
    # before the reader file below is loaded.
    module Readers; end
  end
end

# Load the PNX reader that lives in the readers/ directory next to this file.
require_relative 'readers/pnx_reader'
@@ -0,0 +1,151 @@
1
module Citero
  module Inputs
    module Readers
      # Reads individual values out of a PNX XML document parsed with Ox.
      #
      # Every public accessor memoizes its result. For each accessor `foo` a
      # `foo?` predicate is answered dynamically: it is true when `foo`
      # returns something other than nil.
      class PnxReader
        require 'ox'

        XML_DECLARATION_START = "<?xml"
        XML_DECLARATION = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"

        # @param data [String] the PNX XML; an XML declaration is prepended
        #   when the string does not already start with one
        def initialize(data)
          # NOTE: this replaces Ox's default options, not only those of this parse.
          Ox.default_options = Ox.default_options.merge(skip: :skip_none)
          xml = data.start_with?(XML_DECLARATION_START) ? data : "#{XML_DECLARATION}#{data}"
          @data = Ox.parse(xml)
        end

        def type
          @type ||= get_value_from_pnx("record/display/type")
        end

        # Display publisher. Kept in its own instance variable so it can never
        # be confused with the value cached by #pub.
        def publisher
          @publisher ||= get_value_from_pnx("record/display/publisher")
        end

        def language
          @language ||= get_value_from_pnx("record/display/language")
        end

        def edition
          @edition ||= get_value_from_pnx("record/display/edition")
        end

        def pages
          @pages ||= get_value_from_pnx("record/display/format")
        end

        def identifier
          @identifier ||= get_value_from_pnx("record/display/identifier")
        end

        def creator
          @creator ||= get_value_from_pnx("record/display/creator")
        end

        def addau
          @addau ||= get_value_from_pnx("record/addata/addau")
        end

        def contributor
          @contributor ||= get_value_from_pnx("record/display/contributor")
        end

        def call_number
          @call_number ||= get_value_from_pnx("record/enrichment/classificationlcc")
        end

        def pnx_record_id
          @pnx_record_id ||= get_value_from_pnx("record/control/recordid")
        end

        # NOTE(review): reads the same path as #pages (record/display/format),
        # while #notes reads record/display/description. Confirm this is intended.
        def description
          @description ||= get_value_from_pnx("record/display/format")
        end

        # All record/addata/pub values (an array, possibly empty).
        def pub
          @pub ||= get_all_values_from_pnx("record/addata/pub")
        end

        # All record/addata/cop values (an array, possibly empty).
        def cop
          @place_of_publication ||= get_all_values_from_pnx("record/addata/cop")
        end

        def issn
          @issn ||= get_all_values_from_pnx("record/addata/issn")
        end

        def eissn
          @eissn ||= get_all_values_from_pnx("record/addata/eissn")
        end

        def isbn
          @isbn ||= get_all_values_from_pnx("record/addata/isbn")
        end

        def title
          @title ||= get_all_values_from_pnx("record/display/title")
        end

        def journal_title
          @journal_title ||= get_all_values_from_pnx("record/addata/jtitle")
        end

        def publication_date
          @publication_date ||= nodes_at("record/addata/date")
        end

        def date
          @date ||= nodes_at("record/display/creationdate", "record/search/creationdate")
        end

        # Search subjects followed by display subjects, or nil when there are
        # none (so that `tags?` is false for a record without subjects).
        def tags
          @tags ||= nodes_at("record/search/subject", "record/display/subject")
          @tags unless @tags.empty?
        end

        # Text of every record/display/description element. Child nodes that
        # are not plain strings contribute their `value`.
        def notes
          @notes ||= @data.locate("record/display/description").map do |element|
            children = element
            children = children.nodes until children.is_a?(Array)
            children.map { |node| node.is_a?(String) ? node : node.value }
          end.flatten
        end

        private

        # Text of the first element at +path+, or nil when nothing matches.
        def get_value_from_pnx(path)
          @data.locate(path)&.first&.text
        end

        # Text of every element at +path+.
        def get_all_values_from_pnx(path)
          @data.locate(path).flatten.collect(&:text)
        end

        # Child nodes of every element found at the given paths, in path order.
        def nodes_at(*paths)
          paths.map { |path| @data.locate(path) }.flatten.map { |element| element&.nodes }.flatten
        end

        # Answers `foo?` for any public accessor `foo`; everything else is
        # left to the default handling (NoMethodError).
        def method_missing(method_sym, *arguments, &block)
          return super unless attribute_validator_for_accessor?(method_sym)

          !public_send(method_sym.to_s.chomp('?')).nil?
        end

        # Keeps respond_to? in step with method_missing without overriding
        # respond_to? itself.
        def respond_to_missing?(method_sym, include_private = false)
          attribute_validator_for_accessor?(method_sym) || super
        end

        def is_attribute_validator?(method_sym)
          method_sym.to_s.end_with?('?')
        end

        # True when +method_sym+ is `<accessor>?` for an existing public accessor.
        def attribute_validator_for_accessor?(method_sym)
          is_attribute_validator?(method_sym) && respond_to?(method_sym.to_s.chomp('?'))
        end
      end
    end
  end
end
@@ -0,0 +1,9 @@
1
module Citero
  # Namespace for the output formats; it is opened here so that it exists
  # before the format files below are loaded.
  module Outputs; end
end

# Load each output format from the outputs/ directory next to this file,
# in this order.
%w[ris openurl bibtex easybib refworks_tagged].each do |format_file|
  require_relative "outputs/#{format_file}"
end
@@ -0,0 +1,174 @@
1
module Citero
  module Outputs
    # Renders a CSF record as a single BibTeX entry.
    #
    # Field values are read with string keys from the object returned by
    # `csf.csf`; a value may be a String or an Array of Strings.
    class Bibtex
      # CSF item type => BibTeX entry type ('misc' when not listed).
      EXPORT_TYPES = {
        "book" => "book",
        "bookSection" => "incollection",
        "journalArticle" => "article",
        "magazineArticle" => "article",
        "newspaperArticle" => "article",
        "thesis" => "phdthesis",
        "manuscript" => "unpublished",
        "patent" => "patent",
        "letter" => "misc",
        "interview" => "misc",
        "film" => "misc",
        "artwork" => "misc",
        "webpage" => "misc",
        "conferencePaper" => "inproceedings",
        "report" => "techreport"
      }.freeze

      # CSF field => BibTeX field, emitted in this order.
      EXPORT_FIELDS = {
        "attachments" => "file",
        "extra" => "note",
        "accessDate" => "urldate",
        "reportNumber" => "number",
        "seriesNumber" => "number",
        "patentNumber" => "number",
        "issue" => "number",
        "place" => "address",
        "section" => "chapter",
        "rights" => "copyright",
        "isbn" => "isbn",
        "issn" => "issn",
        "title" => "title",
        "date" => "date",
        "callNumber" => "lccn",
        "archiveLocation" => "location",
        "shortTitle" => "shorttitle",
        "doi" => "doi",
        "abstractNote" => "abstract",
        "country" => "nationality",
        "edition" => "edition",
        "language" => "language",
        "type" => "type",
        "series" => "series",
        "volume" => "volume",
        "assignee" => "assignee"
      }.freeze

      # CSF creator fields, in output order.
      CREATOR_FIELDS = %w[author inventor contributor editor seriesEditor translator].freeze

      # Keys whose all-digit values are still wrapped in braces.
      ALWAYS_BRACED_KEYS = %w[numPages isbn issn].freeze

      # Separator placed between the fields of an entry.
      FIELD_SEPARATOR = ",\n\t"

      # @param csf [#csf] object whose #csf returns the field container
      def initialize(csf)
        @csf = csf.csf
      end

      # @return [String] the complete entry: "@type{cite_key,\n\tfield = ...\n}"
      def to_bibtex
        "@#{bibtex_type}{#{cite_key}#{bibtex_fields}\n}"
      end

      def bibtex_type
        export_type_map[@csf['itemType']] || 'misc'
      end

      # Author last name, first title word and date digits joined by "_".
      def cite_key
        [cite_key_author, cite_key_title, cite_key_date].compact.join('_')
      end

      # All fields joined with FIELD_SEPARATOR. The leading empty element puts
      # a separator in front of the first field, right after the cite key.
      def bibtex_fields
        fields = [""]
        fields.concat(creators)
        fields << publisher
        export_field_map.each do |csf_key, bibtex_key|
          fields << map_to_bibtex_value(bibtex_key, csf_key)
        end
        fields << note << tags << publicationTitle << pages << webpage
        fields.compact.join(FIELD_SEPARATOR)
      end

      # The CSF publisher, written as `school` for a thesis, `institution` for
      # a report and `publisher` for everything else.
      def publisher
        return map_to_bibtex_value("school", "publisher") if @csf['itemType'].eql?("thesis")
        return map_to_bibtex_value("institution", "publisher") if @csf['itemType'].eql?("report")

        map_to_bibtex_value("publisher", "publisher")
      end

      # TODO: gotta concat these tags
      def note
        map_to_bibtex_value('annote', 'note')
      end

      def tags
        map_to_bibtex_value('keywords', 'tags')
      end

      def publicationTitle
        if ['bookSection', 'conferencePaper'].include? @csf['itemType']
          map_to_bibtex_value('booktitle', 'publicationTitle')
        else
          map_to_bibtex_value('journal', 'publicationTitle')
        end
      end

      # Standardizes each name and joins them with " and "; nil names are
      # skipped, so a missing creator field yields an empty string.
      def join_names(name)
        [name].flatten.compact.map do |single_name|
          Citero::Utils::NameFormatter.new(single_name).to_standardized
        end.join(' and ')
      end

      # @return [Array<String>] one entry per creator field that has names;
      #   seriesEditor is written under the `editor` key
      def creators
        CREATOR_FIELDS.map do |csf_key|
          bibtex_key = csf_key.eql?('seriesEditor') ? 'editor' : csf_key
          add_to_bibtex_output(bibtex_key, join_names(@csf[csf_key]))
        end.compact
      end

      # The `pages` field ("-" doubled to "--") and the `numPages` field
      # (commas escaped), joined with FIELD_SEPARATOR.
      #
      # @return [String, nil] nil when neither value is present
      def pages
        entries = [
          add_to_bibtex_output('pages', replace_in_value(@csf['pages'], "-", "--")),
          add_to_bibtex_output('numPages', replace_in_value(@csf['numPages'], ",", "\\,"))
        ].compact
        entries.join(FIELD_SEPARATOR) unless entries.empty?
      end

      def webpage
        # Gotta see what this is supposed to be..
        add_to_bibtex_output('howpublished', 'website') if @csf['itemType'].eql? 'webpage'
      end

      # True when +value+ is an integer string that may be written without
      # braces, i.e. its key is not one of ALWAYS_BRACED_KEYS.
      def expects_number_value?(key, value)
        value.is_a?(String) && /\A[-+]?\d+\z/.match?(value) && !ALWAYS_BRACED_KEYS.include?(key)
      end

      # Formats one "key = value" field.
      #
      # @param key [String] BibTeX field name
      # @param value [String, Array, nil] arrays are joined with ", "
      # @return [String, nil] nil when there is nothing to write
      def add_to_bibtex_output(key, value)
        value = value.join(', ') if value.is_a?(Array)
        return if value.nil?

        value = value.to_s
        return if value.strip.empty?

        value = "{#{value}}" unless expects_number_value?(key, value)
        "#{key} = #{value}"
      end

      # Formats the CSF field +csf_key+ under the BibTeX field name +key+.
      def map_to_bibtex_value(key, csf_key)
        add_to_bibtex_output(key, @csf[csf_key])
      end

      # Lower-cased last name of the (first) given name, or nil without one.
      def cite_key_author_last_name(name)
        name = name.first if name.is_a? Array
        return if name.nil?

        Citero::Utils::NameFormatter.new(name).last_name&.downcase
      end

      def cite_key_author
        cite_key_author_last_name(@csf['author']) || cite_key_author_last_name(@csf['contributor'])
      end

      # First word of the lower-cased title once leading "a"/"the"/"on" words
      # have been removed.
      def cite_key_title_first_non_stop_word(title)
        title = title.first if title.is_a? Array
        title&.downcase&.gsub(/^((a+|the+|on+)\s)+/, "")&.split(" ")&.first
      end

      def cite_key_title
        cite_key_title_first_non_stop_word(@csf["title"])
      end

      # Digits of the first date value, or "????" when there are none.
      def cite_key_date
        first_date = [@csf['date']].flatten.first
        digits = first_date.to_s.gsub(/[^0-9]/, '')
        digits.empty? ? "????" : digits
      end

      def export_type_map
        EXPORT_TYPES
      end

      def export_field_map
        EXPORT_FIELDS
      end

      private

      # Replaces +pattern+ with the literal +replacement+ in a String or in
      # every element of an Array; nil is passed through.
      def replace_in_value(value, pattern, replacement)
        return if value.nil?
        return value.map { |item| item.to_s.gsub(pattern) { replacement } } if value.is_a?(Array)

        value.to_s.gsub(pattern) { replacement }
      end
    end
  end
end