citero 1.0.0.alpha

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,137 @@
1
module Citero
  module Inputs
    # Converts a raw PNX XML record into a CSF object.
    #
    # The record is read through Citero::Inputs::Readers::PnxReader, collected
    # into a plain Hash keyed by CSF field names, and then loaded into a CSF
    # instance that is exposed through #csf.
    class Pnx
      # @return [CSF] the CSF object populated from the PNX record
      attr_reader :csf

      # @param raw_data [String] PNX XML handed to the PnxReader
      def initialize(raw_data)
        @pnx_reader = Citero::Inputs::Readers::PnxReader.new(raw_data)
        construct_csf
      end

      private

      # Builds @csf once; later calls return the already-built object.
      def construct_csf
        return @csf unless @csf.nil?

        @csf = CSF.new
        @hash = {}
        add_item_type
        parse_and_add_creators
        parse_and_add_publisher
        pages
        add_identifiers
        add_all_other_fields
        @hash['importedFrom'] = 'PNX'
        @csf.load_from_hash(@hash)
      end

      # Lower-cased PNX display types mapped to CSF item types.
      def item_type_conversion_hash
        @item_type_conversion_hash ||= {
          "audio" => "audioRecording",
          "video" => "videoRecording",
          "article" => "journalArticle",
          "books" => "book",
          "book" => "book",
          "report" => "report",
          "webpage" => "webpage",
          "journal" => "journal",
          "map" => "map",
          "thesis" => "thesis"
        }
      end

      # @param raw_type [String] PNX display type, matched case-insensitively
      # @return [String] the CSF item type, 'document' when the type is unknown
      def get_item_type(raw_type)
        item_type_conversion_hash.fetch(raw_type.downcase, 'document')
      end

      def add_item_type
        @hash["itemType"] = get_item_type(@pnx_reader.type || '')
      end

      # Authors come from the creator field, falling back to the contributor
      # field and finally to addau when both are blank. Contributors are only
      # recorded separately when a creator field exists (otherwise they have
      # already been promoted to authors).
      def parse_and_add_creators
        creator = @pnx_reader.creator
        contributor = @pnx_reader.contributor

        authors = creator || contributor
        contributors = creator.nil? ? [] : contributor
        authors = @pnx_reader.addau if creator.to_s.empty? && contributor.to_s.empty?

        add_creators(authors, "author")
        add_creators(contributors, "contributor")
      end

      # Appends every ';'-separated name in +creators+ to @hash[creator_type].
      # Blank segments (e.g. from "Doe;;Roe") are skipped so no empty names
      # end up in the record.
      def add_creators(creators, creator_type)
        return if creators.nil? || creators.empty?

        names = creators.split(";").map(&:strip).reject(&:empty?)
        return if names.empty?

        @hash[creator_type] = [@hash[creator_type], names].flatten.compact
      end

      # Uses the combined display identifier field when present, otherwise the
      # dedicated eissn/issn/isbn fields.
      def add_identifiers
        if @pnx_reader.identifier?
          @pnx_reader.identifier.split(";").each do |id|
            # NOTE(review): only digits are kept, so a trailing "X" check
            # digit is dropped - confirm whether that is intended.
            digits = id.scan(/[0-9]+/).join
            next if digits.empty? # nothing usable in this segment

            # Anything not labelled "isbn" is treated as an ISSN.
            key = id.include?("isbn") ? 'isbn' : 'issn'
            @hash[key] = [@hash[key], digits].flatten.compact
          end
        else
          @hash['eissn'] = @pnx_reader.eissn unless @pnx_reader.eissn.empty?
          @hash['issn'] = [@hash['issn'], @pnx_reader.issn].flatten.compact unless @pnx_reader.issn.empty?
          @hash['isbn'] = [@hash['isbn'], @pnx_reader.isbn].flatten.compact unless @pnx_reader.isbn.empty?
        end
      end

      # Prefers the structured pub/cop fields. When both are empty the display
      # publisher is used instead; a value of the form "Place : Publisher" is
      # split into its two parts.
      def parse_and_add_publisher
        # Read pub/cop into locals first so later reader calls cannot change
        # what is stored.
        pub = @pnx_reader.pub
        cop = @pnx_reader.cop
        display = pub.empty? && cop.empty? ? @pnx_reader.publisher : nil

        if display.nil?
          add_publisher_and_place(pub, cop)
        elsif display.include?(" : ")
          place, publisher = display.split(" : ", 2).map(&:strip)
          add_publisher_and_place(publisher, place)
        else
          add_publisher_and_place(display)
        end
      end

      # Stores publisher and place, ignoring nil and empty values so that
      # blank fields are not carried into the CSF.
      def add_publisher_and_place(publisher = nil, place = nil)
        @hash['publisher'] = publisher unless publisher.nil? || publisher.empty?
        @hash['place'] = place unless place.nil? || place.empty?
      end

      # Stores the first run of digits found in the reader's pages value as
      # numPages (e.g. "xii, 345 p." yields "345").
      def pages
        return unless @pnx_reader.pages

        raw_pages = @pnx_reader.pages.gsub(/[\(\)\[\]]/, "").gsub(/\D/, " ").strip
        @hash['numPages'] = raw_pages.split(" ").first unless raw_pages.empty?
      end

      # CSF field names mapped to the reader method that supplies them.
      def qualified_method_names
        @qualified_method_names ||= {
          "title" => "title",
          "publicationDate" => "publication_date",
          "journalTitle" => "journal_title",
          "date" => "date",
          "language" => "language",
          "edition" => "edition",
          "tags" => "tags",
          "callNumber" => "call_number",
          "pnxRecordId" => "pnx_record_id",
          "description" => "description",
          "notes" => "notes"
        }
      end

      # Copies every field whose "<name>?" predicate on the reader is truthy.
      def add_all_other_fields
        qualified_method_names.each do |standard_form, method_name|
          next unless @pnx_reader.public_send("#{method_name}?")

          @hash[standard_form] = @pnx_reader.public_send(method_name)
        end
      end
    end
  end
end
@@ -0,0 +1,7 @@
1
module Citero
  module Inputs
    # Namespace for the input readers. Opening it here guarantees that
    # Citero::Inputs::Readers exists before the reader file is loaded.
    module Readers
      # Loads the PNX reader from the sibling readers/ directory.
      require_relative 'readers/pnx_reader'
    end
  end
end
@@ -0,0 +1,151 @@
1
module Citero
  module Inputs
    module Readers
      # Reads individual fields out of a PNX XML record parsed with Ox.
      #
      # Scalar accessors return the text of the first matching element (or nil
      # when there is none); list accessors return an Array with one entry per
      # matching element. Every public accessor also answers a "<name>?"
      # predicate that is true when the accessor returns a non-nil value.
      class PnxReader
        require 'ox'

        XML_DECLARATION_START = "<?xml".freeze
        XML_DECLARATION = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n".freeze

        # @param data [String] PNX XML; an XML declaration is prepended when
        #   the string does not already start with one
        def initialize(data)
          # NOTE: this changes Ox's process-wide default options, not just the
          # options used for this parse.
          Ox.default_options = Ox.default_options.merge({ skip: :skip_none })
          parse_data = data.start_with?(XML_DECLARATION_START) ? data : "#{XML_DECLARATION}#{data}"
          @data = Ox.parse(parse_data)
        end

        def type
          @type ||= get_value_from_pnx("record/display/type")
        end

        # Display publisher string. Memoized in its own variable so it cannot
        # be confused with the list returned by #pub.
        def publisher
          @publisher ||= get_value_from_pnx("record/display/publisher")
        end

        def language
          @language ||= get_value_from_pnx("record/display/language")
        end

        def edition
          @edition ||= get_value_from_pnx("record/display/edition")
        end

        def pages
          @pages ||= get_value_from_pnx("record/display/format")
        end

        def identifier
          @identifier ||= get_value_from_pnx("record/display/identifier")
        end

        def creator
          @creator ||= get_value_from_pnx("record/display/creator")
        end

        def addau
          @addau ||= get_value_from_pnx("record/addata/addau")
        end

        def contributor
          @contributor ||= get_value_from_pnx("record/display/contributor")
        end

        def call_number
          @call_number ||= get_value_from_pnx("record/enrichment/classificationlcc")
        end

        def pnx_record_id
          @pnx_record_id ||= get_value_from_pnx("record/control/recordid")
        end

        # NOTE(review): reads the same element as #pages
        # ("record/display/format") - confirm this is intended.
        def description
          @description ||= get_value_from_pnx("record/display/format")
        end

        # @return [Array<String>] all addata publisher values
        def pub
          @pub ||= get_all_values_from_pnx("record/addata/pub")
        end

        # @return [Array<String>] all addata place-of-publication values
        def cop
          @cop ||= get_all_values_from_pnx("record/addata/cop")
        end

        def issn
          @issn ||= get_all_values_from_pnx("record/addata/issn")
        end

        def eissn
          @eissn ||= get_all_values_from_pnx("record/addata/eissn")
        end

        def isbn
          @isbn ||= get_all_values_from_pnx("record/addata/isbn")
        end

        def title
          @title ||= get_all_values_from_pnx("record/display/title")
        end

        def journal_title
          @journal_title ||= get_all_values_from_pnx("record/addata/jtitle")
        end

        # @return [Array] child nodes of every addata date element
        def publication_date
          @publication_date ||= child_nodes_of("record/addata/date")
        end

        # @return [Array] child nodes of the display and search creationdate
        #   elements, display first
        def date
          @date ||= child_nodes_of("record/display/creationdate", "record/search/creationdate")
        end

        # @return [Array, nil] child nodes of the search and display subject
        #   elements, or nil when there are none (so that #tags? is false)
        def tags
          @tags ||= child_nodes_of("record/search/subject", "record/display/subject")
          @tags unless @tags.empty?
        end

        # @return [Array<String>] text of the display description elements;
        #   non-String child nodes contribute their #value
        def notes
          @notes ||= @data.locate("record/display/description").collect { |element|
            # Descend until we are holding a node list rather than an element.
            element = element.nodes until element.is_a?(Array)
            element.collect { |val| val.is_a?(String) ? val : val.value }
          }.flatten
        end

        private

        # Text of the first element at +path+, or nil when nothing matches.
        def get_value_from_pnx(path)
          @data.locate(path)&.first&.text
        end

        # Text of every element at +path+.
        def get_all_values_from_pnx(path)
          @data.locate(path).flatten.collect(&:text)
        end

        # Flattened child nodes of every element found at the given paths, in
        # path order.
        def child_nodes_of(*paths)
          paths.collect { |path| @data.locate(path) }.flatten.collect { |element| element&.nodes }.flatten
        end

        # Answers "<attribute>?" for every public accessor: true when the
        # accessor returns a non-nil value. Anything else is a NoMethodError.
        def method_missing(method_sym, *arguments, &block)
          return super unless is_attribute_validator?(method_sym)

          !public_send(method_sym.to_s.chomp('?')).nil?
        end

        # Keeps respond_to? (which stays public) in step with method_missing.
        def respond_to_missing?(method_sym, include_private = false)
          is_attribute_validator?(method_sym) || super
        end

        # True when +method_sym+ ends in '?' and names an existing public
        # accessor once the '?' is removed.
        def is_attribute_validator?(method_sym)
          name = method_sym.to_s
          name.end_with?('?') && self.class.public_method_defined?(name.chomp('?'))
        end
      end
    end
  end
end
@@ -0,0 +1,9 @@
1
module Citero
  # Namespace for the output formats. Each format lives in its own file under
  # outputs/ and is loaded, in this order, while the module is open.
  module Outputs
    %w[ris openurl bibtex easybib refworks_tagged].each do |format_file|
      require_relative "outputs/#{format_file}"
    end
  end
end
@@ -0,0 +1,174 @@
1
module Citero
  module Outputs
    # Renders a CSF record as a single BibTeX entry.
    class Bibtex
      # @param csf [#csf] object whose #csf returns the record; the record is
      #   read with String keys via #[]
      def initialize(csf)
        @csf = csf.csf
      end

      # @return [String] the complete entry: "@type{cite_key,\n\tfield = ...\n}"
      def to_bibtex
        "@#{bibtex_type}{#{cite_key}#{bibtex_fields}\n}"
      end

      # @return [String] BibTeX entry type for the record's itemType,
      #   'misc' when the item type has no mapping
      def bibtex_type
        export_type_map[@csf['itemType']] || 'misc'
      end

      # @return [String] author last name, first significant title word and
      #   date digits joined by '_' (missing parts are left out)
      def cite_key
        [cite_key_author, cite_key_title, cite_key_date].compact.join('_')
      end

      # @return [String] every populated field, each preceded by ",\n\t"
      def bibtex_fields
        # The leading empty string makes the join start with a separator, so
        # the result can be appended directly after the cite key.
        fields = [""]
        fields.concat(creators)
        fields << publisher
        export_field_map.each do |csf_key, bibtex_key|
          fields << map_to_bibtex_value(bibtex_key, csf_key)
        end
        fields << note << tags << publicationTitle << pages << webpage
        fields.compact.join(",\n\t")
      end

      # The record's publisher, emitted as "school" for a thesis,
      # "institution" for a report and "publisher" otherwise.
      def publisher
        bibtex_key = case @csf['itemType']
                     when "thesis" then "school"
                     when "report" then "institution"
                     else "publisher"
                     end
        map_to_bibtex_value(bibtex_key, "publisher")
      end

      # TODO gotta concat these tags
      def note
        map_to_bibtex_value('annote', 'note')
      end

      def tags
        map_to_bibtex_value('keywords', 'tags')
      end

      # publicationTitle is a "booktitle" for book sections and conference
      # papers, a "journal" for everything else.
      def publicationTitle
        if ['bookSection', 'conferencePaper'].include? @csf['itemType']
          map_to_bibtex_value('booktitle', 'publicationTitle')
        else
          map_to_bibtex_value('journal', 'publicationTitle')
        end
      end

      # @param name [String, Array<String>, nil] one or more creator names
      # @return [String] the standardized names joined with ' and '
      #   (an empty string when there are no names)
      def join_names(name)
        [name].flatten.compact.collect do |creator_name|
          Citero::Utils::NameFormatter.new(creator_name).to_standardized
        end.join(' and ')
      end

      # @return [Array<String>] one field per populated creator role;
      #   seriesEditor is emitted under the "editor" key
      def creators
        %w[author inventor contributor editor seriesEditor translator].collect do |creator|
          names = join_names(@csf[creator])
          bibtex_key = creator.eql?('seriesEditor') ? 'editor' : creator
          add_to_bibtex_output(bibtex_key, names)
        end.compact
      end

      # @return [String, nil] the "pages" field (single hyphens doubled) and
      #   the "numPages" field (commas escaped), joined with the same
      #   separator #bibtex_fields uses; nil when neither is present
      def pages
        entries = [
          add_to_bibtex_output('pages', @csf['pages']&.gsub("-", "--")),
          add_to_bibtex_output('numPages', @csf['numPages']&.gsub(",", "\\,"))
        ].compact
        entries.join(",\n\t") unless entries.empty?
      end

      def webpage
        # Gotta see what this is supposed to be..
        add_to_bibtex_output('howpublished','website') if @csf['itemType'].eql? 'webpage'
      end

      # True when +value+ is a plain integer string that should be written
      # without braces; numPages, isbn and issn are always braced.
      def expects_number_value?(key, value)
        value.is_a?(String) && value.match?(/\A[-+]?\d+\z/) &&
          !['numPages', 'isbn', 'issn'].include?(key)
      end

      # @param key [String] BibTeX field name
      # @param value [String, Array, nil] field value; arrays are joined
      #   with ', '
      # @return [String, nil] "key = {value}" (or "key = value" for plain
      #   numbers), nil when there is nothing to output
      def add_to_bibtex_output(key, value)
        value = value.compact.join(', ') if value.is_a?(Array)
        # Checked after joining so an empty array does not yield "key = {}".
        return if value.nil? || value.to_s.strip.empty?

        value = "{#{value}}" unless expects_number_value?(key, value)
        "#{key} = #{value}"
      end

      # Emits the record's +csf_key+ value under the BibTeX field +key+.
      def map_to_bibtex_value(key, csf_key)
        add_to_bibtex_output(key, @csf[csf_key])
      end

      # @param name [String, Array<String>, nil] creator name(s); only the
      #   first is used
      # @return [String, nil] lower-cased last name
      def cite_key_author_last_name(name)
        name = name.first if name.is_a? Array
        return if name.nil?

        Citero::Utils::NameFormatter.new(name).last_name&.downcase
      end

      def cite_key_author
        cite_key_author_last_name(@csf['author']) || cite_key_author_last_name(@csf['contributor'])
      end

      # @param title [String, Array<String>, nil] title(s); only the first
      #   is used
      # @return [String, nil] first word of the lower-cased title after
      #   leading stop words are removed
      def cite_key_title_first_non_stop_word(title)
        title = title.first if title.is_a? Array
        # NOTE(review): "a+", "the+" and "on+" also match repeated trailing
        # letters ("aa", "thee") - confirm whether plain a/the/on was meant.
        title&.downcase&.gsub(/^((a+|the+|on+)\s)+/,"")&.split(" ")&.first
      end

      def cite_key_title
        cite_key_title_first_non_stop_word(@csf["title"])
      end

      # @return [String] the digits of the (first) date value, "????" when
      #   the record has no date
      def cite_key_date
        date = @csf['date']
        # Accept a bare String as well as an Array, like the title and
        # author helpers do.
        date = date.first if date.is_a?(Array)
        date&.gsub(/[^0-9]/,'') || "????"
      end

      # CSF item types mapped to BibTeX entry types.
      def export_type_map
        @export_type_map ||= {
          "book" => "book",
          "bookSection" => "incollection",
          "journalArticle" => "article",
          "magazineArticle" => "article",
          "newspaperArticle" => "article",
          "thesis" => "phdthesis",
          "manuscript" => "unpublished",
          "patent" => "patent",
          "letter" => "misc",
          "interview" => "misc",
          "film" => "misc",
          "artwork" => "misc",
          "webpage" => "misc",
          "conferencePaper" => "inproceedings",
          "report" => "techreport"
        }
      end

      # CSF field names mapped to the BibTeX field they are written under.
      # Several CSF fields share the "number" key, so more than one "number"
      # field can appear in an entry.
      def export_field_map
        @export_field_map ||= {
          "attachments" => "file",
          "extra" => "note",
          "accessDate" => "urldate",
          "reportNumber" => "number",
          "seriesNumber" => "number",
          "patentNumber" => "number",
          "issue" => "number",
          "place" => "address",
          "section" => "chapter",
          "rights" => "copyright",
          "isbn" => "isbn",
          "issn" => "issn",
          "title" => "title",
          "date" => "date",
          # NOTE(review): "iccn" may be a typo for "lccn" - confirm before
          # changing, since it alters the emitted field name.
          "callNumber" => "iccn",
          "archiveLocation" => "location",
          "shortTitle" => "shorttitle",
          "doi" => "doi",
          "abstractNote" => "abstract",
          "country" => "nationality",
          "edition" => "edition",
          "language" => "language",
          "type" => "type",
          "series" => "series",
          "volume" => "volume",
          "assignee" => "assignee"
        }
      end
    end
  end
end