relaton-bipm 1.14.1 → 1.14.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.gitignore +1 -0
- data/Gemfile +6 -0
- data/README.adoc +32 -12
- data/grammars/basicdoc.rng +0 -1
- data/grammars/biblio.rng +12 -2
- data/lib/relaton_bipm/bipm_bibliography.rb +12 -310
- data/lib/relaton_bipm/bipm_si_brochure_parser.rb +8 -4
- data/lib/relaton_bipm/comment_periond.rb +1 -1
- data/lib/relaton_bipm/data_fetcher.rb +17 -5
- data/lib/relaton_bipm/data_outcomes_parser.rb +68 -29
- data/lib/relaton_bipm/id_parser.rb +134 -0
- data/lib/relaton_bipm/processor.rb +5 -4
- data/lib/relaton_bipm/rawdata_bipm_metrologia/article_parser.rb +311 -0
- data/lib/relaton_bipm/rawdata_bipm_metrologia/fetcher.rb +176 -0
- data/lib/relaton_bipm/version.rb +1 -1
- data/lib/relaton_bipm.rb +5 -1
- data/relaton_bipm.gemspec +2 -6
- metadata +26 -80
- data/lib/relaton_bipm/index.rb +0 -68
@@ -7,13 +7,21 @@ module RelatonBipm
|
|
7
7
|
"Statement" => "DECL",
|
8
8
|
}.freeze
|
9
9
|
|
10
|
+
TRANSLATIONS = {
|
11
|
+
"Déclaration" => "Declaration",
|
12
|
+
"Réunion" => "Meeting",
|
13
|
+
"Recommandation" => "Recommendation",
|
14
|
+
"Résolution" => "Resolution",
|
15
|
+
"Décision" => "Decision",
|
16
|
+
}.freeze
|
17
|
+
|
10
18
|
#
|
11
19
|
# Create data-outcomes parser
|
12
20
|
#
|
13
21
|
# @param [RelatonBipm::DataFetcher] data_fetcher data fetcher
|
14
22
|
#
|
15
23
|
def initialize(data_fetcher)
|
16
|
-
@data_fetcher = data_fetcher
|
24
|
+
@data_fetcher = WeakRef.new data_fetcher
|
17
25
|
end
|
18
26
|
|
19
27
|
#
|
@@ -68,21 +76,11 @@ module RelatonBipm
|
|
68
76
|
# @param [String] dir output directory
|
69
77
|
#
|
70
78
|
def fetch_meeting(en_file, body, type, dir) # rubocop:disable Metrics/AbcSize, Metrics/MethodLength
|
71
|
-
en =
|
72
|
-
en_md = en
|
73
|
-
|
74
|
-
fr = RelatonBib.parse_yaml File.read(fr_file, encoding: "UTF-8"), [Date]
|
75
|
-
fr_md = fr["metadata"]
|
76
|
-
gh_src = "https://raw.githubusercontent.com/metanorma/bipm-data-outcomes/"
|
77
|
-
src_en = gh_src + en_file.split("/")[-3..].unshift("main").join("/")
|
78
|
-
src_fr = gh_src + fr_file.split("/")[-3..].unshift("main").join("/")
|
79
|
-
src = [
|
80
|
-
{ type: "src", content: src_en, language: "en", script: "Latn" },
|
81
|
-
{ type: "src", content: src_fr, language: "fr", script: "Latn" },
|
82
|
-
]
|
79
|
+
_, en, fr_file, fr = read_files en_file
|
80
|
+
en_md, fr_md, num, part = meeting_md en, fr
|
81
|
+
src = meeting_links en_file, fr_file
|
83
82
|
|
84
|
-
|
85
|
-
file = "#{num}.yaml"
|
83
|
+
file = "#{num}.#{@data_fetcher.ext}"
|
86
84
|
path = File.join dir, file
|
87
85
|
hash = bibitem body: body, type: type, en: en_md, fr: fr_md, num: num, src: src, pdf: en["pdf"]
|
88
86
|
if @data_fetcher.files.include?(path) && part
|
@@ -92,13 +90,13 @@ module RelatonBipm
|
|
92
90
|
has_part_item = RelatonBipm::BipmBibliographicItem.from_hash(yaml)
|
93
91
|
has_part_item.relation << RelatonBib::DocumentRelation.new(type: "partOf", bibitem: item)
|
94
92
|
@data_fetcher.write_file path, has_part_item, warn_duplicate: false
|
95
|
-
path = File.join dir, "#{num}-#{part}.
|
93
|
+
path = File.join dir, "#{num}-#{part}.#{@data_fetcher.ext}"
|
96
94
|
elsif part
|
97
95
|
hash[:title].each { |t| t[:content] = t[:content].sub(/\s\(.+\)$/, "") }
|
98
96
|
h = bibitem body: body, type: type, en: en_md, fr: fr_md, num: num, src: src, pdf: en["pdf"]
|
99
97
|
add_part h, part
|
100
98
|
part_item = RelatonBipm::BipmBibliographicItem.new(**h)
|
101
|
-
part_item_path = File.join dir, "#{num}-#{part}.
|
99
|
+
part_item_path = File.join dir, "#{num}-#{part}.#{@data_fetcher.ext}"
|
102
100
|
@data_fetcher.write_file part_item_path, part_item
|
103
101
|
add_to_index part_item, part_item_path
|
104
102
|
hash[:relation] = [RelatonBib::DocumentRelation.new(type: "partOf", bibitem: part_item)]
|
@@ -111,6 +109,40 @@ module RelatonBipm
|
|
111
109
|
fetch_resolution body: body, en: en, fr: fr, dir: dir, src: src, num: num
|
112
110
|
end
|
113
111
|
|
112
|
+
#
|
113
|
+
# Read English and French files
|
114
|
+
#
|
115
|
+
# @param [String] en_file Path to English file
|
116
|
+
#
|
117
|
+
# @return [Array<String, Hash, nil>] English path, English data, French path, French data (path and data are nil for a missing file)
|
118
|
+
#
|
119
|
+
def read_files(en_file)
  # NOTE(review): only the first "en" anywhere in the path is replaced, so a
  # parent directory containing "en" would yield a wrong French path - confirm
  # against the layout of the data repository.
  fr_file = en_file.sub "en", "fr"
  # flat_map flattens exactly one level, so the result is always
  # [en_path, en_data, fr_path, fr_data] even when the parsed YAML document
  # is itself an Array (a plain `flatten` would splice such data into the result).
  [en_file, fr_file].flat_map do |file|
    next [nil, nil] unless File.exist? file

    [file, RelatonBib.parse_yaml(File.read(file, encoding: "UTF-8"), [Date])]
  end
end
|
129
|
+
|
130
|
+
#
# Extract meeting metadata and split the English identifier into number and part
#
# @param [Hash] eng parsed English document
# @param [Hash, nil] frn parsed French document, if any
#
# @return [Array] English metadata, French metadata (or nil), number, part (or nil)
#
def meeting_md(eng, frn)
  english = eng["metadata"]
  french = frn&.dig("metadata")
  number, part = english["identifier"].to_s.split("-")
  [english, french, number, part]
end
|
135
|
+
|
136
|
+
#
# Build "src" links to the raw source files on GitHub
#
# @param [String, nil] en_file path to the English file
# @param [String, nil] fr_file path to the French file
#
# @return [Array<Hash>] one link hash per file that is present
#
def meeting_links(en_file, fr_file)
  base = "https://raw.githubusercontent.com/metanorma/bipm-data-outcomes/"
  [["en", en_file], ["fr", fr_file]].each_with_object([]) do |(lang, file), links|
    next unless file

    # Keep the last three path components and prefix them with the branch name.
    tail = file.split("/")[-3..]
    url = base + tail.unshift("main").join("/")
    links << { type: "src", content: url, language: lang, script: "Latn" }
  end
end
|
145
|
+
|
114
146
|
#
|
115
147
|
# Parse BIPM resolutions and write them to YAML files
|
116
148
|
#
|
@@ -146,9 +178,7 @@ module RelatonBipm
|
|
146
178
|
hash[:contributor] = contributors date, args[:body]
|
147
179
|
hash[:structuredidentifier] = RelatonBipm::StructuredIdentifier.new docnumber: num
|
148
180
|
item = RelatonBipm::BipmBibliographicItem.new(**hash)
|
149
|
-
file = year
|
150
|
-
file += "-#{num_justed}" # if num.size < 4
|
151
|
-
file += ".yaml"
|
181
|
+
file = "#{year}-#{num_justed}.#{@data_fetcher.ext}"
|
152
182
|
out_dir = File.join args[:dir], r["type"].downcase
|
153
183
|
FileUtils.mkdir_p out_dir
|
154
184
|
path = File.join out_dir, file
|
@@ -209,6 +239,9 @@ module RelatonBipm
|
|
209
239
|
end
|
210
240
|
key << item.docidentifier.detect { |i| i.language == "fr" }.id
|
211
241
|
@data_fetcher.index[key] = path
|
242
|
+
@data_fetcher.index_new.add_or_update key, path
|
243
|
+
key2 = Id.new(item.docnumber).normalized_hash
|
244
|
+
@data_fetcher.index2.add_or_update key2, path
|
212
245
|
end
|
213
246
|
|
214
247
|
#
|
@@ -344,8 +377,7 @@ module RelatonBipm
|
|
344
377
|
docnum = create_docnum args[:body], args[:type], args[:num], args[:en]["date"]
|
345
378
|
hash = { title: [], type: "proceedings", doctype: args[:type],
|
346
379
|
place: [RelatonBib::Place.new(city: "Paris")] }
|
347
|
-
hash[:title]
|
348
|
-
hash[:title] << create_title(args[:fr]["title"], "fr") if args[:fr]["title"]
|
380
|
+
hash[:title] = create_titles args.slice(:en, :fr)
|
349
381
|
hash[:date] = [{ type: "published", on: args[:en]["date"] }]
|
350
382
|
hash[:docid] = create_docids docnum
|
351
383
|
hash[:docnumber] = docnum # .sub(" --", "").sub(/\s\(\d{4}\)/, "")
|
@@ -358,6 +390,12 @@ module RelatonBipm
|
|
358
390
|
hash
|
359
391
|
end
|
360
392
|
|
393
|
+
#
# Create titles for every language whose metadata has a title
#
# @param [Hash] data metadata keyed by language (values may be nil)
#
# @return [Array] titles built by create_title, in the order of the keys
#
def create_titles(data)
  titles = []
  data.each do |lang, md|
    title = md && md["title"]
    titles << create_title(title, lang.to_s) if title
  end
  titles
end
|
398
|
+
|
361
399
|
#
|
362
400
|
# Create links
|
363
401
|
#
|
@@ -366,12 +404,13 @@ module RelatonBipm
|
|
366
404
|
# @return [Array<Hash>] Array of links
|
367
405
|
#
|
368
406
|
def create_links(**args)
|
369
|
-
links = [
|
370
|
-
|
371
|
-
|
372
|
-
|
407
|
+
links = args.slice(:en, :fr).each_with_object([]) do |(lang, md), mem|
|
408
|
+
next unless md && md["url"]
|
409
|
+
|
410
|
+
mem << { type: "citation", content: md["url"], language: lang.to_s, script: "Latn" }
|
411
|
+
end
|
373
412
|
RelatonBib.array(args[:pdf]).each { |pdf| links << { type: "pdf", content: pdf } }
|
374
|
-
links += args[:src] if args[:src]
|
413
|
+
links += args[:src] if args[:src]
|
375
414
|
links
|
376
415
|
end
|
377
416
|
|
@@ -456,8 +495,8 @@ module RelatonBipm
|
|
456
495
|
# @return [RelatonBib::DocumentIdentifier] french document ID
|
457
496
|
#
|
458
497
|
def create_docid_fr(en_id)
|
459
|
-
tr =
|
460
|
-
id = en_id.sub
|
498
|
+
tr = TRANSLATIONS.detect { |_, v| en_id.include? v }
|
499
|
+
id = tr ? en_id.sub(tr[1], tr[0]) : en_id
|
461
500
|
make_docid(id: id, type: "BIPM", primary: true, language: "fr", script: "Latn")
|
462
501
|
end
|
463
502
|
|
@@ -0,0 +1,134 @@
|
|
1
|
+
module RelatonBipm
|
2
|
+
class Id
|
3
|
+
# Parslet grammar for BIPM document identifiers. It recognizes three families:
# outcomes (group + type + number/year), the SI Brochure, and Metrologia.
class Parser < Parslet::Parser
  # Punctuation and whitespace ("\s" in a double-quoted string is a space).
  rule(:space) { match("\s").repeat(1) }
  rule(:space?) { space.maybe }
  rule(:comma) { str(",") >> space? }
  rule(:lparen) { str("(") }
  rule(:rparen) { str(")") }
  rule(:slash) { str("/") }

  # Optional "-- " separator between group and type.
  rule(:delimeter) { str("--") >> space }
  rule(:delimeter?) { delimeter.maybe }

  # Two-letter upper-case language code preceded by a comma.
  rule(:lang) { comma >> match["A-Z"].repeat(2, 2).as(:lang) }
  rule(:lang?) { lang.maybe }

  # Number made of digits and hyphens.
  rule(:number) { match["0-9-"].repeat(1).as(:number) >> space? }
  rule(:number?) { number.maybe }

  # Year forms: "(YYYY)", "(YYYY, LL)", "N (YYYY)", "YYYY-N" or a bare number.
  rule(:year) { match["0-9"].repeat(4, 4).as(:year) }
  rule(:year_paren) { lparen >> year >> lang? >> rparen }
  rule(:num_year) { number? >> year_paren }
  rule(:year_num) { year >> str("-") >> number }
  rule(:num_and_year) { num_year | year_num | number }

  # Issuing groups.
  rule(:sect) { lparen >> match["IVX"].repeat >> rparen }
  rule(:suff) { match["a-zA-Z-"].repeat(1) }
  rule(:cgmp) { str("CGPM") }
  rule(:cipm) { str("CIPM") >> (str(" MRA") | match["A-Z-"]).maybe }
  rule(:cc) { str("CC") >> suff >> sect.maybe }
  rule(:jc) { str("JC") >> suff }
  rule(:cec) { str("CEC") }
  rule(:wgms) { str("WG-MS") }
  rule(:group) { (cgmp | cipm | cc | jc | cec | wgms).as(:group) }

  # Document type: a run of alphabetic characters followed by a space.
  rule(:type) { match["[:alpha:]"].repeat(1).as(:type) >> space }

  # Outcomes are written either "GROUP [-- ]Type N (YYYY)" or "Type GROUP/N".
  rule(:type_group) { type >> group >> slash >> num_and_year }
  rule(:group_type) { group >> space >> delimeter? >> type >> num_and_year }
  rule(:outcome) { group_type | type_group }

  # "SI Brochure", optionally followed by ", Appendix N".
  rule(:append) { comma >> str("Appendix") >> space >> number }
  rule(:brochure) { str("SI").as(:group) >> space >> str("Brochure").as(:type) >> append.maybe }

  # "Metrologia", optionally followed by a free-form alphanumeric number.
  rule(:metrologia) { str("Metrologia").as(:group) >> (space >> match["a-zA-Z0-9\s"].repeat(1).as(:number)).maybe }

  rule(:result) { outcome | brochure | metrologia }

  root :result
end
|
51
|
+
|
52
|
+
# Maps English and French type names to one normalized token per type.
# NOTE(review): unlike the other types, "Declaration" and "Réunion" are
# normalized to a full word of the other language rather than to an
# abbreviation - confirm this is intended before changing, since the
# normalized value is used as an index key.
TYPES = {
  "Resolution" => "RES", "Résolution" => "RES",
  "Recommendation" => "REC", "Recommandation" => "REC",
  "Decision" => "DECN", "Décision" => "DECN",
  "Declaration" => "Déclaration",
  "Réunion" => "Meeting"
}.freeze
|
62
|
+
|
63
|
+
# @return [Hash] the parsed id components
|
64
|
+
attr_accessor :id
|
65
|
+
|
66
|
+
#
|
67
|
+
# Create a new Id object
|
68
|
+
#
|
69
|
+
# @param [String] id id string
|
70
|
+
#
|
71
|
+
def initialize(id)
  begin
    @id = Parser.new.parse(id)
  rescue Parslet::ParseFailed => error
    # Report the offending reference, then surface the failure as a request error.
    warn "[relaton-bipm] Incorrect reference: #{id}"
    raise RelatonBib::RequestError, error
  end
end
|
78
|
+
|
79
|
+
#
|
80
|
+
# Compare two Id objects
|
81
|
+
#
|
82
|
+
# @param [RelatonBipm::Id, Hash] other the other Id object
|
83
|
+
#
|
84
|
+
# @return [Boolean] true if the two Id objects are equal
|
85
|
+
#
|
86
|
+
def ==(other)
  # Compare copies: normalized_hash returns the memoized hash, so deleting keys
  # from it directly would permanently strip year/lang from this object (and
  # from the other Id or the caller's Hash) after a single comparison.
  other_hash = (other.is_a?(Id) ? other.normalized_hash : other).dup
  own_hash = normalized_hash.dup
  # Year and language take part in the comparison only when both sides have them.
  %i[year lang].each do |key|
    own_hash.delete(key) unless other_hash[key]
    other_hash.delete(key) unless own_hash[key]
  end
  own_hash == other_hash
end
|
95
|
+
|
96
|
+
#
|
97
|
+
# Transform ID parts.
|
98
|
+
# Translate type into abbreviation, remove leading zeros from number
|
99
|
+
#
|
100
|
+
# @return [Hash] the normalized ID parts
|
101
|
+
#
|
102
|
+
def normalized_hash
  return @normalized_hash if @normalized_hash

  # The group name CCDS is normalized to CCTF.
  parts = { group: id[:group].to_s.sub("CCDS", "CCTF") }
  parts[:type] = normalized_type if id[:type]
  number = normalized_number
  parts[:number] = number if number && !number.empty?
  %i[year lang].each { |key| parts[key] = id[key].to_s if id[key] }
  @normalized_hash = parts
end
|
113
|
+
|
114
|
+
#
|
115
|
+
# Translate type into abbreviation
|
116
|
+
#
|
117
|
+
# @return [String] the normalized type
|
118
|
+
#
|
119
|
+
def normalized_type
  type = id[:type].to_s
  # Unknown types are returned unchanged.
  TYPES.fetch(type, type)
end
|
122
|
+
|
123
|
+
#
|
124
|
+
# Remove leading zeros from number
|
125
|
+
#
|
126
|
+
# @return [String, nil] the normalized number
|
127
|
+
#
|
128
|
+
def normalized_number
  number = id[:number]
  number.to_s.sub(/^0+/, "") if number
end
|
133
|
+
end
|
134
|
+
end
|
@@ -9,7 +9,7 @@ module RelatonBipm
|
|
9
9
|
@prefix = "BIPM"
|
10
10
|
@defaultprefix = %r{^(?:BIPM|CCTF|CCDS|CGPM|CIPM)(?!\w)}
|
11
11
|
@idtype = "BIPM"
|
12
|
-
@datasets = %w[bipm-data-outcomes bipm-si-brochure]
|
12
|
+
@datasets = %w[bipm-data-outcomes bipm-si-brochure rawdata-bipm-metrologia]
|
13
13
|
end
|
14
14
|
|
15
15
|
# @param code [String]
|
@@ -21,10 +21,11 @@ module RelatonBipm
|
|
21
21
|
end
|
22
22
|
|
23
23
|
#
|
24
|
-
# Fetch all the documents from https://github.com/metanorma/bipm-data-outcomes
|
25
|
-
#
|
24
|
+
# Fetch all the documents from https://github.com/metanorma/bipm-data-outcomes,
|
25
|
+
# https://github.com/metanorma/bipm-si-brochure, https://github.com/relaton/rawdata-bipm-metrologia
|
26
26
|
#
|
27
|
-
# @param [String] source source name
|
27
|
+
# @param [String] source source name (bipm-data-outcomes, bipm-si-brochure,
|
28
|
+
# rawdata-bipm-metrologia)
|
28
29
|
# @param [Hash] opts
|
29
30
|
# @option opts [String] :output directory to output documents
|
30
31
|
# @option opts [String] :format
|
@@ -0,0 +1,311 @@
|
|
1
|
+
module RelatonBipm
|
2
|
+
module RawdataBipmMetrologia
|
3
|
+
class ArticleParser
|
4
|
+
ATTRS = %i[docid title contributor date copyright abstract relation series
|
5
|
+
extent type doctype].freeze
|
6
|
+
#
|
7
|
+
# Create new parser and parse document
|
8
|
+
#
|
9
|
+
# @param [Nokogiri::XML::Element] doc document XML element
|
10
|
+
#
|
11
|
+
# @return [RelatonBipm::BipmBibliographicItem] document
|
12
|
+
#
|
13
|
+
def self.parse(doc)
  parser = new(doc)
  parser.parse
end
|
16
|
+
|
17
|
+
#
|
18
|
+
# Initialize parser
|
19
|
+
#
|
20
|
+
# @param [Nokogiri::XML::Element] doc XML document
|
21
|
+
#
|
22
|
+
def initialize(doc)
  @doc = doc
  # Most parse_* methods read from the article-meta element, so look it up once.
  @meta = doc.at("./front/article-meta")
end
|
26
|
+
|
27
|
+
#
|
28
|
+
# Create new document
|
29
|
+
#
|
30
|
+
# @return [RelatonBipm::BipmBibliographicItem] document
|
31
|
+
#
|
32
|
+
def parse
  # Each attribute listed in ATTRS is produced by the matching parse_<attr> method.
  attrs = ATTRS.each_with_object({}) do |attr, acc|
    acc[attr] = send("parse_#{attr}")
  end
  BipmBibliographicItem.new(**attrs)
end
|
36
|
+
|
37
|
+
#
|
38
|
+
# Parse docid
|
39
|
+
#
|
40
|
+
# @return [Array<RelatonBib::DocumentIdentifier>] array of document identifiers
|
41
|
+
#
|
42
|
+
def parse_docid
  # The primary BIPM identifier is "<journal title> <volume> <issue> <article>".
  primary = create_docid("#{journal_title} #{volume_issue_article}", "BIPM", true)
  dois = @meta.xpath("./article-id[@pub-id-type='doi']").map do |node|
    create_docid(node.text, node["pub-id-type"])
  end
  [primary, *dois]
end
|
50
|
+
|
51
|
+
#
|
52
|
+
# Parse volume, issue and article number
|
53
|
+
#
|
54
|
+
# @return [String] volume, issue and article number joined by spaces
|
55
|
+
#
|
56
|
+
def volume_issue_article
  vol = @meta.at("./volume").text
  iss = @meta.at("./issue").text
  "#{vol} #{iss} #{article}"
end
|
62
|
+
|
63
|
+
#
# Article number taken from the manuscript id
#
# @return [String] the part of the manuscript id after its last underscore
#
def article
  manuscript = @meta.at("./article-id[@pub-id-type='manuscript']").text
  manuscript[/[^_]+$/].to_s
end
|
66
|
+
|
67
|
+
#
|
68
|
+
# Parse journal title
|
69
|
+
#
|
70
|
+
# @return [String] journal title
|
71
|
+
#
|
72
|
+
def journal_title
  title_node = @doc.at("./front/journal-meta/journal-title-group/journal-title")
  title_node.text
end
|
75
|
+
|
76
|
+
#
|
77
|
+
# Create document identifier
|
78
|
+
#
|
79
|
+
# @param [String] id document id
|
80
|
+
# @param [String] type id type
|
81
|
+
# @param [Boolean, nil] primary is primary id
|
82
|
+
#
|
83
|
+
# @return [RelatonBib::DocumentIdentifier] document identifier
|
84
|
+
#
|
85
|
+
def create_docid(id, type, primary = nil)
  attrs = { id: id, type: type, primary: primary }
  RelatonBib::DocumentIdentifier.new(**attrs)
end
|
88
|
+
|
89
|
+
#
|
90
|
+
# Parse title
|
91
|
+
#
|
92
|
+
# @return [Array<RelatonBib::TypedTitleString>] array of title strings
|
93
|
+
#
|
94
|
+
def parse_title
  @meta.xpath("./title-group/article-title").each_with_object([]) do |node, titles|
    text = node.text
    # Empty title elements are skipped.
    next if text.empty?

    titles << RelatonBib::TypedTitleString.new(content: text, language: node[:"xml:lang"], script: "Latn")
  end
end
|
101
|
+
|
102
|
+
#
|
103
|
+
# Parse contributor
|
104
|
+
#
|
105
|
+
# @return [Array<RelatonBib::Contributor>] array of contributors
|
106
|
+
#
|
107
|
+
def parse_contributor
  @meta.xpath("./contrib-group/contrib").map do |contrib|
    # A contributor without a <name> element is treated as an organization.
    entity = create_person(contrib) || create_organization(contrib)
    role = { type: contrib[:"contrib-type"] }
    RelatonBib::ContributionInfo.new(entity: entity, role: [role])
  end
end
|
113
|
+
|
114
|
+
#
# Create a person from a contributor element
#
# @param [Nokogiri::XML::Element] contrib contributor element
#
# @return [RelatonBib::Person, nil] person, or nil when there is no name element
#
def create_person(contrib)
  name_node = contrib.at("./name")
  name_node && RelatonBib::Person.new(name: fullname(name_node), affiliation: affiliation(contrib))
end
|
120
|
+
|
121
|
+
#
# Create an organization from the collab element of a contributor
#
# @param [Nokogiri::XML::Element] contrib contributor element
#
# @return [RelatonBib::Organization] organization
#
def create_organization(contrib)
  collab = contrib.at("./collab")
  RelatonBib::Organization.new(name: collab.text)
end
|
124
|
+
|
125
|
+
#
|
126
|
+
# Parse affiliations
|
127
|
+
#
|
128
|
+
# @param [Nokogiri::XML::Element] contrib contributor element
|
129
|
+
#
|
130
|
+
# @return [Array<RelatonBib::Affiliation>] array of affiliations
|
131
|
+
#
|
132
|
+
def affiliation(contrib)
  contrib.xpath("./xref[@ref-type='aff']").map do |xref|
    # Look up the node that follows the label of the referenced aff element.
    aff = @meta.at("./contrib-group/aff[@id='#{xref[:rid]}']/label/following-sibling::node()")
    chunks = aff.text.split(", ")
    # The last two comma-separated chunks are read as city and country;
    # everything before them is the organization name.
    org_name = chunks[0..-3].join(", ")
    city, country = chunks[-2..]
    contact = city && country ? [RelatonBib::Address.new(city: city, country: country)] : []
    organization = RelatonBib::Organization.new(name: org_name, contact: contact)
    RelatonBib::Affiliation.new(organization: organization)
  end
end
|
144
|
+
|
145
|
+
#
|
146
|
+
# Create full name
|
147
|
+
#
|
148
|
+
# @param [Nokogiri::XML::Element] name name element
|
149
|
+
#
|
150
|
+
# @return [RelatonBib::FullName] full name
|
151
|
+
#
|
152
|
+
def fullname(name)
  forenames = forename(name.at("./given-names"))
  surname = RelatonBib::LocalizedString.new(name.at("./surname").text, "en", "Latn")
  RelatonBib::FullName.new(surname: surname, forename: forenames)
end
|
158
|
+
|
159
|
+
#
|
160
|
+
# Parse forename
|
161
|
+
#
|
162
|
+
# @param [Nokogiri::XML::Element, nil] given_name given-names element
|
163
|
+
#
|
164
|
+
# @return [Array<RelatonBib::Forename>] array of forenames
|
165
|
+
#
|
166
|
+
def forename(given_name)
  return [] unless given_name

  # Each match is a word optionally followed by a single-character initial.
  # NOTE(review): \w is ASCII-only in Ruby, so names with accented letters
  # are split at the accent - confirm whether that is acceptable.
  given_name.text.scan(/(\w+)(?:\s(\w)(?:\s|$))?/).map do |word, initial|
    # A one-character word is itself an initial, not a name.
    name, init = word.size == 1 ? [nil, word] : [word, initial]
    RelatonBib::Forename.new(content: name, language: ["en"], script: ["Latn"], initial: init)
  end
end
|
180
|
+
|
181
|
+
#
|
182
|
+
# Parse date
|
183
|
+
#
|
184
|
+
# @return [Array<RelatonBib::BibliographicDate>] array of dates
|
185
|
+
#
|
186
|
+
def parse_date
  # The earliest publication date is used as the "published" date.
  [RelatonBib::BibliographicDate.new(type: "published", on: dates.min)]
end
|
190
|
+
|
191
|
+
#
|
192
|
+
# Collect publication dates, optionally passing each one to a block
|
193
|
+
#
|
194
|
+
# @yield [date, type] date and type
|
195
|
+
#
|
196
|
+
# @return [Array<String, Object>] string date or whatever block returns
|
197
|
+
#
|
198
|
+
def dates
  @meta.xpath("./pub-date").map do |node|
    month = date_part(node, "month")
    day = date_part(node, "day")
    date = [node.at("./year").text, month, day].join("-")
    if block_given?
      yield(date, node[:"pub-type"])
    else
      date
    end
  end
end
|
206
|
+
|
207
|
+
#
# Read a two-digit date component
#
# @param [Nokogiri::XML::Element] date pub-date element
# @param [String] type name of the child element ("month" or "day")
#
# @return [String] zero-padded value, or "01" when the element is missing or empty
#
def date_part(date, type)
  value = (date.at("./#{type}")&.text).to_s
  value.empty? ? "01" : value.rjust(2, "0")
end
|
213
|
+
|
214
|
+
#
|
215
|
+
# Parse copyright
|
216
|
+
#
|
217
|
+
# @return [Array<RelatonBib::CopyrightAssociation>] array of copyright associations
|
218
|
+
#
|
219
|
+
def parse_copyright
  @meta.xpath("./permissions").each_with_object([]) do |permissions, associations|
    from = permissions.at("./copyright-year")
    # Permissions without a copyright year produce no copyright association.
    next unless from

    # The statement may name several holders separated by " & ".
    owners = permissions.at("./copyright-statement").text.split(" & ").map do |holder|
      # The owner name is the first run of space-separated words made of
      # letters. [A-Za-z] is used instead of [A-z], because the A-z range
      # also matches the characters [ \ ] ^ _ and the backtick.
      name = holder[/[A-Za-z]+(?:\s[A-Za-z]+)*/]
      RelatonBib::ContributionInfo.new(entity: RelatonBib::Organization.new(name: name))
    end
    associations << RelatonBib::CopyrightAssociation.new(owner: owners, from: from.text)
  end
end
|
232
|
+
|
233
|
+
#
|
234
|
+
# Parse abstract
|
235
|
+
#
|
236
|
+
# @return [Array<RelatonBib::FormattedString>] array of abstracts
|
237
|
+
#
|
238
|
+
def parse_abstract
  @meta.xpath("./abstract").map do |node|
    # The abstract keeps its inner markup and is therefore declared as text/html.
    attrs = { content: node.inner_html, language: node[:"xml:lang"], script: ["Latn"], format: "text/html" }
    RelatonBib::FormattedString.new(**attrs)
  end
end
|
245
|
+
|
246
|
+
#
|
247
|
+
# Parse relation
|
248
|
+
#
|
249
|
+
# @return [Array<RelatonBib::DocumentRelation>] array of document relations
|
250
|
+
#
|
251
|
+
def parse_relation
  # One "hasManifestation" relation is created per publication date.
  dates do |date, pub_type|
    manifestation = bibitem(date, pub_type)
    RelatonBib::DocumentRelation.new(type: "hasManifestation", bibitem: manifestation)
  end
end
|
256
|
+
|
257
|
+
#
|
258
|
+
# Create bibitem
|
259
|
+
#
|
260
|
+
# @param [String] date
|
261
|
+
# @param [String] type date type
|
262
|
+
#
|
263
|
+
# @return [RelatonBipm::BipmBibliographicItem] bibitem
|
264
|
+
#
|
265
|
+
def bibitem(date, type)
  published = RelatonBib::BibliographicDate.new(type: type, on: date)
  # "epub" dates describe the online manifestation; anything else is print.
  medium = RelatonBib::Medium.new(carrier: type == "epub" ? "online" : "print")
  BipmBibliographicItem.new(title: parse_title, date: [published], medium: medium)
end
|
271
|
+
|
272
|
+
#
|
273
|
+
# Parse series
|
274
|
+
#
|
275
|
+
# @return [Array<RelatonBib::Series>] array of series
|
276
|
+
#
|
277
|
+
def parse_series
  # The journal itself is the only series.
  series_title = RelatonBib::TypedTitleString.new(content: journal_title, language: ["en"], script: ["Latn"])
  [RelatonBib::Series.new(title: series_title)]
end
|
283
|
+
|
284
|
+
#
|
285
|
+
# Parse extent
|
286
|
+
#
|
287
|
+
# @return [Array<RelatonBib::Extent>] array of extents
|
288
|
+
#
|
289
|
+
def parse_extent
  @meta.xpath("./volume|./issue|./fpage").map do |node|
    if node.name == "fpage"
      # The first page becomes a "page" locality that ends at lpage, when present.
      RelatonBib::Locality.new "page", node.text, @meta.at("./lpage")&.text
    else
      RelatonBib::Locality.new node.name, node.text, nil
    end
  end
end
|
303
|
+
|
304
|
+
#
# Document type
#
# @return [String] always "article"
#
def parse_type
  "article"
end
|
307
|
+
|
308
|
+
alias_method :parse_doctype, :parse_type
|
309
|
+
end
|
310
|
+
end
|
311
|
+
end
|