relaton-bipm 1.14.1 → 1.14.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +1 -0
- data/Gemfile +6 -0
- data/README.adoc +32 -12
- data/grammars/basicdoc.rng +0 -1
- data/grammars/biblio.rng +12 -2
- data/lib/relaton_bipm/bipm_bibliography.rb +12 -310
- data/lib/relaton_bipm/bipm_si_brochure_parser.rb +8 -4
- data/lib/relaton_bipm/comment_periond.rb +1 -1
- data/lib/relaton_bipm/data_fetcher.rb +17 -5
- data/lib/relaton_bipm/data_outcomes_parser.rb +68 -29
- data/lib/relaton_bipm/id_parser.rb +134 -0
- data/lib/relaton_bipm/processor.rb +5 -4
- data/lib/relaton_bipm/rawdata_bipm_metrologia/article_parser.rb +311 -0
- data/lib/relaton_bipm/rawdata_bipm_metrologia/fetcher.rb +176 -0
- data/lib/relaton_bipm/version.rb +1 -1
- data/lib/relaton_bipm.rb +5 -1
- data/relaton_bipm.gemspec +2 -6
- metadata +26 -80
- data/lib/relaton_bipm/index.rb +0 -68
@@ -7,13 +7,21 @@ module RelatonBipm
|
|
7
7
|
"Statement" => "DECL",
|
8
8
|
}.freeze
|
9
9
|
|
10
|
+
TRANSLATIONS = {
|
11
|
+
"Déclaration" => "Declaration",
|
12
|
+
"Réunion" => "Meeting",
|
13
|
+
"Recommandation" => "Recommendation",
|
14
|
+
"Résolution" => "Resolution",
|
15
|
+
"Décision" => "Decision",
|
16
|
+
}.freeze
|
17
|
+
|
10
18
|
#
|
11
19
|
# Create data-outcomes parser
|
12
20
|
#
|
13
21
|
# @param [RelatonBipm::DataFetcher] data_fetcher data fetcher
|
14
22
|
#
|
15
23
|
def initialize(data_fetcher)
|
16
|
-
@data_fetcher = data_fetcher
|
24
|
+
@data_fetcher = WeakRef.new data_fetcher
|
17
25
|
end
|
18
26
|
|
19
27
|
#
|
@@ -68,21 +76,11 @@ module RelatonBipm
|
|
68
76
|
# @param [String] dir output directory
|
69
77
|
#
|
70
78
|
def fetch_meeting(en_file, body, type, dir) # rubocop:disable Metrics/AbcSize, Metrics/MethodLength
|
71
|
-
en =
|
72
|
-
en_md = en
|
73
|
-
|
74
|
-
fr = RelatonBib.parse_yaml File.read(fr_file, encoding: "UTF-8"), [Date]
|
75
|
-
fr_md = fr["metadata"]
|
76
|
-
gh_src = "https://raw.githubusercontent.com/metanorma/bipm-data-outcomes/"
|
77
|
-
src_en = gh_src + en_file.split("/")[-3..].unshift("main").join("/")
|
78
|
-
src_fr = gh_src + fr_file.split("/")[-3..].unshift("main").join("/")
|
79
|
-
src = [
|
80
|
-
{ type: "src", content: src_en, language: "en", script: "Latn" },
|
81
|
-
{ type: "src", content: src_fr, language: "fr", script: "Latn" },
|
82
|
-
]
|
79
|
+
_, en, fr_file, fr = read_files en_file
|
80
|
+
en_md, fr_md, num, part = meeting_md en, fr
|
81
|
+
src = meeting_links en_file, fr_file
|
83
82
|
|
84
|
-
|
85
|
-
file = "#{num}.yaml"
|
83
|
+
file = "#{num}.#{@data_fetcher.ext}"
|
86
84
|
path = File.join dir, file
|
87
85
|
hash = bibitem body: body, type: type, en: en_md, fr: fr_md, num: num, src: src, pdf: en["pdf"]
|
88
86
|
if @data_fetcher.files.include?(path) && part
|
@@ -92,13 +90,13 @@ module RelatonBipm
|
|
92
90
|
has_part_item = RelatonBipm::BipmBibliographicItem.from_hash(yaml)
|
93
91
|
has_part_item.relation << RelatonBib::DocumentRelation.new(type: "partOf", bibitem: item)
|
94
92
|
@data_fetcher.write_file path, has_part_item, warn_duplicate: false
|
95
|
-
path = File.join dir, "#{num}-#{part}.
|
93
|
+
path = File.join dir, "#{num}-#{part}.#{@data_fetcher.ext}"
|
96
94
|
elsif part
|
97
95
|
hash[:title].each { |t| t[:content] = t[:content].sub(/\s\(.+\)$/, "") }
|
98
96
|
h = bibitem body: body, type: type, en: en_md, fr: fr_md, num: num, src: src, pdf: en["pdf"]
|
99
97
|
add_part h, part
|
100
98
|
part_item = RelatonBipm::BipmBibliographicItem.new(**h)
|
101
|
-
part_item_path = File.join dir, "#{num}-#{part}.
|
99
|
+
part_item_path = File.join dir, "#{num}-#{part}.#{@data_fetcher.ext}"
|
102
100
|
@data_fetcher.write_file part_item_path, part_item
|
103
101
|
add_to_index part_item, part_item_path
|
104
102
|
hash[:relation] = [RelatonBib::DocumentRelation.new(type: "partOf", bibitem: part_item)]
|
@@ -111,6 +109,40 @@ module RelatonBipm
|
|
111
109
|
fetch_resolution body: body, en: en, fr: fr, dir: dir, src: src, num: num
|
112
110
|
end
|
113
111
|
|
112
|
+
#
|
113
|
+
# Read English and French files
|
114
|
+
#
|
115
|
+
# @param [String] en_file Path to English file
|
116
|
+
#
|
117
|
+
# @return [Array<String, Hash, nil>] English path, English data, French path, French data (path and data are nil when the file does not exist)
|
118
|
+
#
|
119
|
+
def read_files(en_file)
|
120
|
+
fr_file = en_file.sub "en", "fr"
|
121
|
+
[en_file, fr_file].map do |file|
|
122
|
+
if File.exist? file
|
123
|
+
data = RelatonBib.parse_yaml(File.read(file, encoding: "UTF-8"), [Date])
|
124
|
+
path = file
|
125
|
+
end
|
126
|
+
[path, data]
|
127
|
+
end.flatten
|
128
|
+
end
|
129
|
+
|
130
|
+
def meeting_md(eng, frn)
|
131
|
+
en_md = eng["metadata"]
|
132
|
+
num, part = en_md["identifier"].to_s.split("-")
|
133
|
+
[en_md, frn&.dig("metadata"), num, part]
|
134
|
+
end
|
135
|
+
|
136
|
+
def meeting_links(en_file, fr_file)
|
137
|
+
gh_src = "https://raw.githubusercontent.com/metanorma/bipm-data-outcomes/"
|
138
|
+
{ "en" => en_file, "fr" => fr_file }.map do |lang, file|
|
139
|
+
next unless file
|
140
|
+
|
141
|
+
src = gh_src + file.split("/")[-3..].unshift("main").join("/")
|
142
|
+
{ type: "src", content: src, language: lang, script: "Latn" }
|
143
|
+
end.compact
|
144
|
+
end
|
145
|
+
|
114
146
|
#
|
115
147
|
# Parse BIPM resolutions and write them to YAML files
|
116
148
|
#
|
@@ -146,9 +178,7 @@ module RelatonBipm
|
|
146
178
|
hash[:contributor] = contributors date, args[:body]
|
147
179
|
hash[:structuredidentifier] = RelatonBipm::StructuredIdentifier.new docnumber: num
|
148
180
|
item = RelatonBipm::BipmBibliographicItem.new(**hash)
|
149
|
-
file = year
|
150
|
-
file += "-#{num_justed}" # if num.size < 4
|
151
|
-
file += ".yaml"
|
181
|
+
file = "#{year}-#{num_justed}.#{@data_fetcher.ext}"
|
152
182
|
out_dir = File.join args[:dir], r["type"].downcase
|
153
183
|
FileUtils.mkdir_p out_dir
|
154
184
|
path = File.join out_dir, file
|
@@ -209,6 +239,9 @@ module RelatonBipm
|
|
209
239
|
end
|
210
240
|
key << item.docidentifier.detect { |i| i.language == "fr" }.id
|
211
241
|
@data_fetcher.index[key] = path
|
242
|
+
@data_fetcher.index_new.add_or_update key, path
|
243
|
+
key2 = Id.new(item.docnumber).normalized_hash
|
244
|
+
@data_fetcher.index2.add_or_update key2, path
|
212
245
|
end
|
213
246
|
|
214
247
|
#
|
@@ -344,8 +377,7 @@ module RelatonBipm
|
|
344
377
|
docnum = create_docnum args[:body], args[:type], args[:num], args[:en]["date"]
|
345
378
|
hash = { title: [], type: "proceedings", doctype: args[:type],
|
346
379
|
place: [RelatonBib::Place.new(city: "Paris")] }
|
347
|
-
hash[:title]
|
348
|
-
hash[:title] << create_title(args[:fr]["title"], "fr") if args[:fr]["title"]
|
380
|
+
hash[:title] = create_titles args.slice(:en, :fr)
|
349
381
|
hash[:date] = [{ type: "published", on: args[:en]["date"] }]
|
350
382
|
hash[:docid] = create_docids docnum
|
351
383
|
hash[:docnumber] = docnum # .sub(" --", "").sub(/\s\(\d{4}\)/, "")
|
@@ -358,6 +390,12 @@ module RelatonBipm
|
|
358
390
|
hash
|
359
391
|
end
|
360
392
|
|
393
|
+
def create_titles(data)
|
394
|
+
data.each_with_object([]) do |(lang, md), mem|
|
395
|
+
mem << create_title(md["title"], lang.to_s) if md && md["title"]
|
396
|
+
end
|
397
|
+
end
|
398
|
+
|
361
399
|
#
|
362
400
|
# Create links
|
363
401
|
#
|
@@ -366,12 +404,13 @@ module RelatonBipm
|
|
366
404
|
# @return [Array<Hash>] Array of links
|
367
405
|
#
|
368
406
|
def create_links(**args)
|
369
|
-
links = [
|
370
|
-
|
371
|
-
|
372
|
-
|
407
|
+
links = args.slice(:en, :fr).each_with_object([]) do |(lang, md), mem|
|
408
|
+
next unless md && md["url"]
|
409
|
+
|
410
|
+
mem << { type: "citation", content: md["url"], language: lang.to_s, script: "Latn" }
|
411
|
+
end
|
373
412
|
RelatonBib.array(args[:pdf]).each { |pdf| links << { type: "pdf", content: pdf } }
|
374
|
-
links += args[:src] if args[:src]
|
413
|
+
links += args[:src] if args[:src]
|
375
414
|
links
|
376
415
|
end
|
377
416
|
|
@@ -456,8 +495,8 @@ module RelatonBipm
|
|
456
495
|
# @return [RelatonBib::DocumentIdentifier] french document ID
|
457
496
|
#
|
458
497
|
def create_docid_fr(en_id)
|
459
|
-
tr =
|
460
|
-
id = en_id.sub
|
498
|
+
tr = TRANSLATIONS.detect { |_, v| en_id.include? v }
|
499
|
+
id = tr ? en_id.sub(tr[1], tr[0]) : en_id
|
461
500
|
make_docid(id: id, type: "BIPM", primary: true, language: "fr", script: "Latn")
|
462
501
|
end
|
463
502
|
|
@@ -0,0 +1,134 @@
|
|
1
|
+
module RelatonBipm
|
2
|
+
class Id
|
3
|
+
class Parser < Parslet::Parser
|
4
|
+
rule(:space) { match("\s").repeat(1) }
|
5
|
+
rule(:space?) { space.maybe }
|
6
|
+
rule(:comma) { str(",") >> space? }
|
7
|
+
rule(:lparen) { str("(") }
|
8
|
+
rule(:rparen) { str(")") }
|
9
|
+
rule(:slash) { str("/") }
|
10
|
+
|
11
|
+
rule(:delimeter) { str("--") >> space }
|
12
|
+
rule(:delimeter?) { delimeter.maybe }
|
13
|
+
|
14
|
+
rule(:lang) { comma >> match["A-Z"].repeat(2, 2).as(:lang) }
|
15
|
+
rule(:lang?) { lang.maybe }
|
16
|
+
|
17
|
+
rule(:number) { match["0-9-"].repeat(1).as(:number) >> space? }
|
18
|
+
rule(:number?) { number.maybe }
|
19
|
+
|
20
|
+
rule(:year) { match["0-9"].repeat(4, 4).as(:year) }
|
21
|
+
rule(:year_paren) { lparen >> year >> lang? >> rparen }
|
22
|
+
rule(:num_year) { number? >> year_paren }
|
23
|
+
rule(:year_num) { year >> str("-") >> number }
|
24
|
+
rule(:num_and_year) { num_year | year_num | number }
|
25
|
+
|
26
|
+
rule(:sect) { lparen >> match["IVX"].repeat >> rparen }
|
27
|
+
rule(:suff) { match["a-zA-Z-"].repeat(1) }
|
28
|
+
rule(:cgmp) { str("CGPM") }
|
29
|
+
rule(:cipm) { str("CIPM") >> (str(" MRA") | match["A-Z-"]).maybe }
|
30
|
+
rule(:cc) { str("CC") >> suff >> sect.maybe }
|
31
|
+
rule(:jc) { str("JC") >> suff }
|
32
|
+
rule(:cec) { str("CEC") }
|
33
|
+
rule(:wgms) { str("WG-MS") }
|
34
|
+
rule(:group) { (cgmp | cipm | cc | jc | cec | wgms).as(:group) }
|
35
|
+
|
36
|
+
rule(:type) { match["[:alpha:]"].repeat(1).as(:type) >> space }
|
37
|
+
|
38
|
+
rule(:type_group) { type >> group >> slash >> num_and_year }
|
39
|
+
rule(:group_type) { group >> space >> delimeter? >> type >> num_and_year }
|
40
|
+
rule(:outcome) { group_type | type_group }
|
41
|
+
|
42
|
+
rule(:append) { comma >> str("Appendix") >> space >> number }
|
43
|
+
rule(:brochure) { str("SI").as(:group) >> space >> str("Brochure").as(:type) >> append.maybe }
|
44
|
+
|
45
|
+
rule(:metrologia) { str("Metrologia").as(:group) >> (space >> match["a-zA-Z0-9\s"].repeat(1).as(:number)).maybe }
|
46
|
+
|
47
|
+
rule(:result) { outcome | brochure | metrologia }
|
48
|
+
|
49
|
+
root :result
|
50
|
+
end
|
51
|
+
|
52
|
+
TYPES = {
|
53
|
+
"Resolution" => "RES",
|
54
|
+
"Résolution" => "RES",
|
55
|
+
"Recommendation" => "REC",
|
56
|
+
"Recommandation" => "REC",
|
57
|
+
"Decision" => "DECN",
|
58
|
+
"Décision" => "DECN",
|
59
|
+
"Declaration" => "Déclaration",
|
60
|
+
"Réunion" => "Meeting",
|
61
|
+
}.freeze
|
62
|
+
|
63
|
+
# @return [Hash] the parsed id components
|
64
|
+
attr_accessor :id
|
65
|
+
|
66
|
+
#
|
67
|
+
# Create a new Id object
|
68
|
+
#
|
69
|
+
# @param [String] id id string
|
70
|
+
#
|
71
|
+
def initialize(id)
|
72
|
+
@id = Parser.new.parse(id)
|
73
|
+
rescue Parslet::ParseFailed => e
|
74
|
+
warn "[relaton-bipm] Incorrect reference: #{id}"
|
75
|
+
# warn "[relaton-bipm] #{e.parse_failure_cause.ascii_tree}"
|
76
|
+
raise RelatonBib::RequestError, e
|
77
|
+
end
|
78
|
+
|
79
|
+
#
|
80
|
+
# Compare two Id objects
|
81
|
+
#
|
82
|
+
# @param [RelatonBipm::Id, Hash] other the other Id object
|
83
|
+
#
|
84
|
+
# @return [Boolean] true if the two Id objects are equal
|
85
|
+
#
|
86
|
+
def ==(other)
  # Work on shallow copies: normalized_hash is memoized, so deleting keys
  # from it (or from the caller's Hash) would permanently drop the year or
  # language and corrupt every later comparison.
  theirs = (other.is_a?(Id) ? other.normalized_hash : other).dup
  mine = normalized_hash.dup

  # Year and language are optional parts: they only take part in the
  # comparison when both sides specify them.
  %i[year lang].each do |key|
    next if mine[key] && theirs[key]

    mine.delete(key)
    theirs.delete(key)
  end

  mine == theirs
end
|
95
|
+
|
96
|
+
#
|
97
|
+
# Transform ID parts.
|
98
|
+
# Translate type into abbreviation, remove leading zeros from number
|
99
|
+
#
|
100
|
+
# @return [Hash] the normalized ID parts
|
101
|
+
#
|
102
|
+
def normalized_hash # rubocop:disable Metrics/AbcSize, Metrics/CyclomaticComplexity
|
103
|
+
@normalized_hash ||= begin
|
104
|
+
hash = { group: id[:group].to_s.sub("CCDS", "CCTF") }
|
105
|
+
hash[:type] = normalized_type if id[:type]
|
106
|
+
norm_num = normalized_number
|
107
|
+
hash[:number] = norm_num unless norm_num.nil? || norm_num.empty?
|
108
|
+
hash[:year] = id[:year].to_s if id[:year]
|
109
|
+
hash[:lang] = id[:lang].to_s if id[:lang]
|
110
|
+
hash
|
111
|
+
end
|
112
|
+
end
|
113
|
+
|
114
|
+
#
|
115
|
+
# Translate type into abbreviation
|
116
|
+
#
|
117
|
+
# @return [String] the normalized type
|
118
|
+
#
|
119
|
+
def normalized_type
|
120
|
+
TYPES[id[:type].to_s] || id[:type].to_s
|
121
|
+
end
|
122
|
+
|
123
|
+
#
|
124
|
+
# Remove leading zeros from number
|
125
|
+
#
|
126
|
+
# @return [String, nil] the normalized number
|
127
|
+
#
|
128
|
+
def normalized_number
|
129
|
+
return unless id[:number]
|
130
|
+
|
131
|
+
id[:number].to_s.sub(/^0+/, "")
|
132
|
+
end
|
133
|
+
end
|
134
|
+
end
|
@@ -9,7 +9,7 @@ module RelatonBipm
|
|
9
9
|
@prefix = "BIPM"
|
10
10
|
@defaultprefix = %r{^(?:BIPM|CCTF|CCDS|CGPM|CIPM)(?!\w)}
|
11
11
|
@idtype = "BIPM"
|
12
|
-
@datasets = %w[bipm-data-outcomes bipm-si-brochure]
|
12
|
+
@datasets = %w[bipm-data-outcomes bipm-si-brochure rawdata-bipm-metrologia]
|
13
13
|
end
|
14
14
|
|
15
15
|
# @param code [String]
|
@@ -21,10 +21,11 @@ module RelatonBipm
|
|
21
21
|
end
|
22
22
|
|
23
23
|
#
|
24
|
-
# Fetch all the documents from https://github.com/metanorma/bipm-data-outcomes
|
25
|
-
#
|
24
|
+
# Fetch all the documents from https://github.com/metanorma/bipm-data-outcomes,
|
25
|
+
# https://github.com/metanorma/bipm-si-brochure, https://github.com/relaton/rawdata-bipm-metrologia
|
26
26
|
#
|
27
|
-
# @param [String] source source name
|
27
|
+
# @param [String] source source name (bipm-data-outcomes, bipm-si-brochure,
|
28
|
+
# rawdata-bipm-metrologia)
|
28
29
|
# @param [Hash] opts
|
29
30
|
# @option opts [String] :output directory to output documents
|
30
31
|
# @option opts [String] :format
|
@@ -0,0 +1,311 @@
|
|
1
|
+
module RelatonBipm
|
2
|
+
module RawdataBipmMetrologia
|
3
|
+
class ArticleParser
|
4
|
+
ATTRS = %i[docid title contributor date copyright abstract relation series
|
5
|
+
extent type doctype].freeze
|
6
|
+
#
|
7
|
+
# Create new parser and parse document
|
8
|
+
#
|
9
|
+
# @param [Nokogiri::XML::Element] doc document XML element
|
10
|
+
#
|
11
|
+
# @return [RelatonBipm::BipmBibliographicItem] document
|
12
|
+
#
|
13
|
+
def self.parse(doc)
|
14
|
+
new(doc).parse
|
15
|
+
end
|
16
|
+
|
17
|
+
#
|
18
|
+
# Initialize parser
|
19
|
+
#
|
20
|
+
# @param [Nokogiri::XML::Element] doc XML document
|
21
|
+
#
|
22
|
+
def initialize(doc)
|
23
|
+
@doc = doc
|
24
|
+
@meta = @doc.at("./front/article-meta")
|
25
|
+
end
|
26
|
+
|
27
|
+
#
|
28
|
+
# Create new document
|
29
|
+
#
|
30
|
+
# @return [RelatonBipm::BipmBibliographicItem] document
|
31
|
+
#
|
32
|
+
def parse
|
33
|
+
attrs = ATTRS.to_h { |a| [a, send("parse_#{a}")] }
|
34
|
+
BipmBibliographicItem.new(**attrs)
|
35
|
+
end
|
36
|
+
|
37
|
+
#
|
38
|
+
# Parse docid
|
39
|
+
#
|
40
|
+
# @return [Array<RelatonBib::DocumentIdentifier>] array of document identifiers
|
41
|
+
#
|
42
|
+
def parse_docid
|
43
|
+
pubid = "#{journal_title} #{volume_issue_article}"
|
44
|
+
primary_id = create_docid pubid, "BIPM", true
|
45
|
+
@meta.xpath("./article-id[@pub-id-type='doi']")
|
46
|
+
.each_with_object([primary_id]) do |id, m|
|
47
|
+
m << create_docid(id.text, id["pub-id-type"])
|
48
|
+
end
|
49
|
+
end
|
50
|
+
|
51
|
+
#
|
52
|
+
# Parse volume, issue and article ID
|
53
|
+
#
|
54
|
+
# @return [String] volume, issue and article ID joined by spaces
|
55
|
+
#
|
56
|
+
def volume_issue_article
|
57
|
+
volume = @meta.at("./volume").text
|
58
|
+
issue = @meta.at("./issue").text
|
59
|
+
# page = @doc.at("./front/article-meta/fpage")&.text || manuscript
|
60
|
+
[volume, issue, article].join(" ")
|
61
|
+
end
|
62
|
+
|
63
|
+
def article
|
64
|
+
@meta.at("./article-id[@pub-id-type='manuscript']").text.match(/[^_]+$/).to_s
|
65
|
+
end
|
66
|
+
|
67
|
+
#
|
68
|
+
# Parse journal title
|
69
|
+
#
|
70
|
+
# @return [String] journal title
|
71
|
+
#
|
72
|
+
def journal_title
|
73
|
+
@doc.at("./front/journal-meta/journal-title-group/journal-title").text
|
74
|
+
end
|
75
|
+
|
76
|
+
#
|
77
|
+
# Create document identifier
|
78
|
+
#
|
79
|
+
# @param [String] id document id
|
80
|
+
# @param [String] type id type
|
81
|
+
# @param [Boolean, nil] primary is primary id
|
82
|
+
#
|
83
|
+
# @return [RelatonBib::DocumentIdentifier] document identifier
|
84
|
+
#
|
85
|
+
def create_docid(id, type, primary = nil)
|
86
|
+
RelatonBib::DocumentIdentifier.new id: id, type: type, primary: primary
|
87
|
+
end
|
88
|
+
|
89
|
+
#
|
90
|
+
# Parse title
|
91
|
+
#
|
92
|
+
# @return [Array<RelatonBib::TypedTitleString>] array of title strings
|
93
|
+
#
|
94
|
+
def parse_title
|
95
|
+
@meta.xpath("./title-group/article-title").map do |t|
|
96
|
+
next if t.text.empty?
|
97
|
+
|
98
|
+
RelatonBib::TypedTitleString.new content: t.text, language: t[:"xml:lang"], script: "Latn"
|
99
|
+
end.compact
|
100
|
+
end
|
101
|
+
|
102
|
+
#
|
103
|
+
# Parse contributor
|
104
|
+
#
|
105
|
+
# @return [Array<RelatonBib::Contributor>] array of contributors
|
106
|
+
#
|
107
|
+
def parse_contributor
|
108
|
+
@meta.xpath("./contrib-group/contrib").map do |c|
|
109
|
+
entity = create_person(c) || create_organization(c)
|
110
|
+
RelatonBib::ContributionInfo.new(entity: entity, role: [type: c[:"contrib-type"]])
|
111
|
+
end
|
112
|
+
end
|
113
|
+
|
114
|
+
def create_person(contrib)
|
115
|
+
name = contrib.at("./name")
|
116
|
+
return unless name
|
117
|
+
|
118
|
+
RelatonBib::Person.new name: fullname(name), affiliation: affiliation(contrib)
|
119
|
+
end
|
120
|
+
|
121
|
+
def create_organization(contrib)
|
122
|
+
RelatonBib::Organization.new name: contrib.at("./collab").text
|
123
|
+
end
|
124
|
+
|
125
|
+
#
|
126
|
+
# Parse affiliations
|
127
|
+
#
|
128
|
+
# @param [Nokogiri::XML::Element] contrib contributor element
|
129
|
+
#
|
130
|
+
# @return [Array<RelatonBib::Affiliation>] array of affiliations
|
131
|
+
#
|
132
|
+
def affiliation(contrib) # rubocop:disable Metrics/AbcSize
|
133
|
+
contrib.xpath("./xref[@ref-type='aff']").map do |x|
|
134
|
+
a = @meta.at("./contrib-group/aff[@id='#{x[:rid]}']/label/following-sibling::node()")
|
135
|
+
parts = a.text.split(", ")
|
136
|
+
orgname = parts[0..-3].join(", ")
|
137
|
+
city, country = parts[-2..]
|
138
|
+
address = []
|
139
|
+
address << RelatonBib::Address.new(city: city, country: country) if city && country
|
140
|
+
org = RelatonBib::Organization.new name: orgname, contact: address
|
141
|
+
RelatonBib::Affiliation.new organization: org
|
142
|
+
end
|
143
|
+
end
|
144
|
+
|
145
|
+
#
|
146
|
+
# Create full name
|
147
|
+
#
|
148
|
+
# @param [Nokogiri::XML::Element] name name element
|
149
|
+
#
|
150
|
+
# @return [RelatonBib::FullName] full name
|
151
|
+
#
|
152
|
+
def fullname(name)
|
153
|
+
fname = forename name.at("./given-names")
|
154
|
+
sname = name.at("./surname").text
|
155
|
+
surname = RelatonBib::LocalizedString.new sname, "en", "Latn"
|
156
|
+
RelatonBib::FullName.new surname: surname, forename: fname
|
157
|
+
end
|
158
|
+
|
159
|
+
#
|
160
|
+
# Parse forename
|
161
|
+
#
|
162
|
+
# @param [Nokogiri::XML::Element, nil] given_name given-names element
|
163
|
+
#
|
164
|
+
# @return [Array<RelatonBib::Forename>] array of forenames
|
165
|
+
#
|
166
|
+
def forename(given_name) # rubocop:disable Metrics/MethodLength
|
167
|
+
return [] unless given_name
|
168
|
+
|
169
|
+
given_name.text.scan(/(\w+)(?:\s(\w)(?:\s|$))?/).map do |nm, int|
|
170
|
+
if nm.size == 1
|
171
|
+
name = nil
|
172
|
+
init = nm
|
173
|
+
else
|
174
|
+
name = nm
|
175
|
+
init = int
|
176
|
+
end
|
177
|
+
RelatonBib::Forename.new(content: name, language: ["en"], script: ["Latn"], initial: init)
|
178
|
+
end
|
179
|
+
end
|
180
|
+
|
181
|
+
#
|
182
|
+
# Parse date
|
183
|
+
#
|
184
|
+
# @return [Array<RelatonBib::BibliographicDate>] array of dates
|
185
|
+
#
|
186
|
+
def parse_date
|
187
|
+
on = dates.min
|
188
|
+
[RelatonBib::BibliographicDate.new(type: "published", on: on)]
|
189
|
+
end
|
190
|
+
|
191
|
+
#
|
192
|
+
# Parse publication dates
|
193
|
+
#
|
194
|
+
# @yield [date, type] date and type
|
195
|
+
#
|
196
|
+
# @return [Array<String, Object>] string date or whatever block returns
|
197
|
+
#
|
198
|
+
def dates
|
199
|
+
@meta.xpath("./pub-date").map do |d|
|
200
|
+
month = date_part(d, "month")
|
201
|
+
day = date_part(d, "day")
|
202
|
+
date = "#{d.at('./year').text}-#{month}-#{day}"
|
203
|
+
block_given? ? yield(date, d[:"pub-type"]) : date
|
204
|
+
end
|
205
|
+
end
|
206
|
+
|
207
|
+
def date_part(date, type)
|
208
|
+
part = date.at("./#{type}")&.text
|
209
|
+
return "01" if part.nil? || part.empty?
|
210
|
+
|
211
|
+
part.rjust(2, "0")
|
212
|
+
end
|
213
|
+
|
214
|
+
#
|
215
|
+
# Parse copyright
|
216
|
+
#
|
217
|
+
# @return [Array<RelatonBib::CopyrightAssociation>] array of copyright associations
|
218
|
+
#
|
219
|
+
def parse_copyright
  @meta.xpath("./permissions").each_with_object([]) do |perm, assocs|
    # A permissions element without a copyright year yields no association.
    from = perm.at("./copyright-year")
    next unless from

    # The statement may list several holders separated by " & ".
    owners = perm.at("./copyright-statement").text.split(" & ").map do |holder|
      # First run of space-separated ASCII words. [A-Za-z] is used instead of
      # [A-z], which also matches the punctuation between "Z" and "a"
      # ("[", "\", "]", "^", "_", "`").
      name = holder[/[A-Za-z]+(?:\s[A-Za-z]+)*/]
      org = RelatonBib::Organization.new name: name
      RelatonBib::ContributionInfo.new(entity: org)
    end
    assocs << RelatonBib::CopyrightAssociation.new(owner: owners, from: from.text)
  end
end
|
232
|
+
|
233
|
+
#
|
234
|
+
# Parse abstract
|
235
|
+
#
|
236
|
+
# @return [Array<RelatonBib::FormattedString>] array of abstracts
|
237
|
+
#
|
238
|
+
def parse_abstract
|
239
|
+
@meta.xpath("./abstract").map do |a|
|
240
|
+
RelatonBib::FormattedString.new(
|
241
|
+
content: a.inner_html, language: a[:"xml:lang"], script: ["Latn"], format: "text/html",
|
242
|
+
)
|
243
|
+
end
|
244
|
+
end
|
245
|
+
|
246
|
+
#
|
247
|
+
# Parse relation
|
248
|
+
#
|
249
|
+
# @return [Array<RelatonBib::DocumentRelation>] array of document relations
|
250
|
+
#
|
251
|
+
def parse_relation
|
252
|
+
dates do |d, t|
|
253
|
+
RelatonBib::DocumentRelation.new(type: "hasManifestation", bibitem: bibitem(d, t))
|
254
|
+
end
|
255
|
+
end
|
256
|
+
|
257
|
+
#
|
258
|
+
# Create bibitem
|
259
|
+
#
|
260
|
+
# @param [String] date
|
261
|
+
# @param [String] type date type
|
262
|
+
#
|
263
|
+
# @return [RelatonBipm::BipmBibliographicItem] bibitem
|
264
|
+
#
|
265
|
+
def bibitem(date, type)
|
266
|
+
dt = RelatonBib::BibliographicDate.new(type: type, on: date)
|
267
|
+
carrier = type == "epub" ? "online" : "print"
|
268
|
+
medium = RelatonBib::Medium.new carrier: carrier
|
269
|
+
BipmBibliographicItem.new title: parse_title, date: [dt], medium: medium
|
270
|
+
end
|
271
|
+
|
272
|
+
#
|
273
|
+
# Parse series
|
274
|
+
#
|
275
|
+
# @return [Array<RelatonBib::Series>] array of series
|
276
|
+
#
|
277
|
+
def parse_series
|
278
|
+
title = RelatonBib::TypedTitleString.new(
|
279
|
+
content: journal_title, language: ["en"], script: ["Latn"],
|
280
|
+
)
|
281
|
+
[RelatonBib::Series.new(title: title)]
|
282
|
+
end
|
283
|
+
|
284
|
+
#
|
285
|
+
# Parse extent
|
286
|
+
#
|
287
|
+
# @return [Array<RelatonBib::Extent>] array of extents
|
288
|
+
#
|
289
|
+
def parse_extent
|
290
|
+
@meta.xpath("./volume|./issue|./fpage").map do |e|
|
291
|
+
if e.name == "fpage"
|
292
|
+
type = "page"
|
293
|
+
to = @meta.at("./lpage")&.text
|
294
|
+
else
|
295
|
+
type = e.name
|
296
|
+
end
|
297
|
+
RelatonBib::Locality.new type, e.text, to
|
298
|
+
end
|
299
|
+
# %w[volume issue page].map.with_index do |t, i|
|
300
|
+
# RelatonBib::Locality.new t, volume_issue_page[i]
|
301
|
+
# end
|
302
|
+
end
|
303
|
+
|
304
|
+
def parse_type
|
305
|
+
"article"
|
306
|
+
end
|
307
|
+
|
308
|
+
alias_method :parse_doctype, :parse_type
|
309
|
+
end
|
310
|
+
end
|
311
|
+
end
|