relaton-bipm 1.14.1 → 1.14.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.gitignore +1 -0
- data/README.adoc +10 -3
- data/lib/relaton_bipm/bipm_bibliography.rb +2 -1
- data/lib/relaton_bipm/bipm_si_brochure_parser.rb +3 -2
- data/lib/relaton_bipm/comment_periond.rb +1 -1
- data/lib/relaton_bipm/data_fetcher.rb +11 -3
- data/lib/relaton_bipm/data_outcomes_parser.rb +4 -6
- data/lib/relaton_bipm/processor.rb +5 -4
- data/lib/relaton_bipm/rawdata_bipm_metrologia/article_parser.rb +301 -0
- data/lib/relaton_bipm/rawdata_bipm_metrologia/fetcher.rb +171 -0
- data/lib/relaton_bipm/version.rb +1 -1
- data/lib/relaton_bipm.rb +2 -0
- metadata +4 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: eba1bdf78beca2e7a36717565c644e97c59f1c83a9c3357967ae121321e8a7a2
|
4
|
+
data.tar.gz: 34abb5dc4b173daaaec433f928b98ab51c8009480b85d554e3f968c6e2b8bd44
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 7bad79524f1203e310be0c2539862323ecd5caa3b971b4aed21bab5cae82f4bba786b35389659f2709e9047ab8c6da9cf4b297ff5c735db2585508d1fd3ed410
|
7
|
+
data.tar.gz: 865ee3c035f133d44c4f59b7ced2ab00d1d3a44375a808a2b87dbbe78be16fd33e2dee57f338f50249c4d0acccd7e2fbc3e18b582882af819b2d7ca829c0f4fd
|
data/.gitignore
CHANGED
data/README.adoc
CHANGED
@@ -189,9 +189,9 @@ item = RelatonBipm::BipmBibliography.get "BIPM SI Brochure"
|
|
189
189
|
...
|
190
190
|
|
191
191
|
# get BIPM Metrologia page
|
192
|
-
bib = RelatonBipm::BipmBibliography.get "BIPM Metrologia 29 6
|
193
|
-
[relaton-bipm] ("BIPM Metrologia 29 6
|
194
|
-
[relaton-bipm] ("BIPM Metrologia 29 6
|
192
|
+
bib = RelatonBipm::BipmBibliography.get "BIPM Metrologia 29 6 001"
|
193
|
+
[relaton-bipm] ("BIPM Metrologia 29 6 001") fetching...
|
194
|
+
[relaton-bipm] ("BIPM Metrologia 29 6 001") found Metrologia 29 6 001
|
195
195
|
=> #<RelatonBipm::BipmBibliographicItem:0x007f8857f94d40
|
196
196
|
...
|
197
197
|
|
@@ -321,6 +321,7 @@ RelatonBipm::BipmBibliographicItem.from_hash hash
|
|
321
321
|
This gem uses the following datasets as data sources:
|
322
322
|
- `bipm-data-outcomes` - looking for a local directory with the repository https://github.com/metanorma/bipm-data-outcomes
|
323
323
|
- `bipm-si-brochure` - looking for a local directory with the repository https://github.com/metanorma/bipm-si-brochure
|
324
|
+
- `rawdata-bipm-metrologia` - looking for a local directory with the repository https://github.com/relaton/rawdata-bipm-metrologia
|
324
325
|
|
325
326
|
The method `RelatonBipm::DataFetcher.fetch(source, output: "data", format: "yaml")` fetches all the documents from the dataset and saves them to the `./data` folder in YAML format.
|
326
327
|
Arguments:
|
@@ -342,6 +343,12 @@ Started at: 2022-06-23 09:37:12 +0200
|
|
342
343
|
Stopped at: 2022-06-23 09:37:12 +0200
|
343
344
|
Done in: 0 sec.
|
344
345
|
=> nil
|
346
|
+
|
347
|
+
RelatonBipm::DataFetcher.fetch "rawdata-bipm-metrologia"
|
348
|
+
Started at: 2022-06-23 09:39:12 +0200
|
349
|
+
Stopped at: 2022-06-23 09:40:34 +0200
|
350
|
+
Done in: 82 sec.
|
351
|
+
=> nil
|
345
352
|
----
|
346
353
|
|
347
354
|
== Development
|
@@ -18,7 +18,8 @@ module RelatonBipm
|
|
18
18
|
def search(text, _year = nil, _opts = {}) # rubocop:disable Metrics/AbcSize, Metrics/MethodLength
|
19
19
|
warn "[relaton-bipm] (\"#{text}\") fetching..."
|
20
20
|
ref = text.sub(/^BIPM\s/, "")
|
21
|
-
item = ref.match?(/^Metrologia/i) ? get_metrologia(ref, magent) : get_bipm(ref, magent)
|
21
|
+
# item = ref.match?(/^Metrologia/i) ? get_metrologia(ref, magent) : get_bipm(ref, magent)
|
22
|
+
item = get_bipm(ref, magent)
|
22
23
|
unless item
|
23
24
|
warn "[relaton-bipm] (\"#{text}\") not found."
|
24
25
|
return
|
@@ -33,8 +33,9 @@ module RelatonBipm
|
|
33
33
|
doc = docstd.at "/bibdata"
|
34
34
|
hash1 = RelatonBipm::XMLParser.from_xml(doc.to_xml).to_hash
|
35
35
|
fix_si_brochure_id hash1
|
36
|
-
|
37
|
-
|
36
|
+
basename = File.join @data_fetcher.output, File.basename(f).sub(/(?:-(?:en|fr))?\.rxl$/, "")
|
37
|
+
outfile = "#{basename}.#{@data_fetcher.ext}"
|
38
|
+
@data_fetcher.index[[hash1["docnumber"] || basename]] = outfile
|
38
39
|
hash = if File.exist? outfile
|
39
40
|
warn_duplicate = false
|
40
41
|
hash2 = YAML.load_file outfile
|
@@ -43,6 +43,7 @@ module RelatonBipm
|
|
43
43
|
case source
|
44
44
|
when "bipm-data-outcomes" then DataOutcomesParser.parse(self)
|
45
45
|
when "bipm-si-brochure" then BipmSiBrochureParser.parse(self)
|
46
|
+
when "rawdata-bipm-metrologia" then RawdataBipmMetrologia::Fetcher.fetch(self)
|
46
47
|
end
|
47
48
|
File.write @index_path, @index.to_yaml, encoding: "UTF-8"
|
48
49
|
end
|
@@ -54,15 +55,22 @@ module RelatonBipm
|
|
54
55
|
# @param [RelatonBipm::BipmBibliographicItem] item document to save
|
55
56
|
# @param [Boolean, nil] warn_duplicate Warn if document already exists
|
56
57
|
#
|
57
|
-
# @return [<Type>] <description>
|
58
|
-
#
|
59
58
|
def write_file(path, item, warn_duplicate: true)
|
59
|
+
content = serialize item
|
60
60
|
if @files.include?(path)
|
61
61
|
warn "File #{path} already exists" if warn_duplicate
|
62
62
|
else
|
63
63
|
@files << path
|
64
64
|
end
|
65
|
-
File.write path,
|
65
|
+
File.write path, content, encoding: "UTF-8"
|
66
|
+
end
|
67
|
+
|
68
|
+
def serialize(item)
|
69
|
+
case @format
|
70
|
+
when "xml" then item.to_xml bibdata: true
|
71
|
+
when "yaml" then item.to_hash.to_yaml
|
72
|
+
when "bibxml" then item.to_bibxml
|
73
|
+
end
|
66
74
|
end
|
67
75
|
end
|
68
76
|
end
|
@@ -82,7 +82,7 @@ module RelatonBipm
|
|
82
82
|
]
|
83
83
|
|
84
84
|
/^(?<num>\d+)(?:-_(?<part>\d+))?-\d{4}$/ =~ en_md["url"].split("/").last
|
85
|
-
file = "#{num}.
|
85
|
+
file = "#{num}.#{@data_fetcher.ext}"
|
86
86
|
path = File.join dir, file
|
87
87
|
hash = bibitem body: body, type: type, en: en_md, fr: fr_md, num: num, src: src, pdf: en["pdf"]
|
88
88
|
if @data_fetcher.files.include?(path) && part
|
@@ -92,13 +92,13 @@ module RelatonBipm
|
|
92
92
|
has_part_item = RelatonBipm::BipmBibliographicItem.from_hash(yaml)
|
93
93
|
has_part_item.relation << RelatonBib::DocumentRelation.new(type: "partOf", bibitem: item)
|
94
94
|
@data_fetcher.write_file path, has_part_item, warn_duplicate: false
|
95
|
-
path = File.join dir, "#{num}-#{part}.
|
95
|
+
path = File.join dir, "#{num}-#{part}.#{@data_fetcher.ext}"
|
96
96
|
elsif part
|
97
97
|
hash[:title].each { |t| t[:content] = t[:content].sub(/\s\(.+\)$/, "") }
|
98
98
|
h = bibitem body: body, type: type, en: en_md, fr: fr_md, num: num, src: src, pdf: en["pdf"]
|
99
99
|
add_part h, part
|
100
100
|
part_item = RelatonBipm::BipmBibliographicItem.new(**h)
|
101
|
-
part_item_path = File.join dir, "#{num}-#{part}.
|
101
|
+
part_item_path = File.join dir, "#{num}-#{part}.#{@data_fetcher.ext}"
|
102
102
|
@data_fetcher.write_file part_item_path, part_item
|
103
103
|
add_to_index part_item, part_item_path
|
104
104
|
hash[:relation] = [RelatonBib::DocumentRelation.new(type: "partOf", bibitem: part_item)]
|
@@ -146,9 +146,7 @@ module RelatonBipm
|
|
146
146
|
hash[:contributor] = contributors date, args[:body]
|
147
147
|
hash[:structuredidentifier] = RelatonBipm::StructuredIdentifier.new docnumber: num
|
148
148
|
item = RelatonBipm::BipmBibliographicItem.new(**hash)
|
149
|
-
file = year
|
150
|
-
file += "-#{num_justed}" # if num.size < 4
|
151
|
-
file += ".yaml"
|
149
|
+
file = "#{year}-#{num_justed}.#{@data_fetcher.ext}"
|
152
150
|
out_dir = File.join args[:dir], r["type"].downcase
|
153
151
|
FileUtils.mkdir_p out_dir
|
154
152
|
path = File.join out_dir, file
|
@@ -9,7 +9,7 @@ module RelatonBipm
|
|
9
9
|
@prefix = "BIPM"
|
10
10
|
@defaultprefix = %r{^(?:BIPM|CCTF|CCDS|CGPM|CIPM)(?!\w)}
|
11
11
|
@idtype = "BIPM"
|
12
|
-
@datasets = %w[bipm-data-outcomes bipm-si-brochure]
|
12
|
+
@datasets = %w[bipm-data-outcomes bipm-si-brochure rawdata-bipm-metrologia]
|
13
13
|
end
|
14
14
|
|
15
15
|
# @param code [String]
|
@@ -21,10 +21,11 @@ module RelatonBipm
|
|
21
21
|
end
|
22
22
|
|
23
23
|
#
|
24
|
-
# Fetch all the documents from https://github.com/metanorma/bipm-data-outcomes
|
25
|
-
#
|
24
|
+
# Fetch all the documents from https://github.com/metanorma/bipm-data-outcomes,
|
25
|
+
# https://github.com/metanorma/bipm-si-brochure, https://github.com/relaton/rawdata-bipm-metrologia
|
26
26
|
#
|
27
|
-
# @param [String] source source name
|
27
|
+
# @param [String] source source name (bipm-data-outcomes, bipm-si-brochure,
|
28
|
+
# rawdata-bipm-metrologia)
|
28
29
|
# @param [Hash] opts
|
29
30
|
# @option opts [String] :output directory to output documents
|
30
31
|
# @option opts [String] :format
|
@@ -0,0 +1,301 @@
|
|
1
|
+
module RelatonBipm
  module RawdataBipmMetrologia
    #
    # Builds a RelatonBipm::BipmBibliographicItem from the `<article>` element
    # of a Metrologia article XML file. Each entry of ATTRS is produced by the
    # `parse_<attr>` method of the same name.
    #
    class ArticleParser
      ATTRS = %i[docid title contributor date copyright abstract relation series
                 extent type doctype].freeze

      #
      # Create new parser and parse document
      #
      # @param [Nokogiri::XML::Element] doc document XML element
      #
      # @return [RelatonBipm::BipmBibliographicItem] document
      #
      def self.parse(doc)
        new(doc).parse
      end

      #
      # Initialize parser
      #
      # @param [Nokogiri::XML::Element] doc XML document
      #
      def initialize(doc)
        @doc = doc
      end

      #
      # Create new document. Calls every `parse_<attr>` method listed in ATTRS
      # and passes the results as keyword arguments to the item constructor.
      #
      # @return [RelatonBipm::BipmBibliographicItem] document
      #
      def parse
        attrs = ATTRS.to_h { |a| [a, send("parse_#{a}")] }
        BipmBibliographicItem.new(**attrs)
      end

      #
      # Parse docid. The primary identifier is the journal title followed by
      # volume, issue and article number; every DOI `article-id` is appended.
      #
      # @return [Array<RelatonBib::DocumentIdentifier>] array of document identifiers
      #
      def parse_docid
        pubid = "#{journal_title} #{volume_issue_page.join(' ')}"
        primary_id = create_docid pubid, "BIPM", true
        dois = @doc.xpath("./front/article-meta/article-id[@pub-id-type='doi']")
        dois.each_with_object([primary_id]) do |id, ids|
          ids << create_docid(id.text, id["pub-id-type"])
        end
      end

      #
      # Parse volume, issue and article number (memoized)
      #
      # @return [Array<String>] array of volume, issue and article number
      #
      def volume_issue_page
        @volume_issue_page ||= begin
          volume = @doc.at("./front/article-meta/volume").text
          issue = @doc.at("./front/article-meta/issue").text
          [volume, issue, article]
        end
      end

      #
      # Article number: the part of the manuscript `article-id` that follows
      # the last underscore.
      #
      # @return [String] article number
      #
      def article
        @doc.at("./front/article-meta/article-id[@pub-id-type='manuscript']").text.match(/[^_]+$/).to_s
      end

      #
      # Parse journal title
      #
      # @return [String] journal title
      #
      def journal_title
        @doc.at("./front/journal-meta/journal-title-group/journal-title").text
      end

      #
      # Create document identifier
      #
      # @param [String] id document id
      # @param [String] type id type
      # @param [Boolean, nil] primary is primary id
      #
      # @return [RelatonBib::DocumentIdentifier] document identifier
      #
      def create_docid(id, type, primary = nil)
        RelatonBib::DocumentIdentifier.new id: id, type: type, primary: primary
      end

      #
      # Parse title
      #
      # @return [Array<RelatonBib::TypedTitleString>] array of title strings
      #
      def parse_title
        @doc.xpath("./front/article-meta/title-group/article-title").map do |t|
          RelatonBib::TypedTitleString.new content: t.text, language: t[:"xml:lang"], script: "Latn"
        end
      end

      #
      # Parse contributor. A `contrib` element is turned into a person when it
      # has a `name` child, otherwise into an organization when it has a
      # `collab` child. Elements with neither are skipped.
      #
      # @return [Array<RelatonBib::ContributionInfo>] array of contributors
      #
      def parse_contributor
        @doc.xpath("./front/article-meta/contrib-group/contrib").each_with_object([]) do |c, contribs|
          entity = create_person(c) || create_organization(c)
          next unless entity

          contribs << RelatonBib::ContributionInfo.new(entity: entity, role: [type: c[:"contrib-type"]])
        end
      end

      #
      # Create person from the `name` child of a contributor element
      #
      # @param [Nokogiri::XML::Element] contrib contributor element
      #
      # @return [RelatonBib::Person, nil] person or nil if there is no `name`
      #
      def create_person(contrib)
        name = contrib.at("./name")
        return unless name

        RelatonBib::Person.new name: fullname(name), affiliation: affiliation(contrib)
      end

      #
      # Create organization from the `collab` child of a contributor element
      #
      # @param [Nokogiri::XML::Element] contrib contributor element
      #
      # @return [RelatonBib::Organization, nil] organization or nil if there is no `collab`
      #
      def create_organization(contrib)
        collab = contrib.at("./collab")
        return unless collab

        RelatonBib::Organization.new name: collab.text
      end

      #
      # Parse affiliations. The affiliation text (the node following the
      # `label` of the referenced `aff` element) is split on ", ": the last
      # two parts are taken as city and country, the rest as organization name.
      # References whose `aff` text cannot be found are skipped.
      #
      # @param [Nokogiri::XML::Element] contrib contributor element
      #
      # @return [Array<RelatonBib::Affiliation>] array of affiliations
      #
      def affiliation(contrib) # rubocop:disable Metrics/AbcSize, Metrics/MethodLength
        contrib.xpath("./xref[@ref-type='aff']").each_with_object([]) do |x, affs|
          a = @doc.at("./front/article-meta/contrib-group/aff[@id='#{x[:rid]}']/label/following-sibling::node()")
          next unless a

          parts = a.text.split(", ")
          orgname = parts[0..-3].join(", ")
          city, country = parts[-2..]
          address = []
          address << RelatonBib::Address.new(city: city, country: country) if city && country
          org = RelatonBib::Organization.new name: orgname, contact: address
          affs << RelatonBib::Affiliation.new(organization: org)
        end
      end

      #
      # Create full name
      #
      # @param [Nokogiri::XML::Element] name `name` element of a contributor
      #
      # @return [RelatonBib::FullName] full name
      #
      def fullname(name)
        fname = forename name.at("./given-names")
        sname = name.at("./surname").text
        surname = RelatonBib::LocalizedString.new sname, "en", "Latn"
        RelatonBib::FullName.new surname: surname, forename: fname
      end

      #
      # Parse forename. Each word of the given names becomes a forename; a
      # single-letter word is stored as an initial instead of as content.
      #
      # @param [Nokogiri::XML::Element, nil] given_name `given-names` element
      #
      # @return [Array<RelatonBib::Forename>] array of forenames
      #
      def forename(given_name)
        return [] unless given_name

        # [[:word:]] is the Unicode-aware equivalent of \w (which is ASCII-only
        # in Ruby), so accented names such as "José" are kept whole.
        given_name.text.scan(/([[:word:]]+)(?:\s([[:word:]])(?:\s|$))?/).map do |nm, int|
          # NOTE(review): when nm is a single letter, a captured following
          # initial (int) is not used - confirm whether that is intended.
          name, init = nm.size == 1 ? [nil, nm] : [nm, int]
          RelatonBib::Forename.new(content: name, language: ["en"], script: ["Latn"], initial: init)
        end
      end

      #
      # Parse date. The earliest `pub-date` is used as the published date.
      #
      # @return [Array<RelatonBib::BibliographicDate>] array of dates, empty
      #   when the article has no `pub-date`
      #
      def parse_date
        on = dates.min
        return [] unless on

        [RelatonBib::BibliographicDate.new(type: "published", on: on)]
      end

      #
      # Collect `pub-date` elements as "YYYY-MM-DD" strings
      #
      # @yield [date, type] date string and the `pub-type` attribute value
      #
      # @return [Array<String, Object>] string dates or whatever the block returns
      #
      def dates
        @doc.xpath("./front/article-meta/pub-date").map do |d|
          month = date_part(d, "month")
          day = date_part(d, "day")
          date = "#{d.at('./year').text}-#{month}-#{day}"
          block_given? ? yield(date, d[:"pub-type"]) : date
        end
      end

      #
      # Read a date component and left-pad it to two digits
      #
      # @param [Nokogiri::XML::Element] date `pub-date` element
      # @param [String] type child element name ("month" or "day")
      #
      # @return [String] two-digit component, "01" when missing or empty
      #
      def date_part(date, type)
        part = date.at("./#{type}")&.text
        return "01" if part.nil? || part.empty?

        part.rjust(2, "0")
      end

      #
      # Parse copyright. The copyright statement is split on " & " and the
      # first run of ASCII letters/spaces in each piece is taken as an owner
      # name. Entries without a year or without any owner name are skipped.
      #
      # @return [Array<RelatonBib::CopyrightAssociation>] array of copyright associations
      #
      def parse_copyright # rubocop:disable Metrics/MethodLength
        @doc.xpath("./front/article-meta/permissions").each_with_object([]) do |l, m|
          from = l.at("./copyright-year")
          next unless from

          statement = l.at("./copyright-statement")&.text.to_s
          owner = statement.split(" & ").each_with_object([]) do |c, owners|
            # [A-Za-z], not [A-z]: the latter also matches "[", "\", "]", "^", "_" and "`"
            name = c[/[A-Za-z]+(?:\s[A-Za-z]+)*/]
            next unless name

            org = RelatonBib::Organization.new name: name
            owners << RelatonBib::ContributionInfo.new(entity: org)
          end
          next if owner.empty?

          m << RelatonBib::CopyrightAssociation.new(owner: owner, from: from.text)
        end
      end

      #
      # Parse abstract
      #
      # @return [Array<RelatonBib::FormattedString>] array of abstracts
      #
      def parse_abstract
        @doc.xpath("./front/article-meta/abstract").map do |a|
          RelatonBib::FormattedString.new(
            content: a.inner_html, language: a[:"xml:lang"], script: ["Latn"], format: "text/html",
          )
        end
      end

      #
      # Parse relation. One "hasManifestation" relation is created per `pub-date`.
      #
      # @return [Array<RelatonBib::DocumentRelation>] array of document relations
      #
      def parse_relation
        dates do |d, t|
          RelatonBib::DocumentRelation.new(type: "hasManifestation", bibitem: bibitem(d, t))
        end
      end

      #
      # Create bibitem for a manifestation. The carrier is "online" for the
      # "epub" date type and "print" for any other type.
      #
      # @param [String] date
      # @param [String] type date type
      #
      # @return [RelatonBipm::BipmBibliographicItem] bibitem
      #
      def bibitem(date, type)
        dt = RelatonBib::BibliographicDate.new(type: type, on: date)
        carrier = type == "epub" ? "online" : "print"
        medium = RelatonBib::Medium.new carrier: carrier
        BipmBibliographicItem.new title: parse_title, date: [dt], medium: medium
      end

      #
      # Parse series
      #
      # @return [Array<RelatonBib::Series>] array of series
      #
      def parse_series
        title = RelatonBib::TypedTitleString.new(
          content: journal_title, language: ["en"], script: ["Latn"],
        )
        [RelatonBib::Series.new(title: title)]
      end

      #
      # Parse extent. Volume, issue and article number are exposed as
      # "volume", "issue" and "page" localities respectively.
      #
      # @return [Array<RelatonBib::Locality>] array of localities
      #
      def parse_extent
        %w[volume issue page].zip(volume_issue_page).map do |type, value|
          RelatonBib::Locality.new type, value
        end
      end

      #
      # Document type; also used as doctype via the `parse_doctype` alias
      #
      # @return [String] always "article"
      #
      def parse_type
        "article"
      end

      alias_method :parse_doctype, :parse_type
    end
  end
end
|
@@ -0,0 +1,171 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module RelatonBipm
  module RawdataBipmMetrologia
    #
    # Converts the local `rawdata-bipm-metrologia` dataset into bibliographic
    # items: one for the journal, one per volume, one per issue and one per
    # article. Items are written and indexed through the given data fetcher.
    #
    class Fetcher
      # Glob pattern of the journal directory. Because of the `*content`
      # wildcard it may expand to several directories, so the same volume or
      # issue name can be listed more than once.
      DIR = "rawdata-bipm-metrologia/data/*content/0026-1394"

      # @param data_fetcher [RelatonBipm::DataFetcher]
      def self.fetch(data_fetcher)
        new(data_fetcher).fetch
      end

      # @param data_fetcher [RelatonBipm::DataFetcher]
      def initialize(data_fetcher)
        @data_fetcher = data_fetcher
      end

      #
      # Fetch documents from rawdata-bipm-metrologia and save to files
      #
      def fetch
        fetch_metrologia
        fetch_volumes
        fetch_issues
        fetch_articles
      end

      #
      # Fetch articles from rawdata-bipm-metrologia and save to files.
      # XML files without an `/article` root element are skipped with a warning.
      #
      def fetch_articles
        Dir["#{DIR}/**/*.xml"].each do |path|
          doc = Nokogiri::XML File.read(path, encoding: "UTF-8")
          article = doc.at("/article")
          unless article
            warn "[relaton-bipm] File #{path} has no article element, skipped."
            next
          end

          item = ArticleParser.parse article
          save_item item.docidentifier.first.id, item
        end
      end

      #
      # Fetch volumes from rawdata-bipm-metrologia and save to files
      #
      def fetch_volumes
        Dir["#{DIR}/*"].map { |path| path.split("/").last }.uniq.each do |volume|
          fetch_metrologia volume
        end
      end

      #
      # Fetch issues from rawdata-bipm-metrologia and save to files.
      # Each volume/issue pair is processed once even if DIR expands to
      # several directories containing it.
      #
      def fetch_issues
        Dir["#{DIR}/*/*"].map { |path| path.split("/").last(2) }.uniq.each do |volume, issue|
          fetch_metrologia volume, issue
        end
      end

      #
      # Create the journal (no arguments), volume or issue document and save it to a file
      #
      # @overload fetch_metrologia(volume, issue)
      #   @param [String] volume volume directory name
      #   @param [String] issue issue directory name
      # @overload fetch_metrologia(volume)
      #   @param [String] volume volume directory name
      # @overload fetch_metrologia
      #
      def fetch_metrologia(*args)
        id = identifier(*args)
        item = BipmBibliographicItem.new(
          type: "article", formattedref: formattedref(id), docid: docidentifier(id),
          language: ["en"], script: ["Latn"], relation: relation(*args),
          link: typed_uri(*args)
        )
        save_item id, item
      end

      #
      # Create formattedref
      #
      # @param [String] id document identifier
      #
      # @return [RelatonBib::FormattedRef] formattedref
      #
      def formattedref(id)
        RelatonBib::FormattedRef.new content: id, language: "en", script: "Latn"
      end

      #
      # Create docidentifier
      #
      # @param [String] id document identifier
      #
      # @return [Array<RelatonBib::DocumentIdentifier>] docidentifier
      #
      def docidentifier(id)
        [RelatonBib::DocumentIdentifier.new(id: id, type: "BIPM", primary: true)]
      end

      #
      # Create identifier: "Metrologia" followed by the given parts
      #
      # @overload identifier(volume, issue, article)
      #   @param [String] volume volume directory name
      #   @param [String] issue issue directory name
      #   @param [String] article article directory name
      # @overload identifier(volume, issue)
      #   @param [String] volume volume directory name
      #   @param [String] issue issue directory name
      # @overload identifier(volume)
      #   @param [String] volume volume directory name
      #
      # @return [String] document identifier
      #
      def identifier(*args)
        ["Metrologia", *id_parts(*args)].join(" ")
      end

      #
      # Reduce each name to the part that follows its last underscore
      #
      # @param [Array<String>] args directory names
      #
      # @return [Array<String>] identifier parts
      #
      def id_parts(*args)
        args.map { |p| p.match(/[^_]+$/).to_s }
      end

      #
      # Create relations to the entries found directly under the directory of
      # the journal, volume or issue. Each entry name yields one relation.
      #
      # @param (see #fetch_metrologia)
      #
      # @return [Array<RelatonBib::DocumentRelation>] relations
      #
      def relation(*args)
        dir = [DIR, *args].join("/")
        parts = Dir["#{dir}/*"].map { |path| path.split("/").last }.uniq
        parts.map do |part|
          id = identifier(*args, part)
          RelatonBib::DocumentRelation.new(type: "partOf", bibitem: rel_bibitem(id))
        end
      end

      #
      # Create relation bibitem
      #
      # @param [String] id document identifier
      #
      # @return [RelatonBipm::BipmBibliographicItem] bibitem
      #
      def rel_bibitem(id)
        BipmBibliographicItem.new(
          formattedref: formattedref(id), docid: docidentifier(id),
        )
      end

      #
      # Create source link
      #
      # @param (see #fetch_metrologia)
      #
      # @return [Array<RelatonBib::TypedUri>] array with one "src" URI
      #
      def typed_uri(*args)
        [RelatonBib::TypedUri.new(type: "src", content: link(*args))]
      end

      #
      # Build the iopscience.iop.org URL of the journal, a volume or an issue
      #
      # @param (see #fetch_metrologia)
      #
      # @return [String, nil] URL, nil when more than two arguments are given
      #
      def link(*args)
        params = id_parts(*args).join("/")
        case args.size
        when 0 then "https://iopscience.iop.org/journal/0026-1394"
        when 1 then "https://iopscience.iop.org/volume/0026-1394/#{params}"
        when 2 then "https://iopscience.iop.org/issue/0026-1394/#{params}"
        end
      end

      private

      #
      # Register the document in the index and write it to the output directory.
      # The file name is the identifier downcased with spaces replaced by "-".
      #
      # @param [String] id document identifier
      # @param [RelatonBipm::BipmBibliographicItem] item document
      #
      def save_item(id, item)
        file = "#{id.downcase.gsub(' ', '-')}.#{@data_fetcher.ext}"
        path = File.join(@data_fetcher.output, file)
        @data_fetcher.index[[id]] = path
        @data_fetcher.write_file path, item
      end
    end
  end
end
|
data/lib/relaton_bipm/version.rb
CHANGED
data/lib/relaton_bipm.rb
CHANGED
@@ -17,6 +17,8 @@ require "relaton_bipm/index"
|
|
17
17
|
require "relaton_bipm/data_fetcher"
|
18
18
|
require "relaton_bipm/data_outcomes_parser"
|
19
19
|
require "relaton_bipm/bipm_si_brochure_parser"
|
20
|
+
require "relaton_bipm/rawdata_bipm_metrologia/fetcher"
|
21
|
+
require "relaton_bipm/rawdata_bipm_metrologia/article_parser"
|
20
22
|
|
21
23
|
module RelatonBipm
|
22
24
|
class Error < StandardError; end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: relaton-bipm
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.14.
|
4
|
+
version: 1.14.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ribose Inc.
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2023-02-16 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: byebug
|
@@ -218,6 +218,8 @@ files:
|
|
218
218
|
- lib/relaton_bipm/hash_converter.rb
|
219
219
|
- lib/relaton_bipm/index.rb
|
220
220
|
- lib/relaton_bipm/processor.rb
|
221
|
+
- lib/relaton_bipm/rawdata_bipm_metrologia/article_parser.rb
|
222
|
+
- lib/relaton_bipm/rawdata_bipm_metrologia/fetcher.rb
|
221
223
|
- lib/relaton_bipm/structured_identifier.rb
|
222
224
|
- lib/relaton_bipm/version.rb
|
223
225
|
- lib/relaton_bipm/workgroup.rb
|