relaton-bipm 1.14.1 → 1.14.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +1 -0
- data/README.adoc +10 -3
- data/lib/relaton_bipm/bipm_bibliography.rb +2 -1
- data/lib/relaton_bipm/bipm_si_brochure_parser.rb +3 -2
- data/lib/relaton_bipm/comment_periond.rb +1 -1
- data/lib/relaton_bipm/data_fetcher.rb +11 -3
- data/lib/relaton_bipm/data_outcomes_parser.rb +4 -6
- data/lib/relaton_bipm/processor.rb +5 -4
- data/lib/relaton_bipm/rawdata_bipm_metrologia/article_parser.rb +301 -0
- data/lib/relaton_bipm/rawdata_bipm_metrologia/fetcher.rb +171 -0
- data/lib/relaton_bipm/version.rb +1 -1
- data/lib/relaton_bipm.rb +2 -0
- metadata +4 -2
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: eba1bdf78beca2e7a36717565c644e97c59f1c83a9c3357967ae121321e8a7a2
|
|
4
|
+
data.tar.gz: 34abb5dc4b173daaaec433f928b98ab51c8009480b85d554e3f968c6e2b8bd44
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 7bad79524f1203e310be0c2539862323ecd5caa3b971b4aed21bab5cae82f4bba786b35389659f2709e9047ab8c6da9cf4b297ff5c735db2585508d1fd3ed410
|
|
7
|
+
data.tar.gz: 865ee3c035f133d44c4f59b7ced2ab00d1d3a44375a808a2b87dbbe78be16fd33e2dee57f338f50249c4d0acccd7e2fbc3e18b582882af819b2d7ca829c0f4fd
|
data/.gitignore
CHANGED
data/README.adoc
CHANGED
|
@@ -189,9 +189,9 @@ item = RelatonBipm::BipmBibliography.get "BIPM SI Brochure"
|
|
|
189
189
|
...
|
|
190
190
|
|
|
191
191
|
# get BIPM Metrologia page
|
|
192
|
-
bib = RelatonBipm::BipmBibliography.get "BIPM Metrologia 29 6
|
|
193
|
-
[relaton-bipm] ("BIPM Metrologia 29 6
|
|
194
|
-
[relaton-bipm] ("BIPM Metrologia 29 6
|
|
192
|
+
bib = RelatonBipm::BipmBibliography.get "BIPM Metrologia 29 6 001"
|
|
193
|
+
[relaton-bipm] ("BIPM Metrologia 29 6 001") fetching...
|
|
194
|
+
[relaton-bipm] ("BIPM Metrologia 29 6 001") found Metrologia 29 6 001
|
|
195
195
|
=> #<RelatonBipm::BipmBibliographicItem:0x007f8857f94d40
|
|
196
196
|
...
|
|
197
197
|
|
|
@@ -321,6 +321,7 @@ RelatonBipm::BipmBibliographicItem.from_hash hash
|
|
|
321
321
|
This gem uses the following datasets as data sources:
|
|
322
322
|
- `bipm-data-outcomes` - looking for a local directory with the repository https://github.com/metanorma/bipm-data-outcomes
|
|
323
323
|
- `bipm-si-brochure` - looking for a local directory with the repository https://github.com/metanorma/bipm-si-brochure
|
|
324
|
+
- `rawdata-bipm-metrologia` - looking for a local directory with the repository https://github.com/relaton/rawdata-bipm-metrologia
|
|
324
325
|
|
|
325
326
|
The method `RelatonBipm::DataFetcher.fetch(source, output: "data", format: "yaml")` fetches all the documents from the dataset and saves them to the `./data` folder in YAML format.
|
|
326
327
|
Arguments:
|
|
@@ -342,6 +343,12 @@ Started at: 2022-06-23 09:37:12 +0200
|
|
|
342
343
|
Stopped at: 2022-06-23 09:37:12 +0200
|
|
343
344
|
Done in: 0 sec.
|
|
344
345
|
=> nil
|
|
346
|
+
|
|
347
|
+
RelatonBipm::DataFetcher.fetch "rawdata-bipm-metrologia"
|
|
348
|
+
Started at: 2022-06-23 09:39:12 +0200
|
|
349
|
+
Stopped at: 2022-06-23 09:40:34 +0200
|
|
350
|
+
Done in: 82 sec.
|
|
351
|
+
=> nil
|
|
345
352
|
----
|
|
346
353
|
|
|
347
354
|
== Development
|
|
@@ -18,7 +18,8 @@ module RelatonBipm
|
|
|
18
18
|
def search(text, _year = nil, _opts = {}) # rubocop:disable Metrics/AbcSize, Metrics/MethodLength
|
|
19
19
|
warn "[relaton-bipm] (\"#{text}\") fetching..."
|
|
20
20
|
ref = text.sub(/^BIPM\s/, "")
|
|
21
|
-
item = ref.match?(/^Metrologia/i) ? get_metrologia(ref, magent) : get_bipm(ref, magent)
|
|
21
|
+
# item = ref.match?(/^Metrologia/i) ? get_metrologia(ref, magent) : get_bipm(ref, magent)
|
|
22
|
+
item = get_bipm(ref, magent)
|
|
22
23
|
unless item
|
|
23
24
|
warn "[relaton-bipm] (\"#{text}\") not found."
|
|
24
25
|
return
|
|
@@ -33,8 +33,9 @@ module RelatonBipm
|
|
|
33
33
|
doc = docstd.at "/bibdata"
|
|
34
34
|
hash1 = RelatonBipm::XMLParser.from_xml(doc.to_xml).to_hash
|
|
35
35
|
fix_si_brochure_id hash1
|
|
36
|
-
|
|
37
|
-
|
|
36
|
+
basename = File.join @data_fetcher.output, File.basename(f).sub(/(?:-(?:en|fr))?\.rxl$/, "")
|
|
37
|
+
outfile = "#{basename}.#{@data_fetcher.ext}"
|
|
38
|
+
@data_fetcher.index[[hash1["docnumber"] || basename]] = outfile
|
|
38
39
|
hash = if File.exist? outfile
|
|
39
40
|
warn_duplicate = false
|
|
40
41
|
hash2 = YAML.load_file outfile
|
|
@@ -43,6 +43,7 @@ module RelatonBipm
|
|
|
43
43
|
case source
|
|
44
44
|
when "bipm-data-outcomes" then DataOutcomesParser.parse(self)
|
|
45
45
|
when "bipm-si-brochure" then BipmSiBrochureParser.parse(self)
|
|
46
|
+
when "rawdata-bipm-metrologia" then RawdataBipmMetrologia::Fetcher.fetch(self)
|
|
46
47
|
end
|
|
47
48
|
File.write @index_path, @index.to_yaml, encoding: "UTF-8"
|
|
48
49
|
end
|
|
@@ -54,15 +55,22 @@ module RelatonBipm
|
|
|
54
55
|
# @param [RelatonBipm::BipmBibliographicItem] item document to save
|
|
55
56
|
# @param [Boolean, nil] warn_duplicate Warn if document already exists
|
|
56
57
|
#
|
|
57
|
-
# @return [<Type>] <description>
|
|
58
|
-
#
|
|
59
58
|
def write_file(path, item, warn_duplicate: true)
|
|
59
|
+
content = serialize item
|
|
60
60
|
if @files.include?(path)
|
|
61
61
|
warn "File #{path} already exists" if warn_duplicate
|
|
62
62
|
else
|
|
63
63
|
@files << path
|
|
64
64
|
end
|
|
65
|
-
File.write path,
|
|
65
|
+
File.write path, content, encoding: "UTF-8"
|
|
66
|
+
end
|
|
67
|
+
|
|
68
|
+
def serialize(item)
|
|
69
|
+
case @format
|
|
70
|
+
when "xml" then item.to_xml bibdata: true
|
|
71
|
+
when "yaml" then item.to_hash.to_yaml
|
|
72
|
+
when "bibxml" then item.to_bibxml
|
|
73
|
+
end
|
|
66
74
|
end
|
|
67
75
|
end
|
|
68
76
|
end
|
|
@@ -82,7 +82,7 @@ module RelatonBipm
|
|
|
82
82
|
]
|
|
83
83
|
|
|
84
84
|
/^(?<num>\d+)(?:-_(?<part>\d+))?-\d{4}$/ =~ en_md["url"].split("/").last
|
|
85
|
-
file = "#{num}.
|
|
85
|
+
file = "#{num}.#{@data_fetcher.ext}"
|
|
86
86
|
path = File.join dir, file
|
|
87
87
|
hash = bibitem body: body, type: type, en: en_md, fr: fr_md, num: num, src: src, pdf: en["pdf"]
|
|
88
88
|
if @data_fetcher.files.include?(path) && part
|
|
@@ -92,13 +92,13 @@ module RelatonBipm
|
|
|
92
92
|
has_part_item = RelatonBipm::BipmBibliographicItem.from_hash(yaml)
|
|
93
93
|
has_part_item.relation << RelatonBib::DocumentRelation.new(type: "partOf", bibitem: item)
|
|
94
94
|
@data_fetcher.write_file path, has_part_item, warn_duplicate: false
|
|
95
|
-
path = File.join dir, "#{num}-#{part}.
|
|
95
|
+
path = File.join dir, "#{num}-#{part}.#{@data_fetcher.ext}"
|
|
96
96
|
elsif part
|
|
97
97
|
hash[:title].each { |t| t[:content] = t[:content].sub(/\s\(.+\)$/, "") }
|
|
98
98
|
h = bibitem body: body, type: type, en: en_md, fr: fr_md, num: num, src: src, pdf: en["pdf"]
|
|
99
99
|
add_part h, part
|
|
100
100
|
part_item = RelatonBipm::BipmBibliographicItem.new(**h)
|
|
101
|
-
part_item_path = File.join dir, "#{num}-#{part}.
|
|
101
|
+
part_item_path = File.join dir, "#{num}-#{part}.#{@data_fetcher.ext}"
|
|
102
102
|
@data_fetcher.write_file part_item_path, part_item
|
|
103
103
|
add_to_index part_item, part_item_path
|
|
104
104
|
hash[:relation] = [RelatonBib::DocumentRelation.new(type: "partOf", bibitem: part_item)]
|
|
@@ -146,9 +146,7 @@ module RelatonBipm
|
|
|
146
146
|
hash[:contributor] = contributors date, args[:body]
|
|
147
147
|
hash[:structuredidentifier] = RelatonBipm::StructuredIdentifier.new docnumber: num
|
|
148
148
|
item = RelatonBipm::BipmBibliographicItem.new(**hash)
|
|
149
|
-
file = year
|
|
150
|
-
file += "-#{num_justed}" # if num.size < 4
|
|
151
|
-
file += ".yaml"
|
|
149
|
+
file = "#{year}-#{num_justed}.#{@data_fetcher.ext}"
|
|
152
150
|
out_dir = File.join args[:dir], r["type"].downcase
|
|
153
151
|
FileUtils.mkdir_p out_dir
|
|
154
152
|
path = File.join out_dir, file
|
|
@@ -9,7 +9,7 @@ module RelatonBipm
|
|
|
9
9
|
@prefix = "BIPM"
|
|
10
10
|
@defaultprefix = %r{^(?:BIPM|CCTF|CCDS|CGPM|CIPM)(?!\w)}
|
|
11
11
|
@idtype = "BIPM"
|
|
12
|
-
@datasets = %w[bipm-data-outcomes bipm-si-brochure]
|
|
12
|
+
@datasets = %w[bipm-data-outcomes bipm-si-brochure rawdata-bipm-metrologia]
|
|
13
13
|
end
|
|
14
14
|
|
|
15
15
|
# @param code [String]
|
|
@@ -21,10 +21,11 @@ module RelatonBipm
|
|
|
21
21
|
end
|
|
22
22
|
|
|
23
23
|
#
|
|
24
|
-
# Fetch all the documents from https://github.com/metanorma/bipm-data-outcomes
|
|
25
|
-
#
|
|
24
|
+
# Fetch all the documents from https://github.com/metanorma/bipm-data-outcomes,
|
|
25
|
+
# https://github.com/metanorma/bipm-si-brochure, https://github.com/relaton/rawdata-bipm-metrologia
|
|
26
26
|
#
|
|
27
|
-
# @param [String] source source name
|
|
27
|
+
# @param [String] source source name (bipm-data-outcomes, bipm-si-brochure,
|
|
28
|
+
# rawdata-bipm-metrologia)
|
|
28
29
|
# @param [Hash] opts
|
|
29
30
|
# @option opts [String] :output directory to output documents
|
|
30
31
|
# @option opts [String] :format
|
|
@@ -0,0 +1,301 @@
|
|
|
1
|
+
module RelatonBipm
|
|
2
|
+
module RawdataBipmMetrologia
|
|
3
|
+
class ArticleParser
|
|
4
|
+
ATTRS = %i[docid title contributor date copyright abstract relation series
|
|
5
|
+
extent type doctype].freeze
|
|
6
|
+
#
|
|
7
|
+
# Create new parser and parse document
|
|
8
|
+
#
|
|
9
|
+
# @param [Nokogiri::XML::Element] doc document XML element
|
|
10
|
+
#
|
|
11
|
+
# @return [RelatonBipm::BipmBibliographicItem] document
|
|
12
|
+
#
|
|
13
|
+
def self.parse(doc)
|
|
14
|
+
new(doc).parse
|
|
15
|
+
end
|
|
16
|
+
|
|
17
|
+
#
|
|
18
|
+
# Initialize parser
|
|
19
|
+
#
|
|
20
|
+
# @param [Nokogiri::XML::Element] doc XML document
|
|
21
|
+
#
|
|
22
|
+
def initialize(doc)
|
|
23
|
+
@doc = doc
|
|
24
|
+
end
|
|
25
|
+
|
|
26
|
+
#
|
|
27
|
+
# Create new document
|
|
28
|
+
#
|
|
29
|
+
# @return [RelatonBipm::BipmBibliographicItem] document
|
|
30
|
+
#
|
|
31
|
+
def parse
|
|
32
|
+
attrs = ATTRS.to_h { |a| [a, send("parse_#{a}")] }
|
|
33
|
+
BipmBibliographicItem.new(**attrs)
|
|
34
|
+
end
|
|
35
|
+
|
|
36
|
+
#
|
|
37
|
+
# Parse docid
|
|
38
|
+
#
|
|
39
|
+
# @return [Array<RelatonBib::DocumentIdentifier>] array of document identifiers
|
|
40
|
+
#
|
|
41
|
+
def parse_docid
|
|
42
|
+
pubid = "#{journal_title} #{volume_issue_page.join(' ')}"
|
|
43
|
+
primary_id = create_docid pubid, "BIPM", true
|
|
44
|
+
@doc.xpath("./front/article-meta/article-id[@pub-id-type='doi']")
|
|
45
|
+
.each_with_object([primary_id]) do |id, m|
|
|
46
|
+
m << create_docid(id.text, id["pub-id-type"])
|
|
47
|
+
end
|
|
48
|
+
end
|
|
49
|
+
|
|
50
|
+
#
|
|
51
|
+
# Parse volume, issue and page
|
|
52
|
+
#
|
|
53
|
+
# @return [Array<String>] array of volume, issue and page
|
|
54
|
+
#
|
|
55
|
+
def volume_issue_page
|
|
56
|
+
@volume_issue_page ||= begin
|
|
57
|
+
volume = @doc.at("./front/article-meta/volume").text
|
|
58
|
+
issue = @doc.at("./front/article-meta/issue").text
|
|
59
|
+
# page = @doc.at("./front/article-meta/fpage")&.text || manuscript
|
|
60
|
+
[volume, issue, article]
|
|
61
|
+
end
|
|
62
|
+
end
|
|
63
|
+
|
|
64
|
+
def article
|
|
65
|
+
@doc.at("./front/article-meta/article-id[@pub-id-type='manuscript']").text.match(/[^_]+$/).to_s
|
|
66
|
+
end
|
|
67
|
+
|
|
68
|
+
#
|
|
69
|
+
# Parse journal title
|
|
70
|
+
#
|
|
71
|
+
# @return [String] journal title
|
|
72
|
+
#
|
|
73
|
+
def journal_title
|
|
74
|
+
@doc.at("./front/journal-meta/journal-title-group/journal-title").text
|
|
75
|
+
end
|
|
76
|
+
|
|
77
|
+
#
|
|
78
|
+
# Create document identifier
|
|
79
|
+
#
|
|
80
|
+
# @param [String] id document id
|
|
81
|
+
# @param [String] type id type
|
|
82
|
+
# @param [Boolean, nil] primary is primary id
|
|
83
|
+
#
|
|
84
|
+
# @return [RelatonBib::DocumentIdentifier] document identifier
|
|
85
|
+
#
|
|
86
|
+
def create_docid(id, type, primary = nil)
|
|
87
|
+
RelatonBib::DocumentIdentifier.new id: id, type: type, primary: primary
|
|
88
|
+
end
|
|
89
|
+
|
|
90
|
+
#
|
|
91
|
+
# Parse title
|
|
92
|
+
#
|
|
93
|
+
# @return [Array<RelatonBib::TypedTitleString>] array of title strings
|
|
94
|
+
#
|
|
95
|
+
def parse_title
|
|
96
|
+
@doc.xpath("./front/article-meta/title-group/article-title").map do |t|
|
|
97
|
+
RelatonBib::TypedTitleString.new content: t.text, language: t[:"xml:lang"], script: "Latn"
|
|
98
|
+
end
|
|
99
|
+
end
|
|
100
|
+
|
|
101
|
+
#
|
|
102
|
+
# Parse contributor
|
|
103
|
+
#
|
|
104
|
+
# @return [Array<RelatonBib::Contributor>] array of contributors
|
|
105
|
+
#
|
|
106
|
+
def parse_contributor
|
|
107
|
+
@doc.xpath("./front/article-meta/contrib-group/contrib").map do |c|
|
|
108
|
+
entity = create_person(c) || create_organization(c)
|
|
109
|
+
RelatonBib::ContributionInfo.new(entity: entity, role: [type: c[:"contrib-type"]])
|
|
110
|
+
end
|
|
111
|
+
end
|
|
112
|
+
|
|
113
|
+
def create_person(contrib)
|
|
114
|
+
name = contrib.at("./name")
|
|
115
|
+
return unless name
|
|
116
|
+
|
|
117
|
+
RelatonBib::Person.new name: fullname(name), affiliation: affiliation(contrib)
|
|
118
|
+
end
|
|
119
|
+
|
|
120
|
+
def create_organization(contrib)
|
|
121
|
+
RelatonBib::Organization.new name: contrib.at("./collab").text
|
|
122
|
+
end
|
|
123
|
+
|
|
124
|
+
#
|
|
125
|
+
# Parse affiliations
|
|
126
|
+
#
|
|
127
|
+
# @param [Nokogiri::XML::Element] contrib contributor element
|
|
128
|
+
#
|
|
129
|
+
# @return [Array<RelatonBib::Affiliation>] array of affiliations
|
|
130
|
+
#
|
|
131
|
+
def affiliation(contrib) # rubocop:disable Metrics/AbcSize
|
|
132
|
+
contrib.xpath("./xref[@ref-type='aff']").map do |x|
|
|
133
|
+
a = @doc.at("./front/article-meta/contrib-group/aff[@id='#{x[:rid]}']/label/following-sibling::node()")
|
|
134
|
+
parts = a.text.split(", ")
|
|
135
|
+
orgname = parts[0..-3].join(", ")
|
|
136
|
+
city, country = parts[-2..]
|
|
137
|
+
address = []
|
|
138
|
+
address << RelatonBib::Address.new(city: city, country: country) if city && country
|
|
139
|
+
org = RelatonBib::Organization.new name: orgname, contact: address
|
|
140
|
+
RelatonBib::Affiliation.new organization: org
|
|
141
|
+
end
|
|
142
|
+
end
|
|
143
|
+
|
|
144
|
+
#
|
|
145
|
+
# Create full name
|
|
146
|
+
#
|
|
147
|
+
# @param [Nokogiri::XML::Element] name name element
|
|
148
|
+
#
|
|
149
|
+
# @return [RelatonBib::FullName] full name
|
|
150
|
+
#
|
|
151
|
+
def fullname(name)
|
|
152
|
+
fname = forename name.at("./given-names")
|
|
153
|
+
sname = name.at("./surname").text
|
|
154
|
+
surname = RelatonBib::LocalizedString.new sname, "en", "Latn"
|
|
155
|
+
RelatonBib::FullName.new surname: surname, forename: fname
|
|
156
|
+
end
|
|
157
|
+
|
|
158
|
+
#
|
|
159
|
+
# Parse forename
|
|
160
|
+
#
|
|
161
|
+
# @param [Nokogiri::XML::Element, nil] given_name given-names element
|
|
162
|
+
#
|
|
163
|
+
# @return [Array<RelatonBib::Forename>] array of forenames
|
|
164
|
+
#
|
|
165
|
+
def forename(given_name) # rubocop:disable Metrics/MethodLength
|
|
166
|
+
return [] unless given_name
|
|
167
|
+
|
|
168
|
+
given_name.text.scan(/(\w+)(?:\s(\w)(?:\s|$))?/).map do |nm, int|
|
|
169
|
+
if nm.size == 1
|
|
170
|
+
name = nil
|
|
171
|
+
init = nm
|
|
172
|
+
else
|
|
173
|
+
name = nm
|
|
174
|
+
init = int
|
|
175
|
+
end
|
|
176
|
+
RelatonBib::Forename.new(content: name, language: ["en"], script: ["Latn"], initial: init)
|
|
177
|
+
end
|
|
178
|
+
end
|
|
179
|
+
|
|
180
|
+
#
|
|
181
|
+
# Parse date
|
|
182
|
+
#
|
|
183
|
+
# @return [Array<RelatonBib::BibliographicDate>] array of dates
|
|
184
|
+
#
|
|
185
|
+
def parse_date
|
|
186
|
+
on = dates.min
|
|
187
|
+
[RelatonBib::BibliographicDate.new(type: "published", on: on)]
|
|
188
|
+
end
|
|
189
|
+
|
|
190
|
+
#
|
|
191
|
+
# Parse date
|
|
192
|
+
#
|
|
193
|
+
# @yield [date, type] date and type
|
|
194
|
+
#
|
|
195
|
+
# @return [Array<String, Object>] string date or whatever block returns
|
|
196
|
+
#
|
|
197
|
+
def dates
|
|
198
|
+
@doc.xpath("./front/article-meta/pub-date").map do |d|
|
|
199
|
+
month = date_part(d, "month")
|
|
200
|
+
day = date_part(d, "day")
|
|
201
|
+
date = "#{d.at('./year').text}-#{month}-#{day}"
|
|
202
|
+
block_given? ? yield(date, d[:"pub-type"]) : date
|
|
203
|
+
end
|
|
204
|
+
end
|
|
205
|
+
|
|
206
|
+
def date_part(date, type)
|
|
207
|
+
part = date.at("./#{type}")&.text
|
|
208
|
+
return "01" if part.nil? || part.empty?
|
|
209
|
+
|
|
210
|
+
part.rjust(2, "0")
|
|
211
|
+
end
|
|
212
|
+
|
|
213
|
+
#
|
|
214
|
+
# Parse copyright
|
|
215
|
+
#
|
|
216
|
+
# @return [Array<RelatonBib::CopyrightAssociation>] array of copyright associations
|
|
217
|
+
#
|
|
218
|
+
def parse_copyright
|
|
219
|
+
@doc.xpath("./front/article-meta/permissions").each_with_object([]) do |l, m|
|
|
220
|
+
from = l.at("./copyright-year")
|
|
221
|
+
next unless from
|
|
222
|
+
|
|
223
|
+
owner = l.at("./copyright-statement").text.split(" & ").map do |c|
|
|
224
|
+
/(?<name>[A-z]+(?:\s[A-z]+)*)/ =~ c
|
|
225
|
+
org = RelatonBib::Organization.new name: name
|
|
226
|
+
RelatonBib::ContributionInfo.new(entity: org)
|
|
227
|
+
end
|
|
228
|
+
m << RelatonBib::CopyrightAssociation.new(owner: owner, from: from.text)
|
|
229
|
+
end
|
|
230
|
+
end
|
|
231
|
+
|
|
232
|
+
#
|
|
233
|
+
# Parse abstract
|
|
234
|
+
#
|
|
235
|
+
# @return [Array<RelatonBib::FormattedString>] array of abstracts
|
|
236
|
+
#
|
|
237
|
+
def parse_abstract
|
|
238
|
+
@doc.xpath("./front/article-meta/abstract").map do |a|
|
|
239
|
+
RelatonBib::FormattedString.new(
|
|
240
|
+
content: a.inner_html, language: a[:"xml:lang"], script: ["Latn"], format: "text/html",
|
|
241
|
+
)
|
|
242
|
+
end
|
|
243
|
+
end
|
|
244
|
+
|
|
245
|
+
#
|
|
246
|
+
# Parse relation
|
|
247
|
+
#
|
|
248
|
+
# @return [Array<RelatonBib::DocumentRelation>] array of document relations
|
|
249
|
+
#
|
|
250
|
+
def parse_relation
|
|
251
|
+
dates do |d, t|
|
|
252
|
+
RelatonBib::DocumentRelation.new(type: "hasManifestation", bibitem: bibitem(d, t))
|
|
253
|
+
end
|
|
254
|
+
end
|
|
255
|
+
|
|
256
|
+
#
|
|
257
|
+
# Create bibitem
|
|
258
|
+
#
|
|
259
|
+
# @param [String] date
|
|
260
|
+
# @param [String] type date type
|
|
261
|
+
#
|
|
262
|
+
# @return [RelatonBipm::BipmBibliographicItem] bibitem
|
|
263
|
+
#
|
|
264
|
+
def bibitem(date, type)
|
|
265
|
+
dt = RelatonBib::BibliographicDate.new(type: type, on: date)
|
|
266
|
+
carrier = type == "epub" ? "online" : "print"
|
|
267
|
+
medium = RelatonBib::Medium.new carrier: carrier
|
|
268
|
+
BipmBibliographicItem.new title: parse_title, date: [dt], medium: medium
|
|
269
|
+
end
|
|
270
|
+
|
|
271
|
+
#
|
|
272
|
+
# Parse series
|
|
273
|
+
#
|
|
274
|
+
# @return [Array<RelatonBib::Series>] array of series
|
|
275
|
+
#
|
|
276
|
+
def parse_series
|
|
277
|
+
title = RelatonBib::TypedTitleString.new(
|
|
278
|
+
content: journal_title, language: ["en"], script: ["Latn"],
|
|
279
|
+
)
|
|
280
|
+
[RelatonBib::Series.new(title: title)]
|
|
281
|
+
end
|
|
282
|
+
|
|
283
|
+
#
|
|
284
|
+
# Parse extent
|
|
285
|
+
#
|
|
286
|
+
# @return [Array<RelatonBib::Extent>] array of extents
|
|
287
|
+
#
|
|
288
|
+
def parse_extent
|
|
289
|
+
%w[volume issue page].map.with_index do |t, i|
|
|
290
|
+
RelatonBib::Locality.new t, volume_issue_page[i]
|
|
291
|
+
end
|
|
292
|
+
end
|
|
293
|
+
|
|
294
|
+
def parse_type
|
|
295
|
+
"article"
|
|
296
|
+
end
|
|
297
|
+
|
|
298
|
+
alias_method :parse_doctype, :parse_type
|
|
299
|
+
end
|
|
300
|
+
end
|
|
301
|
+
end
|
|
@@ -0,0 +1,171 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module RelatonBipm
|
|
4
|
+
module RawdataBipmMetrologia
|
|
5
|
+
class Fetcher
|
|
6
|
+
DIR = "rawdata-bipm-metrologia/data/*content/0026-1394"
|
|
7
|
+
|
|
8
|
+
# @param data_fetcher [RelatonBipm::DataFetcher]
|
|
9
|
+
def self.fetch(data_fetcher)
|
|
10
|
+
new(data_fetcher).fetch
|
|
11
|
+
end
|
|
12
|
+
|
|
13
|
+
# @param data_fetcher [RelatonBipm::DataFetcher]
|
|
14
|
+
def initialize(data_fetcher)
|
|
15
|
+
@data_fetcher = data_fetcher
|
|
16
|
+
end
|
|
17
|
+
|
|
18
|
+
#
|
|
19
|
+
# Fetch documents from rawdata-bipm-metrologia and save to files
|
|
20
|
+
#
|
|
21
|
+
def fetch
|
|
22
|
+
fetch_metrologia
|
|
23
|
+
fetch_volumes
|
|
24
|
+
fetch_issues
|
|
25
|
+
fetch_articles
|
|
26
|
+
end
|
|
27
|
+
|
|
28
|
+
#
|
|
29
|
+
# Fetch articles from rawdata-bipm-metrologia and save to files
|
|
30
|
+
#
|
|
31
|
+
def fetch_articles # rubocop:disable Metrics/AbcSize
|
|
32
|
+
Dir["#{DIR}/**/*.xml"].each do |path|
|
|
33
|
+
doc = Nokogiri::XML File.read(path, encoding: "UTF-8")
|
|
34
|
+
item = ArticleParser.parse doc.at("/article")
|
|
35
|
+
file = "#{item.docidentifier.first.id.downcase.gsub(' ', '-')}.#{@data_fetcher.ext}"
|
|
36
|
+
out_path = File.join(@data_fetcher.output, file)
|
|
37
|
+
@data_fetcher.index[[item.docidentifier.first.id]] = out_path
|
|
38
|
+
@data_fetcher.write_file out_path, item
|
|
39
|
+
end
|
|
40
|
+
end
|
|
41
|
+
|
|
42
|
+
#
|
|
43
|
+
# Fetch volumes from rawdata-bipm-metrologia and save to files
|
|
44
|
+
#
|
|
45
|
+
def fetch_volumes
|
|
46
|
+
Dir["#{DIR}/*"].map { |path| path.split("/").last }.uniq.each do |volume|
|
|
47
|
+
fetch_metrologia volume
|
|
48
|
+
end
|
|
49
|
+
end
|
|
50
|
+
|
|
51
|
+
#
|
|
52
|
+
# Fetch issues from rawdata-bipm-metrologia and save to files
|
|
53
|
+
#
|
|
54
|
+
def fetch_issues
|
|
55
|
+
Dir["#{DIR}/*/*"].each do |path|
|
|
56
|
+
volume, issue = path.split("/").last(2)
|
|
57
|
+
fetch_metrologia volume, issue
|
|
58
|
+
end
|
|
59
|
+
end
|
|
60
|
+
|
|
61
|
+
#
|
|
62
|
+
# Fetch metrologia root document from rawdata-bipm-metrologia and save to a file
|
|
63
|
+
#
|
|
64
|
+
# @overload fetch_metrologia(volume, issue)
|
|
65
|
+
# @param [String] volume volume number
|
|
66
|
+
# @param [String] issue issue number
|
|
67
|
+
# @overload fetch_metrologia(volume)
|
|
68
|
+
# @param [String] volume volume number
|
|
69
|
+
#
|
|
70
|
+
def fetch_metrologia(*args)
|
|
71
|
+
id = identifier(*args)
|
|
72
|
+
item = BipmBibliographicItem.new(
|
|
73
|
+
type: "article", formattedref: formattedref(id), docid: docidentifier(id),
|
|
74
|
+
language: ["en"], script: ["Latn"], relation: relation(*args),
|
|
75
|
+
link: typed_uri(*args)
|
|
76
|
+
)
|
|
77
|
+
file = "#{id.downcase.gsub(' ', '-')}.#{@data_fetcher.ext}"
|
|
78
|
+
path = File.join(@data_fetcher.output, file)
|
|
79
|
+
@data_fetcher.index[[id]] = path
|
|
80
|
+
@data_fetcher.write_file path, item
|
|
81
|
+
end
|
|
82
|
+
|
|
83
|
+
#
|
|
84
|
+
# Create formattedref
|
|
85
|
+
#
|
|
86
|
+
# @param [String] id document identifier
|
|
87
|
+
#
|
|
88
|
+
# @return [RelatonBib::FormattedRef] formattedref
|
|
89
|
+
#
|
|
90
|
+
def formattedref(id)
|
|
91
|
+
RelatonBib::FormattedRef.new content: id, language: "en", script: "Latn"
|
|
92
|
+
end
|
|
93
|
+
|
|
94
|
+
#
|
|
95
|
+
# Create docidentifier
|
|
96
|
+
#
|
|
97
|
+
# @param [String] id document identifier
|
|
98
|
+
#
|
|
99
|
+
# @return [Array<RelatonBib::DocumentIdentifier>] docidentifier
|
|
100
|
+
#
|
|
101
|
+
def docidentifier(id)
|
|
102
|
+
[RelatonBib::DocumentIdentifier.new(id: id, type: "BIPM", primary: true)]
|
|
103
|
+
end
|
|
104
|
+
|
|
105
|
+
#
|
|
106
|
+
# Create identifier
|
|
107
|
+
#
|
|
108
|
+
# @overload identifier(volume, issue, article)
|
|
109
|
+
# @param [String] volume volume number
|
|
110
|
+
# @param [String] issue issue number
|
|
111
|
+
# @param [String] article article number
|
|
112
|
+
# @overload identifier(volume, issue)
|
|
113
|
+
# @param [String] volume volume number
|
|
114
|
+
# @param [String] issue issue number
|
|
115
|
+
# @overload identifier(volume)
|
|
116
|
+
# @param [String] volume volume number
|
|
117
|
+
#
|
|
118
|
+
# @return [String] document identifier
|
|
119
|
+
#
|
|
120
|
+
def identifier(*args)
|
|
121
|
+
["Metrologia", *id_parts(*args)].join(" ")
|
|
122
|
+
end
|
|
123
|
+
|
|
124
|
+
def id_parts(*args)
|
|
125
|
+
args.map { |p| p.match(/[^_]+$/).to_s }
|
|
126
|
+
end
|
|
127
|
+
|
|
128
|
+
#
|
|
129
|
+
# Fetch relations
|
|
130
|
+
#
|
|
131
|
+
# @param (see #fetch_metrologia)
|
|
132
|
+
#
|
|
133
|
+
# @return [Array<RelatonBib::DocumentRelation>] relations
|
|
134
|
+
#
|
|
135
|
+
def relation(*args)
|
|
136
|
+
dir = [DIR, *args].join("/")
|
|
137
|
+
Dir["#{dir}/*"].map do |path|
|
|
138
|
+
part = path.split("/").last
|
|
139
|
+
id = identifier(*args, part)
|
|
140
|
+
RelatonBib::DocumentRelation.new(type: "partOf", bibitem: rel_bibitem(id))
|
|
141
|
+
end
|
|
142
|
+
end
|
|
143
|
+
|
|
144
|
+
#
|
|
145
|
+
# Create relation bibitem
|
|
146
|
+
#
|
|
147
|
+
# @param [String] id document identifier
|
|
148
|
+
#
|
|
149
|
+
# @return [RelatonBipm::BipmBibliographicItem] bibitem
|
|
150
|
+
#
|
|
151
|
+
def rel_bibitem(id)
|
|
152
|
+
BipmBibliographicItem.new(
|
|
153
|
+
formattedref: formattedref(id), docid: docidentifier(id),
|
|
154
|
+
)
|
|
155
|
+
end
|
|
156
|
+
|
|
157
|
+
def typed_uri(*args)
|
|
158
|
+
[RelatonBib::TypedUri.new(type: "src", content: link(*args))]
|
|
159
|
+
end
|
|
160
|
+
|
|
161
|
+
def link(*args)
|
|
162
|
+
params = id_parts(*args).join("/")
|
|
163
|
+
case args.size
|
|
164
|
+
when 0 then "https://iopscience.iop.org/journal/0026-1394"
|
|
165
|
+
when 1 then "https://iopscience.iop.org/volume/0026-1394/#{params}"
|
|
166
|
+
when 2 then "https://iopscience.iop.org/issue/0026-1394/#{params}"
|
|
167
|
+
end
|
|
168
|
+
end
|
|
169
|
+
end
|
|
170
|
+
end
|
|
171
|
+
end
|
data/lib/relaton_bipm/version.rb
CHANGED
data/lib/relaton_bipm.rb
CHANGED
|
@@ -17,6 +17,8 @@ require "relaton_bipm/index"
|
|
|
17
17
|
require "relaton_bipm/data_fetcher"
|
|
18
18
|
require "relaton_bipm/data_outcomes_parser"
|
|
19
19
|
require "relaton_bipm/bipm_si_brochure_parser"
|
|
20
|
+
require "relaton_bipm/rawdata_bipm_metrologia/fetcher"
|
|
21
|
+
require "relaton_bipm/rawdata_bipm_metrologia/article_parser"
|
|
20
22
|
|
|
21
23
|
module RelatonBipm
|
|
22
24
|
class Error < StandardError; end
|
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: relaton-bipm
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 1.14.
|
|
4
|
+
version: 1.14.2
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Ribose Inc.
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: exe
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date:
|
|
11
|
+
date: 2023-02-16 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
13
|
- !ruby/object:Gem::Dependency
|
|
14
14
|
name: byebug
|
|
@@ -218,6 +218,8 @@ files:
|
|
|
218
218
|
- lib/relaton_bipm/hash_converter.rb
|
|
219
219
|
- lib/relaton_bipm/index.rb
|
|
220
220
|
- lib/relaton_bipm/processor.rb
|
|
221
|
+
- lib/relaton_bipm/rawdata_bipm_metrologia/article_parser.rb
|
|
222
|
+
- lib/relaton_bipm/rawdata_bipm_metrologia/fetcher.rb
|
|
221
223
|
- lib/relaton_bipm/structured_identifier.rb
|
|
222
224
|
- lib/relaton_bipm/version.rb
|
|
223
225
|
- lib/relaton_bipm/workgroup.rb
|