relaton-iec 1.14.1 → 1.14.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.adoc +31 -26
- data/lib/relaton_iec/data_fetcher.rb +166 -0
- data/lib/relaton_iec/data_parser.rb +287 -0
- data/lib/relaton_iec/hit.rb +9 -1
- data/lib/relaton_iec/hit_collection.rb +15 -79
- data/lib/relaton_iec/iec_bibliographic_item.rb +3 -1
- data/lib/relaton_iec/iec_bibliography.rb +83 -111
- data/lib/relaton_iec/index.rb +133 -0
- data/lib/relaton_iec/processor.rb +13 -0
- data/lib/relaton_iec/version.rb +1 -1
- data/lib/relaton_iec.rb +9 -6
- data/relaton_iec.gemspec +4 -3
- metadata +19 -3
- data/lib/relaton_iec/scrapper.rb +0 -308
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: e377e8eaef84193231a109087a5d036e68819065c3256179533011f7291e205e
|
4
|
+
data.tar.gz: c4657c57400cc95b4707ffa0132865f793bf973fa9733683576ec88cef3fcb5d
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 21f13b927c895e376e245d678e80d3185d6281b98f86e1cdb190905651685579286562b7d88786c52af70c4f1cc25433e3f6af41c290908ac8a46211eefb0602
|
7
|
+
data.tar.gz: 5f1c9e9e5a0068704a4619f0ec2fe9e958bc18d8c776822072317d3d252c2fe67774186be69547d9cc8b5ad22fb96c7c4e60777c33e0215c99f609c78b540997
|
data/README.adoc
CHANGED
@@ -47,25 +47,24 @@ hit_collection = RelatonIec::IecBibliography.search("60050")
|
|
47
47
|
=> <RelatonIec::HitCollection:0x007fe0d7126f28 @ref=60050 @fetched=false>
|
48
48
|
|
49
49
|
hit_collection.first
|
50
|
-
=> <RelatonIec::Hit:
|
50
|
+
=> <RelatonIec::Hit:0x000000000003e8 @text="60050" @fetched="false" @fullIdentifier="" @title="IEC 60050-05:1935">
|
51
51
|
|
52
52
|
item = hit_collection[2].fetch
|
53
53
|
=> #<RelatonIec::IecBibliographicItem:0x007fe1171a06f8
|
54
54
|
...
|
55
55
|
|
56
56
|
item.docidentifier
|
57
|
-
=> [#<RelatonBib::DocumentIdentifier:
|
58
|
-
#<RelatonBib::DocumentIdentifier:
|
59
|
-
@id="urn:iec:std:iec:60050-
|
57
|
+
=> [#<RelatonBib::DocumentIdentifier:0x00007ff1b52a2b60 @id="IEC 60050-07:1956", @language=nil, @primary=true, @scope=nil, @script=nil, @type="IEC">,
|
58
|
+
#<RelatonBib::DocumentIdentifier:0x00007ff1b52a2a98
|
59
|
+
@id="urn:iec:std:iec:60050-07:1956-01:::",
|
60
60
|
@language=nil,
|
61
61
|
@primary=nil,
|
62
62
|
@scope=nil,
|
63
63
|
@script=nil,
|
64
64
|
@type="URN">]
|
65
|
-
[6] pry(main)>
|
66
65
|
|
67
66
|
item.docidentifier.detect { |di| di.type == "URN" }.id
|
68
|
-
=> "urn:iec:std:iec:60050-
|
67
|
+
=> "urn:iec:std:iec:60050-07:1956-01:::"
|
69
68
|
----
|
70
69
|
|
71
70
|
=== Fetch document by keywords
|
@@ -79,8 +78,8 @@ item.docidentifier.detect { |di| di.type == "URN" }.id
|
|
79
78
|
[source,ruby]
|
80
79
|
----
|
81
80
|
item = RelatonIec::IecBibliography.get("IEC 60050-112:2010")
|
82
|
-
[relaton-iec] ("IEC 60050-112") fetching...
|
83
|
-
[relaton-iec] ("IEC 60050-112") found IEC 60050-112:2010
|
81
|
+
[relaton-iec] ("IEC 60050-112:2010") fetching...
|
82
|
+
[relaton-iec] ("IEC 60050-112:2010") found IEC 60050-112:2010
|
84
83
|
=> #<RelatonIec::IecBibliographicItem:0x00007f876525e8d0
|
85
84
|
...
|
86
85
|
|
@@ -104,15 +103,12 @@ item.docidentifier.first
|
|
104
103
|
|
105
104
|
=== Search for ISO/IEC Directives
|
106
105
|
|
107
|
-
The
|
106
|
+
The following references can be fetched:
|
108
107
|
|
109
|
-
-
|
110
|
-
- ISO/IEC DIR 1 IEC SUP - Procedures for the technical work
|
111
|
-
- ISO/IEC DIR
|
112
|
-
- ISO/IEC DIR
|
113
|
-
- ISO/IEC DIR 2 ISO - Principles and rules for the structure and drafting of ISO and IEC documents
|
114
|
-
- ISO/IEC DIR IEC SUP - Procedures specific to IEC
|
115
|
-
- ISO/IEC DIR JTC 1 SUP - Procedures specific to JTC 1
|
108
|
+
- `IEC 61360-4 DB` - IEC/SC 3D - Common Data Dictionary (CDD - V2.0015.0001)
|
109
|
+
- `ISO/IEC DIR 1 IEC SUP` - Procedures for the technical work - Procedures specific to IEC
|
110
|
+
- `ISO/IEC DIR 2 IEC` - Principles and rules for the structure and drafting of ISO and IEC documents
|
111
|
+
- `ISO/IEC DIR IEC SUP` - Procedures specific to IEC
|
116
112
|
|
117
113
|
=== XML serialization
|
118
114
|
|
@@ -125,22 +121,22 @@ Possible options:
|
|
125
121
|
----
|
126
122
|
item.to_xml
|
127
123
|
=> "<bibitem id="IEC60050-112-2010" type="standard" schema-version="v1.2.1">
|
128
|
-
<fetched>
|
129
|
-
<title type="title-main" format="text/plain" language="en" script="Latn">International Electrotechnical Vocabulary (IEV)</title>
|
130
|
-
<title type="title-part" format="text/plain" language="en" script="Latn">Part 112: Quantities and units</title>
|
124
|
+
<fetched>2023-03-01</fetched>
|
131
125
|
<title type="main" format="text/plain" language="en" script="Latn">International Electrotechnical Vocabulary (IEV) - Part 112: Quantities and units</title>
|
126
|
+
<title type="main" format="text/plain" language="fr" script="Latn">Vocabulaire Electrotechnique International (IEV) - Partie 112: Grandeurs et unités</title>
|
127
|
+
<title type="main" format="text/plain" language="es" script="Latn">Versión Oficial En español - Vocabulario Electrotécnico Internacional. Parte 112: Magnitudes y unidades.</title>
|
132
128
|
<uri type="src">https://webstore.iec.ch/publication/162</uri>
|
133
129
|
...
|
134
130
|
</bibitem>"
|
135
131
|
|
136
132
|
item.to_xml bibdata: true
|
137
133
|
=> "<bibdata type="standard" schema-version="v1.2.1">
|
138
|
-
<fetched>
|
139
|
-
<title type="title-main" format="text/plain" language="en" script="Latn">International Electrotechnical Vocabulary (IEV)</title>
|
140
|
-
<title type="title-part" format="text/plain" language="en" script="Latn">Part 112: Quantities and units</title>
|
134
|
+
<fetched>2023-03-01</fetched>
|
141
135
|
<title type="main" format="text/plain" language="en" script="Latn">International Electrotechnical Vocabulary (IEV) - Part 112: Quantities and units</title>
|
136
|
+
<title type="main" format="text/plain" language="fr" script="Latn">Vocabulaire Electrotechnique International (IEV) - Partie 112: Grandeurs et unités</title>
|
137
|
+
<title type="main" format="text/plain" language="es" script="Latn">Versión Oficial En español - Vocabulario Electrotécnico Internacional. Parte 112: Magnitudes y unidades.</title>
|
142
138
|
<uri type="src">https://webstore.iec.ch/publication/162</uri>
|
143
|
-
<uri type="obp"
|
139
|
+
<uri type="obp">https://webstore.iec.ch/preview/info_iec60050-112{ed1.0}b.pdf</uri>
|
144
140
|
...
|
145
141
|
<ext schema-version="v1.0.0">
|
146
142
|
<doctype>international-standard</doctype>
|
@@ -163,8 +159,16 @@ Each IEC document has `src` type link and optional `obp` type link.
|
|
163
159
|
[source,ruby]
|
164
160
|
----
|
165
161
|
item.link
|
166
|
-
=> [#<RelatonBib::TypedUri:
|
167
|
-
|
162
|
+
=> [#<RelatonBib::TypedUri:0x00007ff1d50e9e20
|
163
|
+
@content=#<Addressable::URI:0x2260 URI:https://webstore.iec.ch/publication/162>,
|
164
|
+
@language=nil,
|
165
|
+
@script=nil,
|
166
|
+
@type="src">,
|
167
|
+
#<RelatonBib::TypedUri:0x00007ff1d50e9498
|
168
|
+
@content=#<Addressable::URI:0x2274 URI:https://webstore.iec.ch/preview/info_iec60050-112{ed1.0}b.pdf>,
|
169
|
+
@language=nil,
|
170
|
+
@script=nil,
|
171
|
+
@type="obp">]
|
168
172
|
----
|
169
173
|
|
170
174
|
=== Create bibliographic item from Hash
|
@@ -172,7 +176,8 @@ item.link
|
|
172
176
|
[source,ruby]
|
173
177
|
----
|
174
178
|
hash = YAML.load_file "spec/examples/hit.yaml"
|
175
|
-
=> {"
|
179
|
+
=> {"schema-version"=>"v1.2.1",
|
180
|
+
"id"=>"IEC61058-2-4-1995+AMD1-2003CSV",
|
176
181
|
...
|
177
182
|
|
178
183
|
RelatonIec::IecBibliographicItem.from_hash hash
|
@@ -0,0 +1,166 @@
|
|
1
|
+
module RelatonIec
|
2
|
+
class DataFetcher
|
3
|
+
ENTRYPOINT = "https://api.iec.ch/harmonized/publications?size=100&sortBy=urn&page=".freeze
|
4
|
+
CREDENTIAL = "https://api.iec.ch/oauth/client_credential/accesstoken?grant_type=client_credentials".freeze
|
5
|
+
|
6
|
+
#
|
7
|
+
# Initialize new instance.
|
8
|
+
#
|
9
|
+
# @param [String] source source name (iec-harmonised-all, iec-harmonised-latest)
|
10
|
+
# @param [String] output output directory
|
11
|
+
# @param [String] format format of output files (xml, bibxml, yaml)
|
12
|
+
#
|
13
|
+
def initialize(source = "iec-harmonised-latest", output: "data", format: "yaml")
|
14
|
+
@output = output
|
15
|
+
@format = format
|
16
|
+
@ext = format.sub(/^bib/, "")
|
17
|
+
@files = []
|
18
|
+
@index = Index.new "index.yaml"
|
19
|
+
@all = source == "iec-harmonised-all"
|
20
|
+
end
|
21
|
+
|
22
|
+
#
|
23
|
+
# Fetch data from IEC.
|
24
|
+
#
|
25
|
+
def fetch # rubocop:disable Metrics/AbcSize, Metrics/MethodLength
|
26
|
+
t1 = Time.now
|
27
|
+
puts "Started at: #{t1}"
|
28
|
+
|
29
|
+
FileUtils.mkdir_p @output
|
30
|
+
if @all
|
31
|
+
FileUtils.rm Dir[File.join(@output, "*.#{@ext}")]
|
32
|
+
@index.clear
|
33
|
+
end
|
34
|
+
fetch_all
|
35
|
+
add_static_files_to_index
|
36
|
+
@index.save
|
37
|
+
|
38
|
+
t2 = Time.now
|
39
|
+
puts "Stopped at: #{t2}"
|
40
|
+
puts "Done in: #{(t2 - t1).round} sec."
|
41
|
+
rescue StandardError => e
|
42
|
+
warn e.message
|
43
|
+
warn e.backtrace.join("\n")
|
44
|
+
end
|
45
|
+
|
46
|
+
#
|
47
|
+
# Add static files to index.
|
48
|
+
#
|
49
|
+
# @return [void]
|
50
|
+
#
|
51
|
+
def add_static_files_to_index
|
52
|
+
Dir["static/*.yaml"].each do |file|
|
53
|
+
pub = RelatonBib.parse_yaml File.read(file, encoding: "UTF-8")
|
54
|
+
pubid = RelatonBib.array(pub["docid"]).detect { |id| id["primary"] }["id"]
|
55
|
+
@index.add pubid, file
|
56
|
+
end
|
57
|
+
end
|
58
|
+
|
59
|
+
#
|
60
|
+
# Fetch documents from IEC API.
|
61
|
+
#
|
62
|
+
# @return [void]
|
63
|
+
#
|
64
|
+
def fetch_all # rubocop:disable Metrics/MethodLength
|
65
|
+
page = 0
|
66
|
+
next_page = true
|
67
|
+
while next_page
|
68
|
+
res = fetch_page_token page
|
69
|
+
unless res.code == "200"
|
70
|
+
warn "[relaton-iec] #{res.body}"
|
71
|
+
break
|
72
|
+
end
|
73
|
+
json = JSON.parse res.body
|
74
|
+
json["publication"].each { |pub| fetch_pub pub }
|
75
|
+
page += 1
|
76
|
+
next_page = res["link"]&.include? "rel=\"last\""
|
77
|
+
end
|
78
|
+
end
|
79
|
+
|
80
|
+
#
|
81
|
+
# Fetch page. If response code is 401, then get new access token and try again.
|
82
|
+
#
|
83
|
+
# @param [Integer] page page number
|
84
|
+
#
|
85
|
+
# @return [Net::HTTPResponse] response
|
86
|
+
#
|
87
|
+
def fetch_page_token(page)
|
88
|
+
res = fetch_page page
|
89
|
+
if res.code == "401"
|
90
|
+
@access_token = nil
|
91
|
+
res = fetch_page page
|
92
|
+
end
|
93
|
+
res
|
94
|
+
end
|
95
|
+
|
96
|
+
#
|
97
|
+
# Fetch page from IEC API.
|
98
|
+
#
|
99
|
+
# @param [Integer] page page number
|
100
|
+
#
|
101
|
+
# @return [Net::HTTPResponse] response
|
102
|
+
#
|
103
|
+
def fetch_page(page)
|
104
|
+
url = "#{ENTRYPOINT}#{page}"
|
105
|
+
if !@all && @index.last_change
|
106
|
+
url += "&lastChangeTimestampFrom=#{@index.last_change}"
|
107
|
+
end
|
108
|
+
uri = URI url
|
109
|
+
req = Net::HTTP::Get.new uri
|
110
|
+
req["Authorization"] = "Bearer #{access_token}"
|
111
|
+
Net::HTTP.start(uri.hostname, uri.port, use_ssl: true) do |http|
|
112
|
+
http.request req
|
113
|
+
end
|
114
|
+
end
|
115
|
+
|
116
|
+
#
|
117
|
+
# Get access token.
|
118
|
+
#
|
119
|
+
# @return [String] access token
|
120
|
+
#
|
121
|
+
def access_token # rubocop:disable Metrics/AbcSize
|
122
|
+
@access_token ||= begin
|
123
|
+
uri = URI CREDENTIAL
|
124
|
+
req = Net::HTTP::Get.new uri
|
125
|
+
req.basic_auth ENV.fetch("IEC_HAPI_PROJ_PUBS_KEY"), ENV.fetch("IEC_HAPI_PROJ_PUBS_SECRET")
|
126
|
+
res = Net::HTTP.start(uri.hostname, uri.port, use_ssl: true) do |http|
|
127
|
+
http.request req
|
128
|
+
end
|
129
|
+
JSON.parse(res.body)["access_token"]
|
130
|
+
end
|
131
|
+
end
|
132
|
+
|
133
|
+
#
|
134
|
+
# Fetch publication and save it to file.
|
135
|
+
#
|
136
|
+
# @param [Hash] pub publication
|
137
|
+
#
|
138
|
+
def fetch_pub(pub) # rubocop:disable Metrics/MethodLength, Metrics/AbcSize
|
139
|
+
bib = DataParser.new(pub).parse
|
140
|
+
did = bib.docidentifier.detect &:primary
|
141
|
+
file = File.join(@output, "#{did.id.downcase.gsub(/[:\s\/]/, '_')}.#{@ext}")
|
142
|
+
if @files.include? file then warn "File #{file} exists."
|
143
|
+
else
|
144
|
+
@files << file
|
145
|
+
@index.add index_id(pub), file, pub["lastChangeTimestamp"]
|
146
|
+
end
|
147
|
+
content = case @format
|
148
|
+
when "xml" then bib.to_xml bibdata: true
|
149
|
+
when "yaml", "yml" then bib.to_hash.to_yaml
|
150
|
+
when "bibxml" then bib.to_bibxml
|
151
|
+
end
|
152
|
+
File.write file, content, encoding: "UTF-8"
|
153
|
+
end
|
154
|
+
|
155
|
+
def index_id(pub)
|
156
|
+
/-(?<part>\d+)/ =~ pub["reference"]
|
157
|
+
title = pub.dig("title", 0, "value")
|
158
|
+
return pub["reference"] unless part && title
|
159
|
+
|
160
|
+
ids = title.scan(/(?<=-\sPart\s)#{part[0]}\d+(?=:)/).map do |m|
|
161
|
+
pub["reference"].sub(/-#{part}/, "-#{m}")
|
162
|
+
end
|
163
|
+
ids.size > 1 ? ids : pub["reference"]
|
164
|
+
end
|
165
|
+
end
|
166
|
+
end
|
@@ -0,0 +1,287 @@
|
|
1
|
+
module RelatonIec
|
2
|
+
class DataParser
|
3
|
+
DOMAIN = "https://webstore.iec.ch"
|
4
|
+
|
5
|
+
ATTRS = %i[
|
6
|
+
docid structuredidentifier language script title doctype
|
7
|
+
ics date contributor editorialgroup abstract copyright link relation
|
8
|
+
].freeze
|
9
|
+
|
10
|
+
ABBREVS = {
|
11
|
+
"ISO" => ["International Organization for Standardization", "www.iso.org"],
|
12
|
+
"IEC" => ["International Electrotechnical Commission", "www.iec.ch"],
|
13
|
+
"IEEE" => ["Institute of Electrical and Electronics Engineers", "www.ieee.org"],
|
14
|
+
"ASTM" => ["American Society of Testing Materials", "www.astm.org"],
|
15
|
+
"CISPR" => ["International special committee on radio interference", "www.iec.ch"],
|
16
|
+
}.freeze
|
17
|
+
|
18
|
+
#
|
19
|
+
# Initialize new instance.
|
20
|
+
#
|
21
|
+
# @param [Hash] pub document data
|
22
|
+
#
|
23
|
+
def initialize(pub)
|
24
|
+
@pub = pub
|
25
|
+
end
|
26
|
+
|
27
|
+
#
|
28
|
+
# Parse document.
|
29
|
+
#
|
30
|
+
# @return [RelatonIec::IecBibliographicItem] bib item
|
31
|
+
#
|
32
|
+
def parse # rubocop:disable Metrics/AbcSize
|
33
|
+
args = ATTRS.each_with_object({}) { |a, h| h[a] = send a }
|
34
|
+
args[:docstatus] = RelatonBib::DocumentStatus.new stage: @pub["status"]
|
35
|
+
args[:edition] = @pub["edition"]
|
36
|
+
args[:price_code] = @pub["priceInfo"]["priceCode"]
|
37
|
+
args[:place] = ["Geneva"]
|
38
|
+
IecBibliographicItem.new(**args)
|
39
|
+
end
|
40
|
+
|
41
|
+
#
|
42
|
+
# Parse document identifiers.
|
43
|
+
#
|
44
|
+
# @return [Array<RelatonBib::DocumentIdentifier>] document identifiers
|
45
|
+
#
|
46
|
+
def docid
|
47
|
+
ids = []
|
48
|
+
ids << RelatonBib::DocumentIdentifier.new(id: @pub["reference"], type: "IEC", primary: true)
|
49
|
+
urnid = "urn:#{@pub['urnAlt'][0]}"
|
50
|
+
ids << RelatonBib::DocumentIdentifier.new(id: urnid, type: "URN")
|
51
|
+
end
|
52
|
+
|
53
|
+
#
|
54
|
+
# Parse structured identifier.
|
55
|
+
#
|
56
|
+
# @return [RelatonIsoBib::StructuredIdentifier] structured identifier
|
57
|
+
#
|
58
|
+
def structuredidentifier
|
59
|
+
m = @pub["reference"].match(
|
60
|
+
/(?<=\s)(?<project>\w+)(?:-(?<part>\w*)(?:-(?<subpart>\w*))?)?/,
|
61
|
+
)
|
62
|
+
RelatonIsoBib::StructuredIdentifier.new(
|
63
|
+
project_number: m[:project], part: m[:part], subpart: m[:subpart],
|
64
|
+
type: "IEC", id: @pub["reference"]
|
65
|
+
)
|
66
|
+
end
|
67
|
+
|
68
|
+
#
|
69
|
+
# Parse languages.
|
70
|
+
#
|
71
|
+
# @return [Array<String>] languages
|
72
|
+
#
|
73
|
+
def language
|
74
|
+
@pub["title"].map { |t| t["lang"] }.uniq
|
75
|
+
end
|
76
|
+
|
77
|
+
#
|
78
|
+
# Parse scripts.
|
79
|
+
#
|
80
|
+
# @return [Array<String>] scripts
|
81
|
+
#
|
82
|
+
def script
|
83
|
+
language.each_with_object([]) do |l, s|
|
84
|
+
scr = lang_to_script l
|
85
|
+
s << scr if scr && !s.include?(scr)
|
86
|
+
end
|
87
|
+
end
|
88
|
+
|
89
|
+
#
|
90
|
+
# Detect script.
|
91
|
+
#
|
92
|
+
# @param [String] lang language
|
93
|
+
#
|
94
|
+
# @return [String] script
|
95
|
+
#
|
96
|
+
def lang_to_script(lang)
|
97
|
+
case lang
|
98
|
+
when "en", "fr", "es" then "Latn"
|
99
|
+
end
|
100
|
+
end
|
101
|
+
|
102
|
+
#
|
103
|
+
# Parse titles.
|
104
|
+
#
|
105
|
+
# @return [Array<RelatonBib::TypedTitleString>] titles
|
106
|
+
#
|
107
|
+
def title
|
108
|
+
@pub["title"].map do |t|
|
109
|
+
RelatonBib::TypedTitleString.new(
|
110
|
+
content: t["value"], language: t["lang"], script: lang_to_script(t["lang"]), type: "main",
|
111
|
+
)
|
112
|
+
end
|
113
|
+
end
|
114
|
+
|
115
|
+
#
|
116
|
+
# Parse editorial group.
|
117
|
+
#
|
118
|
+
# @return [Hash, nil] editorial group, or nil when the publication has no committee
|
119
|
+
#
|
120
|
+
def editorialgroup
|
121
|
+
return unless @pub["committee"]
|
122
|
+
|
123
|
+
wg = @pub["committee"]["reference"]
|
124
|
+
{
|
125
|
+
technical_committee: [{
|
126
|
+
name: wg,
|
127
|
+
type: "technicalCommittee",
|
128
|
+
number: wg.match(/\d+/)&.to_s&.to_i,
|
129
|
+
}],
|
130
|
+
}
|
131
|
+
end
|
132
|
+
|
133
|
+
#
|
134
|
+
# Parse abstract.
|
135
|
+
#
|
136
|
+
# @return [Array<RelatonBib::FormattedString>, nil] abstract, or nil when the publication has none
|
137
|
+
#
|
138
|
+
def abstract
|
139
|
+
@pub["abstract"]&.map do |a|
|
140
|
+
RelatonBib::FormattedString.new(
|
141
|
+
content: a["content"], language: a["lang"], script: lang_to_script(a["lang"]),
|
142
|
+
format: a["format"]
|
143
|
+
)
|
144
|
+
end
|
145
|
+
end
|
146
|
+
|
147
|
+
# @return [Array<Hash>]
|
148
|
+
def copyright # rubocop:disable Metrics/AbcSize
|
149
|
+
from = @pub["reference"].match(/(?<=:)\d{4}/).to_s
|
150
|
+
from = @pub["releaseDate"]&.match(/\d{4}/).to_s if from.empty?
|
151
|
+
return [] if from.nil? || from.empty?
|
152
|
+
|
153
|
+
abbreviation = @pub["reference"].match(/.*?(?=\s)/).to_s
|
154
|
+
owner = abbreviation.split("/").map do |abbrev|
|
155
|
+
name, url = ABBREVS[abbrev]
|
156
|
+
{ name: name, abbreviation: abbrev, url: url }
|
157
|
+
end
|
158
|
+
[{ owner: owner, from: from }]
|
159
|
+
end
|
160
|
+
|
161
|
+
#
|
162
|
+
# Parse ICS classifications.
|
163
|
+
#
|
164
|
+
# @return [Array<RelatonIsoBib::Ics>] ics
|
165
|
+
#
|
166
|
+
def ics
|
167
|
+
return [] unless @pub["classifications"]
|
168
|
+
|
169
|
+
@pub["classifications"].select { |c| c["type"] == "ICS" }.map do |c|
|
170
|
+
RelatonIsoBib::Ics.new(c["value"])
|
171
|
+
end
|
172
|
+
end
|
173
|
+
|
174
|
+
#
|
175
|
+
# Parse dates.
|
176
|
+
#
|
177
|
+
# @return [Array<RelatonBib::BibliographicDate>] dates
|
178
|
+
#
|
179
|
+
def date
|
180
|
+
date = []
|
181
|
+
date << create_date("published", @pub["releaseDate"]) if @pub["releaseDate"]
|
182
|
+
date << create_date("confirmed", @pub["confirmationDate"]) if @pub["confirmationDate"]
|
183
|
+
date
|
184
|
+
end
|
185
|
+
|
186
|
+
def create_date(type, date)
|
187
|
+
RelatonBib::BibliographicDate.new(type: type, on: date)
|
188
|
+
end
|
189
|
+
|
190
|
+
#
|
191
|
+
# Parse contributors.
|
192
|
+
#
|
193
|
+
# @return [Array<Hash>] contributors
|
194
|
+
#
|
195
|
+
def contributor
|
196
|
+
@pub["reference"].sub(/\s.*/, "").split("/").map do |abbrev|
|
197
|
+
name, url = ABBREVS[abbrev]
|
198
|
+
{ entity: { name: name, url: url, abbreviation: abbrev },
|
199
|
+
role: [type: "publisher"] }
|
200
|
+
end
|
201
|
+
end
|
202
|
+
|
203
|
+
#
|
204
|
+
# Parse links.
|
205
|
+
#
|
206
|
+
# @return [Array<RelatonBib::TypedUri>] links
|
207
|
+
#
|
208
|
+
def link
|
209
|
+
url = "#{DOMAIN}/publication/#{urn_id}"
|
210
|
+
l = [RelatonBib::TypedUri.new(content: url, type: "src")]
|
211
|
+
RelatonBib.array(@pub["releaseItems"]).each_with_object(l) do |r, a|
|
212
|
+
next unless r["type"] == "PREVIEW"
|
213
|
+
|
214
|
+
url = "#{DOMAIN}/preview/#{r['contentRef']['fileName']}"
|
215
|
+
a << RelatonBib::TypedUri.new(content: url, type: "obp")
|
216
|
+
end
|
217
|
+
end
|
218
|
+
|
219
|
+
#
|
220
|
+
# Extract URN ID from URN.
|
221
|
+
#
|
222
|
+
# @return [String] URN ID
|
223
|
+
#
|
224
|
+
def urn_id
|
225
|
+
@pub["urn"].split(":").last
|
226
|
+
end
|
227
|
+
|
228
|
+
#
# Parse document type.
#
# Maps the publication's `stdType` code to a document type name. Codes
# without a dedicated mapping are returned downcased.
#
# @return [String, nil] document type, or nil when the publication has no `stdType`
#
def doctype
  std_type = @pub["stdType"]
  case std_type
  when "IS" then "international-standard"
  when "TR" then "technical-report"
  when "TS" then "technical-specification"
  when "PAS" then "publicly-available-specification"
  when "SRD" then "system-reference-deliverable" # previously misspelled "delivrabble"
  else std_type&.downcase # a missing stdType yields nil instead of raising NoMethodError
  end
end
|
243
|
+
|
244
|
+
#
|
245
|
+
# Parse relation.
|
246
|
+
#
|
247
|
+
# @return [Array<RelatonBib::DocumentRelation>] relation
|
248
|
+
#
|
249
|
+
def relation # rubocop:disable Metrics/MethodLength
|
250
|
+
try = 0
|
251
|
+
begin
|
252
|
+
uri = URI "#{DOMAIN}/webstore/webstore.nsf/AjaxRequestXML?" \
|
253
|
+
"Openagent&url=#{urn_id}"
|
254
|
+
resp = Net::HTTP.get_response uri
|
255
|
+
doc = Nokogiri::XML resp.body
|
256
|
+
create_relations doc
|
257
|
+
rescue StandardError => e
|
258
|
+
try += 1
|
259
|
+
try < 3 ? retry : raise(e)
|
260
|
+
end
|
261
|
+
end
|
262
|
+
|
263
|
+
#
|
264
|
+
# Create relations.
|
265
|
+
#
|
266
|
+
# @param [Nokogiri::XML::Document] doc XML document
|
267
|
+
#
|
268
|
+
# @return [Array<RelatonBib::DocumentRelation>] relations
|
269
|
+
#
|
270
|
+
def create_relations(doc) # rubocop:disable Metrics/MethodLength
|
271
|
+
doc.xpath('//ROW[STATUS[.!="PREPARING" and .!="PUBLISHED"]]')
|
272
|
+
.map do |r|
|
273
|
+
r_type = r.at("STATUS").text.downcase
|
274
|
+
type = case r_type
|
275
|
+
when "revised", "replaced" then "updates"
|
276
|
+
when "withdrawn" then "obsoletes"
|
277
|
+
else r_type
|
278
|
+
end
|
279
|
+
ref = r.at("FULL_NAME").text
|
280
|
+
fref = RelatonBib::FormattedRef.new content: ref, format: "text/plain"
|
281
|
+
docid = RelatonBib::DocumentIdentifier.new(id: ref, type: "IEC", primary: true)
|
282
|
+
bibitem = IecBibliographicItem.new(formattedref: fref, docid: [docid])
|
283
|
+
RelatonBib::DocumentRelation.new type: type, bibitem: bibitem
|
284
|
+
end
|
285
|
+
end
|
286
|
+
end
|
287
|
+
end
|
data/lib/relaton_iec/hit.rb
CHANGED
@@ -3,12 +3,20 @@
|
|
3
3
|
module RelatonIec
|
4
4
|
# Hit.
|
5
5
|
class Hit < RelatonBib::Hit
|
6
|
+
GHURL = "https://raw.githubusercontent.com/relaton/relaton-data-iec/main/"
|
7
|
+
|
6
8
|
attr_writer :fetch
|
7
9
|
|
8
10
|
# Parse page.
|
9
11
|
# @return [RelatonIec::IecBibliographicItem]
|
10
12
|
def fetch
|
11
|
-
@fetch ||=
|
13
|
+
@fetch ||= begin
|
14
|
+
url = "#{GHURL}#{hit[:file]}"
|
15
|
+
resp = Net::HTTP.get URI(url)
|
16
|
+
hash = YAML.safe_load resp
|
17
|
+
hash["fetched"] = Date.today.to_s
|
18
|
+
IecBibliographicItem.from_hash hash
|
19
|
+
end
|
12
20
|
end
|
13
21
|
|
14
22
|
def part
|