relaton-iec 0.9.0 → 1.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.adoc +1 -1
- data/grammars/basicdoc.rng +986 -0
- data/grammars/biblio.rng +1237 -0
- data/grammars/iec.rng +43 -0
- data/grammars/isodoc.rng +1504 -0
- data/{grammars → grammars/isostandard.rng} +148 -472
- data/grammars/reqt.rng +165 -0
- data/lib/relaton_iec.rb +16 -9
- data/lib/relaton_iec/hash_converter.rb +14 -0
- data/lib/relaton_iec/hit.rb +1 -1
- data/lib/relaton_iec/hit_collection.rb +0 -1
- data/lib/relaton_iec/iec_bibliographic_item.rb +9 -0
- data/lib/relaton_iec/iec_bibliography.rb +16 -18
- data/lib/relaton_iec/processor.rb +7 -7
- data/lib/relaton_iec/scrapper.rb +35 -166
- data/lib/relaton_iec/version.rb +1 -1
- data/lib/relaton_iec/xml_parser.rb +14 -0
- data/relaton_iec.gemspec +6 -5
- metadata +29 -7
data/grammars/reqt.rng
ADDED
@@ -0,0 +1,165 @@
|
|
1
|
+
<?xml version="1.0" encoding="UTF-8"?>
|
2
|
+
<grammar xmlns="http://relaxng.org/ns/structure/1.0" datatypeLibrary="http://www.w3.org/2001/XMLSchema-datatypes">
|
3
|
+
<!--
|
4
|
+
Presupposes isodoc.rnc, is included in it
|
5
|
+
include "isodoc.rnc" { }
|
6
|
+
-->
|
7
|
+
<define name="requirement">
|
8
|
+
<element name="requirement">
|
9
|
+
<ref name="RequirementType"/>
|
10
|
+
</element>
|
11
|
+
</define>
|
12
|
+
<define name="recommendation">
|
13
|
+
<element name="recommendation">
|
14
|
+
<ref name="RequirementType"/>
|
15
|
+
</element>
|
16
|
+
</define>
|
17
|
+
<define name="permission">
|
18
|
+
<element name="permission">
|
19
|
+
<ref name="RequirementType"/>
|
20
|
+
</element>
|
21
|
+
</define>
|
22
|
+
<define name="RequirementType">
|
23
|
+
<optional>
|
24
|
+
<attribute name="obligation">
|
25
|
+
<ref name="ObligationType"/>
|
26
|
+
</attribute>
|
27
|
+
</optional>
|
28
|
+
<optional>
|
29
|
+
<attribute name="unnumbered">
|
30
|
+
<data type="boolean"/>
|
31
|
+
</attribute>
|
32
|
+
</optional>
|
33
|
+
<optional>
|
34
|
+
<attribute name="subsequence"/>
|
35
|
+
</optional>
|
36
|
+
<attribute name="id">
|
37
|
+
<data type="ID"/>
|
38
|
+
</attribute>
|
39
|
+
<optional>
|
40
|
+
<attribute name="filename"/>
|
41
|
+
</optional>
|
42
|
+
<optional>
|
43
|
+
<ref name="reqtitle"/>
|
44
|
+
</optional>
|
45
|
+
<optional>
|
46
|
+
<ref name="label"/>
|
47
|
+
</optional>
|
48
|
+
<optional>
|
49
|
+
<ref name="subject"/>
|
50
|
+
</optional>
|
51
|
+
<optional>
|
52
|
+
<ref name="reqinherit"/>
|
53
|
+
</optional>
|
54
|
+
<zeroOrMore>
|
55
|
+
<ref name="classification"/>
|
56
|
+
</zeroOrMore>
|
57
|
+
<zeroOrMore>
|
58
|
+
<choice>
|
59
|
+
<ref name="measurementtarget"/>
|
60
|
+
<ref name="specification"/>
|
61
|
+
<ref name="verification"/>
|
62
|
+
<ref name="import"/>
|
63
|
+
<ref name="description"/>
|
64
|
+
</choice>
|
65
|
+
</zeroOrMore>
|
66
|
+
<optional>
|
67
|
+
<ref name="reqt_references"/>
|
68
|
+
</optional>
|
69
|
+
<zeroOrMore>
|
70
|
+
<choice>
|
71
|
+
<ref name="requirement"/>
|
72
|
+
<ref name="recommendation"/>
|
73
|
+
<ref name="permission"/>
|
74
|
+
</choice>
|
75
|
+
</zeroOrMore>
|
76
|
+
</define>
|
77
|
+
<define name="reqtitle">
|
78
|
+
<element name="title">
|
79
|
+
<ref name="FormattedString"/>
|
80
|
+
</element>
|
81
|
+
</define>
|
82
|
+
<define name="label">
|
83
|
+
<element name="label">
|
84
|
+
<text/>
|
85
|
+
</element>
|
86
|
+
</define>
|
87
|
+
<define name="subject">
|
88
|
+
<element name="subject">
|
89
|
+
<text/>
|
90
|
+
</element>
|
91
|
+
</define>
|
92
|
+
<define name="reqinherit">
|
93
|
+
<element name="inherit">
|
94
|
+
<text/>
|
95
|
+
</element>
|
96
|
+
</define>
|
97
|
+
<define name="measurementtarget">
|
98
|
+
<element name="measurement-target">
|
99
|
+
<ref name="RequirementSubpart"/>
|
100
|
+
</element>
|
101
|
+
</define>
|
102
|
+
<define name="specification">
|
103
|
+
<element name="specification">
|
104
|
+
<ref name="RequirementSubpart"/>
|
105
|
+
</element>
|
106
|
+
</define>
|
107
|
+
<define name="verification">
|
108
|
+
<element name="verification">
|
109
|
+
<ref name="RequirementSubpart"/>
|
110
|
+
</element>
|
111
|
+
</define>
|
112
|
+
<define name="import">
|
113
|
+
<element name="import">
|
114
|
+
<ref name="RequirementSubpart"/>
|
115
|
+
</element>
|
116
|
+
</define>
|
117
|
+
<define name="description">
|
118
|
+
<element name="description">
|
119
|
+
<ref name="RequirementSubpart"/>
|
120
|
+
</element>
|
121
|
+
</define>
|
122
|
+
<define name="reqt_references">
|
123
|
+
<element name="references">
|
124
|
+
<oneOrMore>
|
125
|
+
<ref name="bibitem"/>
|
126
|
+
</oneOrMore>
|
127
|
+
</element>
|
128
|
+
</define>
|
129
|
+
<define name="RequirementSubpart">
|
130
|
+
<optional>
|
131
|
+
<attribute name="type"/>
|
132
|
+
</optional>
|
133
|
+
<optional>
|
134
|
+
<attribute name="exclude">
|
135
|
+
<data type="boolean"/>
|
136
|
+
</attribute>
|
137
|
+
</optional>
|
138
|
+
<oneOrMore>
|
139
|
+
<ref name="BasicBlock"/>
|
140
|
+
</oneOrMore>
|
141
|
+
</define>
|
142
|
+
<define name="ObligationType">
|
143
|
+
<choice>
|
144
|
+
<value>requirement</value>
|
145
|
+
<value>recommendation</value>
|
146
|
+
<value>permission</value>
|
147
|
+
</choice>
|
148
|
+
</define>
|
149
|
+
<define name="classification">
|
150
|
+
<element name="classification">
|
151
|
+
<ref name="classification_tag"/>
|
152
|
+
<ref name="classification_value"/>
|
153
|
+
</element>
|
154
|
+
</define>
|
155
|
+
<define name="classification_tag">
|
156
|
+
<element name="tag">
|
157
|
+
<text/>
|
158
|
+
</element>
|
159
|
+
</define>
|
160
|
+
<define name="classification_value">
|
161
|
+
<element name="value">
|
162
|
+
<text/>
|
163
|
+
</element>
|
164
|
+
</define>
|
165
|
+
</grammar>
|
data/lib/relaton_iec.rb
CHANGED
@@ -1,14 +1,21 @@
|
|
1
|
+
require "relaton_iso_bib"
|
2
|
+
require "relaton_iec/hit"
|
3
|
+
require "nokogiri"
|
4
|
+
require "net/http"
|
1
5
|
require "relaton_iec/version"
|
2
6
|
require "relaton_iec/iec_bibliography"
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
# return if caller.detect { |c| c.include? "register_gems" }
|
8
|
-
|
9
|
-
# Relaton::Registry.instance.register(RelatonIec::Processor)
|
10
|
-
# end
|
7
|
+
require "relaton_iec/iec_bibliographic_item"
|
8
|
+
require "relaton_iec/xml_parser"
|
9
|
+
require "relaton_iec/hash_converter"
|
10
|
+
require "digest/md5"
|
11
11
|
|
12
12
|
module RelatonIec
|
13
|
-
#
|
13
|
+
# Returns hash of XML grammar
|
14
|
+
# @return [String]
|
15
|
+
def self.grammar_hash
|
16
|
+
gem_path = File.expand_path "..", __dir__
|
17
|
+
grammars_path = File.join gem_path, "grammars", "*"
|
18
|
+
grammars = Dir[grammars_path].sort.map { |gp| File.read gp }.join
|
19
|
+
Digest::MD5.hexdigest grammars
|
20
|
+
end
|
14
21
|
end
|
@@ -0,0 +1,14 @@
|
|
1
|
+
module RelatonIec
|
2
|
+
class HashConverter < RelatonIsoBib::HashConverter
|
3
|
+
class << self
|
4
|
+
#
|
5
|
+
# Overrides superclass's method
|
6
|
+
#
|
7
|
+
# @param item [Hash]
|
8
|
+
# @return [RelatonIec::IecBibliographicItem]
|
9
|
+
def bib_item(item)
|
10
|
+
IecBibliographicItem.new(item)
|
11
|
+
end
|
12
|
+
end
|
13
|
+
end
|
14
|
+
end
|
data/lib/relaton_iec/hit.rb
CHANGED
@@ -17,15 +17,10 @@ module RelatonIec
|
|
17
17
|
raise RelatonBib::RequestError, "Could not access http://www.iec.ch"
|
18
18
|
end
|
19
19
|
|
20
|
-
# @param text [String]
|
21
|
-
# @return [Array<IsoBibliographicItem>]
|
22
|
-
# def search_and_fetch(text, year = nil)
|
23
|
-
# Scrapper.get(text, year)
|
24
|
-
# end
|
25
|
-
|
26
20
|
# @param code [String] the ISO standard Code to look up (e.g. "ISO 9000")
|
27
21
|
# @param year [String] the year the standard was published (optional)
|
28
|
-
# @param opts [Hash] options; restricted to :all_parts if all-parts
|
22
|
+
# @param opts [Hash] options; restricted to :all_parts if all-parts
|
23
|
+
# reference is required
|
29
24
|
# @return [String] Relaton XML serialisation of reference
|
30
25
|
def get(code, year = nil, opts = {})
|
31
26
|
if year.nil?
|
@@ -43,8 +38,8 @@ module RelatonIec
|
|
43
38
|
ret = iecbib_get1(code, year, opts)
|
44
39
|
return nil if ret.nil?
|
45
40
|
|
46
|
-
ret.to_most_recent_reference unless year || opts[:keep_year]
|
47
|
-
ret.to_all_parts if opts[:all_parts]
|
41
|
+
ret = ret.to_most_recent_reference unless year || opts[:keep_year]
|
42
|
+
ret = ret.to_all_parts if opts[:all_parts]
|
48
43
|
ret
|
49
44
|
end
|
50
45
|
|
@@ -52,15 +47,15 @@ module RelatonIec
|
|
52
47
|
|
53
48
|
def fetch_ref_err(code, year, missed_years)
|
54
49
|
id = year ? "#{code}:#{year}" : code
|
55
|
-
warn "WARNING: no match found online for #{id}. "\
|
50
|
+
warn "[relaton-iec] WARNING: no match found online for #{id}. "\
|
56
51
|
"The code must be exactly like it is on the standards website."
|
57
|
-
warn "(There was no match for #{year}, though there were matches "\
|
52
|
+
warn "[relaton-iec] (There was no match for #{year}, though there were matches "\
|
58
53
|
"found for #{missed_years.join(', ')}.)" unless missed_years.empty?
|
59
54
|
if /\d-\d/ =~ code
|
60
|
-
warn "The provided document part may not exist, or the document "\
|
55
|
+
warn "[relaton-iec] The provided document part may not exist, or the document "\
|
61
56
|
"may no longer be published in parts."
|
62
57
|
else
|
63
|
-
warn "If you wanted to cite all document parts for the reference, "\
|
58
|
+
warn "[relaton-iec] If you wanted to cite all document parts for the reference, "\
|
64
59
|
"use \"#{code} (all parts)\".\nIf the document is not a standard, "\
|
65
60
|
"use its document type abbreviation (TS, TR, PAS, Guide)."
|
66
61
|
end
|
@@ -72,13 +67,13 @@ module RelatonIec
|
|
72
67
|
workers.worker { |w| { i: w[:i], hit: w[:hit].fetch } }
|
73
68
|
s.each_with_index { |hit, i| workers << { i: i, hit: hit } }
|
74
69
|
workers.end
|
75
|
-
workers.result.
|
70
|
+
workers.result.sort_by { |a| a[:i] }.map { |x| x[:hit] }
|
76
71
|
end
|
77
72
|
|
78
73
|
def isobib_search_filter(code)
|
79
74
|
docidrx = %r{^(ISO|IEC)[^0-9]*\s[0-9-]+}
|
80
75
|
corrigrx = %r{^(ISO|IEC)[^0-9]*\s[0-9-]+:[0-9]+/}
|
81
|
-
warn "
|
76
|
+
warn "[relaton-iec] (\"#{code}\") fetching..."
|
82
77
|
result = search(code)
|
83
78
|
result.select do |i|
|
84
79
|
i.hit[:code] &&
|
@@ -147,9 +142,12 @@ module RelatonIec
|
|
147
142
|
|
148
143
|
result = isobib_search_filter(code) || return
|
149
144
|
ret = isobib_results_filter(result, year)
|
150
|
-
|
151
|
-
|
152
|
-
|
145
|
+
if ret[:ret]
|
146
|
+
warn "[relaton-iec] (\"#{code}\") found #{ret[:ret].docidentifier.first.id}"
|
147
|
+
ret[:ret]
|
148
|
+
else
|
149
|
+
fetch_ref_err(code, year, ret[:years])
|
150
|
+
end
|
153
151
|
end
|
154
152
|
end
|
155
153
|
end
|
@@ -12,28 +12,28 @@ module RelatonIec
|
|
12
12
|
# @param code [String]
|
13
13
|
# @param date [String, NilClass] year
|
14
14
|
# @param opts [Hash]
|
15
|
-
# @return [RelatonIsoBib::
|
15
|
+
# @return [RelatonIec::IecBibliographicItem]
|
16
16
|
def get(code, date, opts)
|
17
17
|
::RelatonIec::IecBibliography.get(code, date, opts)
|
18
18
|
end
|
19
19
|
|
20
20
|
# @param xml [String]
|
21
|
-
# @return [RelatonIsoBib::
|
21
|
+
# @return [RelatonIec::IecBibliographicItem]
|
22
22
|
def from_xml(xml)
|
23
|
-
|
23
|
+
RelatonIec::XMLParser.from_xml xml
|
24
24
|
end
|
25
25
|
|
26
26
|
# @param hash [Hash]
|
27
|
-
# @return [RelatonIsoBib::
|
27
|
+
# @return [RelatonIec::IecBibliographicItem]
|
28
28
|
def hash_to_bib(hash)
|
29
|
-
item_hash = ::
|
30
|
-
::
|
29
|
+
item_hash = ::RelatonIec::HashConverter.hash_to_bib(hash)
|
30
|
+
::RelatonIec::IecBibliographicItem.new item_hash
|
31
31
|
end
|
32
32
|
|
33
33
|
# Returns hash of XML grammar
|
34
34
|
# @return [String]
|
35
35
|
def grammar_hash
|
36
|
-
@grammar_hash ||= ::
|
36
|
+
@grammar_hash ||= ::RelatonIec.grammar_hash
|
37
37
|
end
|
38
38
|
end
|
39
39
|
end
|
data/lib/relaton_iec/scrapper.rb
CHANGED
@@ -1,10 +1,5 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
-
require "relaton_iso_bib"
|
4
|
-
require "relaton_iec/hit"
|
5
|
-
require "nokogiri"
|
6
|
-
require "net/http"
|
7
|
-
|
8
3
|
# Capybara.request_driver :poltergeist do |app|
|
9
4
|
# Capybara::Poltergeist::Driver.new app, js_errors: false
|
10
5
|
# end
|
@@ -12,7 +7,6 @@ require "net/http"
|
|
12
7
|
|
13
8
|
module RelatonIec
|
14
9
|
# Scrapper.
|
15
|
-
# rubocop:disable Metrics/ModuleLength
|
16
10
|
module Scrapper
|
17
11
|
DOMAIN = "https://webstore.iec.ch"
|
18
12
|
|
@@ -32,31 +26,22 @@ module RelatonIec
|
|
32
26
|
}.freeze
|
33
27
|
|
34
28
|
class << self
|
35
|
-
#
|
36
|
-
# @return [Array<Hash>]
|
37
|
-
# def get(text)
|
38
|
-
# iso_workers = WorkersPool.new 4
|
39
|
-
# iso_workers.worker { |hit| iso_worker(hit, iso_workers) }
|
40
|
-
# algolia_workers = start_algolia_search(text, iso_workers)
|
41
|
-
# iso_docs = iso_workers.result
|
42
|
-
# algolia_workers.end
|
43
|
-
# algolia_workers.result
|
44
|
-
# iso_docs
|
45
|
-
# end
|
29
|
+
# rubocop:disable Metrics/AbcSize, Metrics/MethodLength
|
46
30
|
|
47
31
|
# Parse page.
|
48
32
|
# @param hit_data [Hash]
|
49
33
|
# @return [Hash]
|
50
|
-
# rubocop:disable Metrics/AbcSize, Metrics/MethodLength
|
51
34
|
def parse_page(hit_data)
|
52
35
|
doc = get_page hit_data[:url]
|
53
36
|
|
54
37
|
# Fetch edition.
|
55
|
-
edition = doc.at(
|
38
|
+
edition = doc.at(
|
39
|
+
"//th[contains(., 'Edition')]/following-sibling::td/span",
|
40
|
+
).text
|
56
41
|
|
57
42
|
status, relations = fetch_status_relations hit_data[:url]
|
58
43
|
|
59
|
-
|
44
|
+
IecBibliographicItem.new(
|
60
45
|
fetched: Date.today.to_s,
|
61
46
|
docid: [RelatonBib::DocumentIdentifier.new(id: hit_data[:code], type: "IEC")],
|
62
47
|
structuredidentifier: fetch_structuredidentifier(doc),
|
@@ -81,46 +66,6 @@ module RelatonIec
|
|
81
66
|
|
82
67
|
private
|
83
68
|
|
84
|
-
# Start search workers.
|
85
|
-
# @param text[String]
|
86
|
-
# @param iec_workers [Isobib::WorkersPool]
|
87
|
-
# @reaturn [Isobib::WorkersPool]
|
88
|
-
# def start_algolia_search(text, iec_workers)
|
89
|
-
# index = Algolia::Index.new 'all_en'
|
90
|
-
# workers = WorkersPool.new
|
91
|
-
# workers.worker do |page|
|
92
|
-
# algolia_worker(index, text, page, workers, iec_workers)
|
93
|
-
# end
|
94
|
-
|
95
|
-
# # Add first page so search worker will start.
|
96
|
-
# workers << 0
|
97
|
-
# end
|
98
|
-
|
99
|
-
# Fetch ISO documents.
|
100
|
-
# @param hit [Hash]
|
101
|
-
# @param isiso_workers [Isobib::WorkersPool]
|
102
|
-
# def iso_worker(hit, iso_workers)
|
103
|
-
# print "Parse #{iso_workers.size} of #{iso_workers.nb_hits} \r"
|
104
|
-
# parse_page hit
|
105
|
-
# end
|
106
|
-
|
107
|
-
# Fetch hits from algolia search service.
|
108
|
-
# @param index[Algolia::Index]
|
109
|
-
# @param text [String]
|
110
|
-
# @param page [Integer]
|
111
|
-
# @param algolia_workers [Isobib::WorkersPool]
|
112
|
-
# @param isiso_workers [Isobib::WorkersPool]
|
113
|
-
# def algolia_worker(index, text, page, algolia_workers, iso_workers)
|
114
|
-
# res = index.search text, facetFilters: ['category:standard'], page: page
|
115
|
-
# next_page = res['page'] + 1
|
116
|
-
# algolia_workers << next_page if next_page < res['nbPages']
|
117
|
-
# res['hits'].each do |hit|
|
118
|
-
# iso_workers.nb_hits = res['nbHits']
|
119
|
-
# iso_workers << hit
|
120
|
-
# end
|
121
|
-
# iso_workers.end unless next_page < res['nbPages']
|
122
|
-
# end
|
123
|
-
|
124
69
|
# Fetch abstracts.
|
125
70
|
# @param doc [Nokogiri::HTML::Document]
|
126
71
|
# @return [Array<Array>]
|
@@ -134,19 +79,6 @@ module RelatonIec
|
|
134
79
|
}]
|
135
80
|
end
|
136
81
|
|
137
|
-
# Get langs.
|
138
|
-
# @param doc [Nokogiri::HTML::Document]
|
139
|
-
# @return [Array<Hash>]
|
140
|
-
# def langs(doc)
|
141
|
-
# lgs = [{ lang: 'en' }]
|
142
|
-
# doc.css('ul#lang-switcher ul li a').each do |lang_link|
|
143
|
-
# lang_path = lang_link.attr('href')
|
144
|
-
# lang = lang_path.match(%r{^\/(fr)\/})
|
145
|
-
# lgs << { lang: lang[1], path: lang_path } if lang
|
146
|
-
# end
|
147
|
-
# lgs
|
148
|
-
# end
|
149
|
-
|
150
82
|
# rubocop:disable Metrics/AbcSize, Metrics/MethodLength
|
151
83
|
|
152
84
|
# Get page.
|
@@ -154,25 +86,20 @@ module RelatonIec
|
|
154
86
|
# @return [Array<Nokogiri::HTML::Document, String>]
|
155
87
|
def get_page(url)
|
156
88
|
uri = URI url
|
157
|
-
resp = Net::HTTP.get_response(uri)
|
89
|
+
resp = Net::HTTP.get_response(uri)
|
158
90
|
case resp.code
|
159
91
|
when "301"
|
160
92
|
path = resp["location"]
|
161
93
|
url = DOMAIN + path
|
162
94
|
uri = URI url
|
163
|
-
resp = Net::HTTP.get_response(uri)
|
95
|
+
resp = Net::HTTP.get_response(uri)
|
164
96
|
when "404"
|
165
97
|
raise RelatonBib::RequestError, "Page not found #{url}"
|
166
98
|
end
|
167
|
-
# n = 0
|
168
|
-
# while resp.body !~ /<strong/ && n < 10
|
169
|
-
# resp = Net::HTTP.get_response(uri)#.encode("UTF-8")
|
170
|
-
# n += 1
|
171
|
-
# end
|
172
99
|
Nokogiri::HTML(resp.body)
|
173
|
-
rescue SocketError, Timeout::Error, Errno::EINVAL, Errno::ECONNRESET,
|
174
|
-
Net::HTTPBadResponse, Net::HTTPHeaderSyntaxError,
|
175
|
-
OpenSSL::SSL::SSLError
|
100
|
+
rescue SocketError, Timeout::Error, Errno::EINVAL, Errno::ECONNRESET,
|
101
|
+
EOFError, Net::HTTPBadResponse, Net::HTTPHeaderSyntaxError,
|
102
|
+
Net::ProtocolError, OpenSSL::SSL::SSLError
|
176
103
|
raise RelatonBib::RequestError, "Could not access #{url}"
|
177
104
|
end
|
178
105
|
# rubocop:enable Metrics/AbcSize
|
@@ -211,15 +138,12 @@ module RelatonIec
|
|
211
138
|
statuses = YAML.load_file "lib/relaton_iec/statuses.yml"
|
212
139
|
s = wip.at("STAGE").text
|
213
140
|
stage, substage = statuses[s]["stage"].split "."
|
214
|
-
# status = statuses[s]["status"]
|
215
141
|
else
|
216
|
-
# status = "Published"
|
217
142
|
stage = "60"
|
218
143
|
substage = "60"
|
219
144
|
end
|
220
145
|
RelatonBib::DocumentStatus.new(stage: stage, substage: substage)
|
221
146
|
end
|
222
|
-
# rubocop:enable Metrics/MethodLength
|
223
147
|
|
224
148
|
# Fetch workgroup.
|
225
149
|
# @param doc [Nokogiri::HTML::Document]
|
@@ -237,27 +161,26 @@ module RelatonIec
|
|
237
161
|
}],
|
238
162
|
}
|
239
163
|
end
|
164
|
+
# rubocop:enable Metrics/MethodLength
|
240
165
|
|
241
166
|
# Fetch relations.
|
242
167
|
# @param doc [Nokogiri::HTML::Document]
|
243
168
|
# @return [Array<Hash>]
|
244
169
|
# rubocop:disable Metrics/MethodLength
|
245
170
|
def fetch_relations(doc)
|
246
|
-
doc.xpath('//ROW[STATUS[.!="PREPARING"]][STATUS[.!="PUBLISHED"]]').
|
171
|
+
doc.xpath('//ROW[STATUS[.!="PREPARING"]][STATUS[.!="PUBLISHED"]]').
|
172
|
+
map do |r|
|
247
173
|
r_type = r.at("STATUS").text.downcase
|
248
174
|
type = case r_type
|
249
|
-
|
175
|
+
# when 'published' then 'obsoletes' # Valid
|
250
176
|
when "revised", "replaced" then "updates"
|
251
177
|
when "withdrawn" then "obsoletes"
|
252
178
|
else r_type
|
253
179
|
end
|
254
|
-
# url = DOMAIN + "/publication/" + r.at("PUB_ID").text
|
255
180
|
fref = RelatonBib::FormattedRef.new(
|
256
181
|
content: r.at("FULL_NAME").text, format: "text/plain",
|
257
182
|
)
|
258
|
-
bibitem =
|
259
|
-
formattedref: fref,
|
260
|
-
)
|
183
|
+
bibitem = IecBibliographicItem.new(formattedref: fref)
|
261
184
|
{ type: type, bibitem: bibitem }
|
262
185
|
end
|
263
186
|
end
|
@@ -272,22 +195,6 @@ module RelatonIec
|
|
272
195
|
status = fetch_status doc
|
273
196
|
relations = fetch_relations doc
|
274
197
|
[status, relations]
|
275
|
-
# doc.css('ul.steps li').inject([]) do |a, r|
|
276
|
-
# r_type = r.css('strong').text
|
277
|
-
# type = case r_type
|
278
|
-
# when 'Previously', 'Will be replaced by' then 'obsoletes'
|
279
|
-
# when 'Corrigenda/Amendments', 'Revised by', 'Now confirmed'
|
280
|
-
# 'updates'
|
281
|
-
# else r_type
|
282
|
-
# end
|
283
|
-
# if ['Now', 'Now under review'].include? type
|
284
|
-
# a
|
285
|
-
# else
|
286
|
-
# a + r.css('a').map do |id|
|
287
|
-
# { type: type, identifier: id.text, url: id['href'] }
|
288
|
-
# end
|
289
|
-
# end
|
290
|
-
# end
|
291
198
|
end
|
292
199
|
# rubocop:enable Metrics/MethodLength
|
293
200
|
|
@@ -295,66 +202,18 @@ module RelatonIec
|
|
295
202
|
# @param doc [Nokogiri::HTML::Document]
|
296
203
|
# @return [String]
|
297
204
|
def fetch_type(doc)
|
298
|
-
doc.at(
|
299
|
-
|
300
|
-
|
301
|
-
# /NP)*\s|/)(TS|TR|PAS|AWI|CD|FDIS|NP|DIS|WD|R|Guide|(?=\d+))}x)
|
302
|
-
# #return "international-standard" if type_match.nil?
|
303
|
-
# if TYPES[type_match[2]]
|
304
|
-
# TYPES[type_match[2]]
|
305
|
-
# elsif type_match[1]
|
306
|
-
# elsif type_match[1] == 'ISO'
|
307
|
-
# 'international-standard'
|
308
|
-
# elsif type_match[1] == 'IWA'
|
309
|
-
# 'international-workshop-agreement'
|
310
|
-
# end
|
311
|
-
# # rescue => _e
|
312
|
-
# # puts 'Unknown document type: ' + title
|
205
|
+
doc.at(
|
206
|
+
'//th[contains(., "Publication type")]/following-sibling::td/span',
|
207
|
+
).text.downcase.tr " ", "-"
|
313
208
|
end
|
314
209
|
|
315
210
|
# Fetch titles.
|
316
211
|
# @param hit_data [Hash]
|
317
212
|
# @return [Array<Hash>]
|
318
213
|
def fetch_titles(hit_data)
|
319
|
-
|
320
|
-
case titles.size
|
321
|
-
when 0
|
322
|
-
intro, main, part = nil, "", nil
|
323
|
-
when 1
|
324
|
-
intro, main, part = nil, titles[0], nil
|
325
|
-
when 2
|
326
|
-
if /^(Part|Partie) \d+:/ =~ titles[1]
|
327
|
-
intro, main, part = nil, titles[0], titles[1]
|
328
|
-
else
|
329
|
-
intro, main, part = titles[0], titles[1], nil
|
330
|
-
end
|
331
|
-
when 3
|
332
|
-
if /^(Part|Partie) \d+:/ =~ titles[1]
|
333
|
-
intro, main, part = nil, titles[0], titles[1..2].join(" - ")
|
334
|
-
else
|
335
|
-
intro, main, part = titles[0], titles[1], titles[2]
|
336
|
-
end
|
337
|
-
else
|
338
|
-
intro, main, part = titles[0], titles[1], titles[2..-1]&.join(" -- ")
|
339
|
-
end
|
340
|
-
[{
|
341
|
-
title_intro: intro,
|
342
|
-
title_main: main,
|
343
|
-
title_part: part,
|
344
|
-
language: "en",
|
345
|
-
script: "Latn"
|
346
|
-
}]
|
214
|
+
RelatonBib::TypedTitleString.from_string hit_data[:title], "en", "Latn"
|
347
215
|
end
|
348
216
|
|
349
|
-
# Return ISO script code.
|
350
|
-
# @param lang [String]
|
351
|
-
# @return [String]
|
352
|
-
# def script(lang)
|
353
|
-
# case lang
|
354
|
-
# when 'en', 'fr' then 'Latn'
|
355
|
-
# end
|
356
|
-
# end
|
357
|
-
|
358
217
|
# Fetch dates
|
359
218
|
# @param doc [Nokogiri::HTML::Document]
|
360
219
|
# @return [Array<Hash>]
|
@@ -367,6 +226,8 @@ module RelatonIec
|
|
367
226
|
dates
|
368
227
|
end
|
369
228
|
|
229
|
+
# rubocop:disable Metrics/MethodLength
|
230
|
+
|
370
231
|
def fetch_contributors(code)
|
371
232
|
code.sub(/\s.*/, "").split("/").map do |abbrev|
|
372
233
|
case abbrev
|
@@ -381,12 +242,15 @@ module RelatonIec
|
|
381
242
|
role: [type: "publisher"] }
|
382
243
|
end
|
383
244
|
end
|
245
|
+
# rubocop:enable Metrics/MethodLength
|
384
246
|
|
385
247
|
# Fetch ICS.
|
386
248
|
# @param doc [Nokogiri::HTML::Document]
|
387
249
|
# @return [Array<Hash>]
|
388
250
|
def fetch_ics(doc)
|
389
|
-
doc.xpath(
|
251
|
+
doc.xpath(
|
252
|
+
'//th[contains(text(), "ICS")]/following-sibling::td/a',
|
253
|
+
).map do |i|
|
390
254
|
code = i.text.match(/[\d\.]+/).to_s.split "."
|
391
255
|
{ field: code[0], group: code[1], subgroup: code[2] }
|
392
256
|
end
|
@@ -403,9 +267,11 @@ module RelatonIec
|
|
403
267
|
links
|
404
268
|
end
|
405
269
|
|
270
|
+
# rubocop:disable Metrics/MethodLength
|
271
|
+
|
406
272
|
# Fetch copyright.
|
407
273
|
# @param code [String]
|
408
|
-
# @return [Hash]
|
274
|
+
# @return [Array<Hash>]
|
409
275
|
def fetch_copyright(code, doc)
|
410
276
|
abbreviation = code.match(/.*?(?=\s)/).to_s
|
411
277
|
case abbreviation
|
@@ -415,12 +281,15 @@ module RelatonIec
|
|
415
281
|
end
|
416
282
|
from = code.match(/(?<=:)\d{4}/).to_s
|
417
283
|
if from.empty?
|
418
|
-
from = doc.xpath("//span[@itemprop='releaseDate']").text
|
419
|
-
|
284
|
+
from = doc.xpath("//span[@itemprop='releaseDate']").text.
|
285
|
+
match(/\d{4}/).to_s
|
420
286
|
end
|
421
|
-
{
|
287
|
+
[{
|
288
|
+
owner: [{ name: name, abbreviation: abbreviation, url: url }],
|
289
|
+
from: from,
|
290
|
+
}]
|
422
291
|
end
|
292
|
+
# rubocop:enable Metrics/MethodLength
|
423
293
|
end
|
424
294
|
end
|
425
|
-
# rubocop:enable Metrics/ModuleLength
|
426
295
|
end
|