relaton-ecma 1.13.0 → 1.14.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.github/workflows/rake.yml +0 -1
- data/.github/workflows/release.yml +22 -0
- data/Gemfile +7 -1
- data/README.adoc +81 -37
- data/grammars/basicdoc.rng +3 -28
- data/grammars/biblio-standoc.rng +164 -0
- data/grammars/biblio.rng +94 -21
- data/grammars/relaton-ecma-compile.rng +11 -0
- data/grammars/relaton-ecma.rng +4 -0
- data/lib/relaton_ecma/bibliographic_item.rb +12 -0
- data/lib/relaton_ecma/data_fetcher.rb +97 -0
- data/lib/relaton_ecma/data_parser.rb +215 -0
- data/lib/relaton_ecma/ecma_bibliography.rb +53 -7
- data/lib/relaton_ecma/hash_converter.rb +11 -0
- data/lib/relaton_ecma/processor.rb +18 -5
- data/lib/relaton_ecma/version.rb +1 -1
- data/lib/relaton_ecma/xml_parser.rb +13 -0
- data/lib/relaton_ecma.rb +6 -1
- data/relaton_ecma.gemspec +3 -8
- metadata +27 -63
- data/grammars/isodoc.rng +0 -2807
- data/grammars/reqt.rng +0 -223
- data/lib/relaton_ecma/scrapper.rb +0 -27
data/grammars/biblio.rng
CHANGED
@@ -33,9 +33,10 @@
|
|
33
33
|
<param name="pattern">([\+\-]?\d{4})((-?)((0[1-9]|1[0-2])((-?)([12]\d|0[1-9]|3[01]))?|W([0-4]\d|5[0-2])(-?[1-7])?|(00[1-9]|0[1-9]\d|[12]\d{2}|3([0-5]\d|6[1-6]))))?</param>
|
34
34
|
</data>
|
35
35
|
</define>
|
36
|
-
|
37
|
-
|
38
|
-
|
36
|
+
<!-- start = bibitem -->
|
37
|
+
<define name="BibData">
|
38
|
+
<ref name="BibliographicItem"/>
|
39
|
+
</define>
|
39
40
|
<define name="status">
|
40
41
|
<element name="status">
|
41
42
|
<ref name="stage"/>
|
@@ -73,8 +74,14 @@
|
|
73
74
|
<text/>
|
74
75
|
</element>
|
75
76
|
</define>
|
76
|
-
<define name="
|
77
|
+
<define name="locale">
|
77
78
|
<a:documentation>ISO-639</a:documentation>
|
79
|
+
<element name="locale">
|
80
|
+
<text/>
|
81
|
+
</element>
|
82
|
+
</define>
|
83
|
+
<define name="script">
|
84
|
+
<a:documentation>ISO-3166</a:documentation>
|
78
85
|
<element name="script">
|
79
86
|
<text/>
|
80
87
|
</element>
|
@@ -93,6 +100,9 @@
|
|
93
100
|
<!-- multiple languages and scripts possible: comma delimit them if so -->
|
94
101
|
<attribute name="language"/>
|
95
102
|
</optional>
|
103
|
+
<optional>
|
104
|
+
<attribute name="locale"/>
|
105
|
+
</optional>
|
96
106
|
<optional>
|
97
107
|
<attribute name="script"/>
|
98
108
|
</optional>
|
@@ -136,6 +146,9 @@
|
|
136
146
|
<!-- multiple languages and scripts possible: comma delimit them if so -->
|
137
147
|
<attribute name="language"/>
|
138
148
|
</optional>
|
149
|
+
<optional>
|
150
|
+
<attribute name="locale"/>
|
151
|
+
</optional>
|
139
152
|
<optional>
|
140
153
|
<attribute name="script"/>
|
141
154
|
</optional>
|
@@ -158,27 +171,30 @@
|
|
158
171
|
</define>
|
159
172
|
<define name="contributor">
|
160
173
|
<element name="contributor">
|
161
|
-
<
|
174
|
+
<oneOrMore>
|
162
175
|
<ref name="role"/>
|
163
|
-
</
|
176
|
+
</oneOrMore>
|
164
177
|
<ref name="ContributorInfo"/>
|
165
178
|
</element>
|
166
179
|
</define>
|
167
180
|
<define name="role">
|
168
181
|
<element name="role">
|
169
|
-
<
|
170
|
-
<
|
171
|
-
<
|
172
|
-
|
173
|
-
|
174
|
-
|
175
|
-
|
176
|
-
|
177
|
-
|
178
|
-
|
179
|
-
</
|
180
|
-
|
181
|
-
|
182
|
+
<attribute name="type">
|
183
|
+
<choice>
|
184
|
+
<value>author</value>
|
185
|
+
<value>performer</value>
|
186
|
+
<value>publisher</value>
|
187
|
+
<value>editor</value>
|
188
|
+
<value>adapter</value>
|
189
|
+
<value>translator</value>
|
190
|
+
<value>distributor</value>
|
191
|
+
<value>realizer</value>
|
192
|
+
<value>owner</value>
|
193
|
+
<value>authorizer</value>
|
194
|
+
<value>enabler</value>
|
195
|
+
<value>subject</value>
|
196
|
+
</choice>
|
197
|
+
</attribute>
|
182
198
|
<zeroOrMore>
|
183
199
|
<ref name="roledescription"/>
|
184
200
|
</zeroOrMore>
|
@@ -200,6 +216,9 @@
|
|
200
216
|
<optional>
|
201
217
|
<ref name="fullname"/>
|
202
218
|
</optional>
|
219
|
+
<zeroOrMore>
|
220
|
+
<ref name="credential"/>
|
221
|
+
</zeroOrMore>
|
203
222
|
<zeroOrMore>
|
204
223
|
<ref name="affiliation"/>
|
205
224
|
</zeroOrMore>
|
@@ -216,6 +235,11 @@
|
|
216
235
|
<ref name="FullNameType"/>
|
217
236
|
</element>
|
218
237
|
</define>
|
238
|
+
<define name="credential">
|
239
|
+
<element name="credential">
|
240
|
+
<text/>
|
241
|
+
</element>
|
242
|
+
</define>
|
219
243
|
<define name="FullNameType">
|
220
244
|
<choice>
|
221
245
|
<group>
|
@@ -289,7 +313,9 @@
|
|
289
313
|
<zeroOrMore>
|
290
314
|
<ref name="affiliationdescription"/>
|
291
315
|
</zeroOrMore>
|
292
|
-
<
|
316
|
+
<optional>
|
317
|
+
<ref name="organization"/>
|
318
|
+
</optional>
|
293
319
|
</element>
|
294
320
|
</define>
|
295
321
|
<define name="affiliationname">
|
@@ -500,6 +526,17 @@
|
|
500
526
|
</define>
|
501
527
|
<define name="localityStack">
|
502
528
|
<element name="localityStack">
|
529
|
+
<optional>
|
530
|
+
<attribute name="connective">
|
531
|
+
<choice>
|
532
|
+
<value>and</value>
|
533
|
+
<value>or</value>
|
534
|
+
<value>from</value>
|
535
|
+
<value>to</value>
|
536
|
+
<value/>
|
537
|
+
</choice>
|
538
|
+
</attribute>
|
539
|
+
</optional>
|
503
540
|
<zeroOrMore>
|
504
541
|
<ref name="locality"/>
|
505
542
|
</zeroOrMore>
|
@@ -512,6 +549,17 @@
|
|
512
549
|
</define>
|
513
550
|
<define name="sourceLocalityStack">
|
514
551
|
<element name="sourceLocalityStack">
|
552
|
+
<optional>
|
553
|
+
<attribute name="connective">
|
554
|
+
<choice>
|
555
|
+
<value>and</value>
|
556
|
+
<value>or</value>
|
557
|
+
<value>from</value>
|
558
|
+
<value>to</value>
|
559
|
+
<value/>
|
560
|
+
</choice>
|
561
|
+
</attribute>
|
562
|
+
</optional>
|
515
563
|
<zeroOrMore>
|
516
564
|
<ref name="sourceLocality"/>
|
517
565
|
</zeroOrMore>
|
@@ -614,6 +662,9 @@
|
|
614
662
|
<ref name="BibItemType"/>
|
615
663
|
</attribute>
|
616
664
|
</optional>
|
665
|
+
<optional>
|
666
|
+
<attribute name="schema-version"/>
|
667
|
+
</optional>
|
617
668
|
<optional>
|
618
669
|
<ref name="fetched"/>
|
619
670
|
</optional>
|
@@ -650,6 +701,9 @@
|
|
650
701
|
<zeroOrMore>
|
651
702
|
<ref name="language"/>
|
652
703
|
</zeroOrMore>
|
704
|
+
<zeroOrMore>
|
705
|
+
<ref name="locale"/>
|
706
|
+
</zeroOrMore>
|
653
707
|
<zeroOrMore>
|
654
708
|
<ref name="script"/>
|
655
709
|
</zeroOrMore>
|
@@ -705,6 +759,9 @@
|
|
705
759
|
<ref name="BibItemType"/>
|
706
760
|
</attribute>
|
707
761
|
</optional>
|
762
|
+
<optional>
|
763
|
+
<attribute name="schema-version"/>
|
764
|
+
</optional>
|
708
765
|
<optional>
|
709
766
|
<ref name="fetched"/>
|
710
767
|
</optional>
|
@@ -741,6 +798,9 @@
|
|
741
798
|
<zeroOrMore>
|
742
799
|
<ref name="language"/>
|
743
800
|
</zeroOrMore>
|
801
|
+
<zeroOrMore>
|
802
|
+
<ref name="locale"/>
|
803
|
+
</zeroOrMore>
|
744
804
|
<zeroOrMore>
|
745
805
|
<ref name="script"/>
|
746
806
|
</zeroOrMore>
|
@@ -854,6 +914,15 @@
|
|
854
914
|
<optional>
|
855
915
|
<attribute name="type"/>
|
856
916
|
</optional>
|
917
|
+
<optional>
|
918
|
+
<attribute name="language"/>
|
919
|
+
</optional>
|
920
|
+
<optional>
|
921
|
+
<attribute name="locale"/>
|
922
|
+
</optional>
|
923
|
+
<optional>
|
924
|
+
<attribute name="script"/>
|
925
|
+
</optional>
|
857
926
|
<data type="anyURI"/>
|
858
927
|
</define>
|
859
928
|
<define name="DateType">
|
@@ -882,6 +951,7 @@
|
|
882
951
|
<value>vote-started</value>
|
883
952
|
<value>vote-ended</value>
|
884
953
|
<value>announced</value>
|
954
|
+
<value>stable-until</value>
|
885
955
|
</choice>
|
886
956
|
</define>
|
887
957
|
<define name="bdate">
|
@@ -930,6 +1000,9 @@
|
|
930
1000
|
<optional>
|
931
1001
|
<attribute name="language"/>
|
932
1002
|
</optional>
|
1003
|
+
<optional>
|
1004
|
+
<attribute name="locale"/>
|
1005
|
+
</optional>
|
933
1006
|
<optional>
|
934
1007
|
<attribute name="script"/>
|
935
1008
|
</optional>
|
@@ -1253,7 +1326,7 @@
|
|
1253
1326
|
<value>commentaryOf</value>
|
1254
1327
|
<value>hasCommentary</value>
|
1255
1328
|
<value>related</value>
|
1256
|
-
<value>
|
1329
|
+
<value>hasComplement</value>
|
1257
1330
|
<value>complementOf</value>
|
1258
1331
|
<value>obsoletes</value>
|
1259
1332
|
<value>obsoletedBy</value>
|
@@ -0,0 +1,11 @@
|
|
1
|
+
<?xml version="1.0" encoding="UTF-8"?>
|
2
|
+
<grammar xmlns="http://relaxng.org/ns/structure/1.0">
|
3
|
+
<include href="basicdoc.rng"/>
|
4
|
+
<include href="relaton-ecma.rng"/>
|
5
|
+
<start>
|
6
|
+
<choice>
|
7
|
+
<ref name="bibitem"/>
|
8
|
+
<ref name="bibdata"/>
|
9
|
+
</choice>
|
10
|
+
</start>
|
11
|
+
</grammar>
|
@@ -0,0 +1,97 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require "English"
|
4
|
+
require "mechanize"
|
5
|
+
require "relaton_ecma"
|
6
|
+
|
7
|
+
module RelatonEcma
|
8
|
+
class DataFetcher
|
9
|
+
URL = "https://www.ecma-international.org/publications-and-standards/"
|
10
|
+
|
11
|
+
# @param [String] :output directory to output documents
|
12
|
+
# @param [String] :format output format (xml, yaml, bibxml)
|
13
|
+
def initialize(output: "data", format: "yaml")
|
14
|
+
@output = output
|
15
|
+
@format = format
|
16
|
+
@ext = format.sub(/^bib/, "")
|
17
|
+
@files = []
|
18
|
+
@index = Relaton::Index.find_or_create :ECMA
|
19
|
+
@agent = Mechanize.new
|
20
|
+
@agent.user_agent_alias = Mechanize::AGENT_ALIASES.keys[rand(21)]
|
21
|
+
end
|
22
|
+
|
23
|
+
# @param bib [RelatonEcma::BibliographicItem]
|
24
|
+
def write_file(bib) # rubocop:disable Metrics/AbcSize, Metrics/MethodLength
|
25
|
+
id = bib.docidentifier[0].id.gsub(%r{[/\s]}, "_")
|
26
|
+
id += "-#{bib.edition.content.gsub('.', '_')}" if bib.edition
|
27
|
+
extent = bib.extent.detect { |e| e.type == "volume" }
|
28
|
+
id += "-#{extent.reference_from}" if extent
|
29
|
+
file = "#{@output}/#{id}.#{@ext}"
|
30
|
+
if @files.include? file
|
31
|
+
warn "Duplicate file #{file}"
|
32
|
+
else
|
33
|
+
@files << file
|
34
|
+
File.write file, render_doc(bib), encoding: "UTF-8"
|
35
|
+
@index.add_or_update index_id(bib), file
|
36
|
+
end
|
37
|
+
end
|
38
|
+
|
39
|
+
def index_id(bib)
|
40
|
+
{ id: bib.docidentifier[0].id }.tap do |i|
|
41
|
+
i[:ed] = bib.edition.content if bib.edition
|
42
|
+
extent = bib.extent.detect { |e| e.type == "volume" }
|
43
|
+
i[:vol] = extent.reference_from if extent
|
44
|
+
end
|
45
|
+
end
|
46
|
+
|
47
|
+
def render_doc(bib)
|
48
|
+
case @format
|
49
|
+
when "yaml" then bib.to_hash.to_yaml
|
50
|
+
when "xml" then bib.to_xml bibdata: true
|
51
|
+
when "bibxml" then bib.to_bibxml
|
52
|
+
end
|
53
|
+
end
|
54
|
+
|
55
|
+
# @param hit [Nokogiri::XML::Element]
|
56
|
+
def parse_page(hit) # rubocop:disable Metrics/AbcSize, Metrics/MethodLength
|
57
|
+
DataParser.new(hit).parse.each { |item| write_file item }
|
58
|
+
end
|
59
|
+
|
60
|
+
# @param type [String]
|
61
|
+
def html_index(type) # rubocop:disable Metrics/MethodLength
|
62
|
+
result = @agent.get "#{URL}#{type}/"
|
63
|
+
# @last_call_time = Time.now
|
64
|
+
result.xpath(
|
65
|
+
"//li/span[1]/a",
|
66
|
+
"//div[contains(@class, 'entry-content-wrapper')][.//a[.='Download']]",
|
67
|
+
).each do |hit|
|
68
|
+
# workers << hit
|
69
|
+
parse_page(hit)
|
70
|
+
rescue StandardError => e
|
71
|
+
warn e.message
|
72
|
+
warn e.backtrace
|
73
|
+
end
|
74
|
+
end
|
75
|
+
|
76
|
+
#
|
77
|
+
# Fetch data from Ecma website.
|
78
|
+
#
|
79
|
+
# @return [void]
|
80
|
+
#
|
81
|
+
def fetch
|
82
|
+
t1 = Time.now
|
83
|
+
puts "Started at: #{t1}"
|
84
|
+
|
85
|
+
FileUtils.mkdir_p @output
|
86
|
+
|
87
|
+
html_index "standards"
|
88
|
+
html_index "technical-reports"
|
89
|
+
html_index "mementos"
|
90
|
+
@index.save
|
91
|
+
|
92
|
+
t2 = Time.now
|
93
|
+
puts "Stopped at: #{t2}"
|
94
|
+
puts "Done in: #{(t2 - t1).round} sec."
|
95
|
+
end
|
96
|
+
end
|
97
|
+
end
|
@@ -0,0 +1,215 @@
|
|
1
|
+
module RelatonEcma
|
2
|
+
class DataParser
|
3
|
+
MATTRS = %i[docid title date link].freeze
|
4
|
+
ATTRS = MATTRS + %i[abstract relation edition].freeze
|
5
|
+
|
6
|
+
#
|
7
|
+
# Initialize parser
|
8
|
+
#
|
9
|
+
# @param [Nokogiri::XML::Element] hit document hit
|
10
|
+
#
|
11
|
+
def initialize(hit)
|
12
|
+
@hit = hit
|
13
|
+
@bib = {
|
14
|
+
type: "standard", language: ["en"], script: ["Latn"], place: ["Geneva"], doctype: "document"
|
15
|
+
}
|
16
|
+
@agent = Mechanize.new
|
17
|
+
end
|
18
|
+
|
19
|
+
def parse # rubocop:disable Metrics/AbcSize,Metrics/MethodLength
|
20
|
+
if @hit[:href]
|
21
|
+
@agent.user_agent_alias = Mechanize::AGENT_ALIASES.keys[rand(21)]
|
22
|
+
@doc = get_page @hit[:href]
|
23
|
+
ATTRS.each { |a| @bib[a] = send "fetch_#{a}" }
|
24
|
+
else
|
25
|
+
MATTRS.each { |a| @bib[a] = send "fetch_mem_#{a}" }
|
26
|
+
end
|
27
|
+
@bib[:contributor] = contributor
|
28
|
+
items = [BibliographicItem.new(**@bib)]
|
29
|
+
items + parse_editions
|
30
|
+
end
|
31
|
+
|
32
|
+
#
|
33
|
+
# Get page with retries
|
34
|
+
#
|
35
|
+
# @param [String] url url to fetch
|
36
|
+
#
|
37
|
+
# @return [Mechanize::Page] document
|
38
|
+
#
|
39
|
+
def get_page(url)
|
40
|
+
3.times do |n|
|
41
|
+
sleep n
|
42
|
+
doc = @agent.get url
|
43
|
+
return doc
|
44
|
+
rescue StandardError => e
|
45
|
+
warn e.message
|
46
|
+
end
|
47
|
+
end
|
48
|
+
|
49
|
+
#
|
50
|
+
# Parse editions
|
51
|
+
#
|
52
|
+
# @param [Mechanize::Page] doc document
|
53
|
+
# @param [Hash] bib bibliographic item the last edition
|
54
|
+
#
|
55
|
+
# @return [Array<RelatonEcma::BibliographicItem>] one item per listed edition (empty if no page was loaded)
|
56
|
+
#
|
57
|
+
def parse_editions # rubocop:disable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/MethodLength, Metrics/PerceivedComplexity
|
58
|
+
return [] unless @doc
|
59
|
+
|
60
|
+
docid = @bib[:docid]
|
61
|
+
@doc.xpath('//div[@id="main"]/div[1]/div/main/article/div/div/standard/div/ul/li').map do |hit|
|
62
|
+
id, ed, @bib[:date], vol = edition_id_parts hit.at("./span", "./a").text
|
63
|
+
@bib[:link] = edition_link(hit) + edition_translation_link(ed)
|
64
|
+
next if ed.nil? || ed.empty?
|
65
|
+
|
66
|
+
@bib[:docid] = id.nil? || id.empty? ? docid : fetch_docid(id)
|
67
|
+
@bib[:edition] = RelatonBib::Edition.new(content: ed)
|
68
|
+
@bib[:extent] = vol && [RelatonBib::Locality.new("volume", vol)]
|
69
|
+
BibliographicItem.new(**@bib)
|
70
|
+
end.compact
|
71
|
+
end
|
72
|
+
|
73
|
+
def edition_link(hit)
|
74
|
+
{ "src" => hit.at("./a"), "pdf" => hit.at("./span/a") }.map do |type, a|
|
75
|
+
RelatonBib::TypedUri.new(type: type, content: a[:href]) if a
|
76
|
+
end.compact
|
77
|
+
end
|
78
|
+
|
79
|
+
#
|
80
|
+
# Parse edition and date
|
81
|
+
#
|
82
|
+
# @param [String] text identifier text
|
83
|
+
#
|
84
|
+
# @return [Array<String,nil,Array<RelatonBib::BibliographicDate>>] id, edition, dates and volume
|
85
|
+
#
|
86
|
+
def edition_id_parts(text) # rubocop:disable Metrics/MethodLength
|
87
|
+
%r{^
|
88
|
+
(?<id>\w+(?:[\d-]+|\sTR/\d+)),?\s
|
89
|
+
(?:Volume\s(?<vol>[\d.]+),?\s)?
|
90
|
+
(?<ed>[\d.]+)(?:st|nd|rd|th)?\sedition
|
91
|
+
(?:[,.]\s(?<dt>\w+\s\d+))?
|
92
|
+
}x =~ text
|
93
|
+
date = [dt].compact.map do |d|
|
94
|
+
on = Date.strptime(d, "%B %Y").strftime("%Y-%m")
|
95
|
+
RelatonBib::BibliographicDate.new(type: "published", on: on)
|
96
|
+
end
|
97
|
+
[id, ed, date, vol]
|
98
|
+
end
|
99
|
+
|
100
|
+
# @return [Array<RelatonBib::DocumentIdentifier>]
|
101
|
+
def fetch_docid(id = nil)
|
102
|
+
id ||= @hit.text
|
103
|
+
[RelatonBib::DocumentIdentifier.new(type: "ECMA", id: id, primary: true)]
|
104
|
+
end
|
105
|
+
|
106
|
+
# @return [Array<RelatonBib::TypedUri>]
|
107
|
+
def fetch_link # rubocop:disable Metrics/AbcSize
|
108
|
+
link = []
|
109
|
+
link << RelatonBib::TypedUri.new(type: "src", content: @hit[:href]) if @hit[:href]
|
110
|
+
ref = @doc.at('//div[@class="ecma-item-content-wrapper"]/span/a',
|
111
|
+
'//div[@class="ecma-item-content-wrapper"]/a')
|
112
|
+
link << RelatonBib::TypedUri.new(type: "pdf", content: ref[:href]) if ref
|
113
|
+
link + edition_translation_link(@bib[:edition]&.content)
|
114
|
+
end
|
115
|
+
|
116
|
+
def fetch_mem_link
|
117
|
+
@hit.xpath("./div/section/div/p/a").map do |a|
|
118
|
+
RelatonBib::TypedUri.new(type: "pdf", content: a[:href])
|
119
|
+
end
|
120
|
+
end
|
121
|
+
|
122
|
+
def edition_translation_link(edition)
|
123
|
+
translation_link.select { |l| l[:ed] == edition }.map { |l| l[:link] }
|
124
|
+
end
|
125
|
+
|
126
|
+
def translation_link
|
127
|
+
return [] unless @doc
|
128
|
+
|
129
|
+
@translation_link ||= @doc.xpath("//main/article/div/div/standard/div[2]/ul/li").map do |l|
|
130
|
+
a = l.at("span/a")
|
131
|
+
id = l.at("span").text
|
132
|
+
%r{\w+[\d-]+,\s(?<lang>\w+)\sversion,\s(?<ed>[\d.]+)(?:st|nd|rd|th)\sedition} =~ id
|
133
|
+
case lang
|
134
|
+
when "Japanese"
|
135
|
+
{ ed: ed, link: RelatonBib::TypedUri.new(type: "pdf", language: "ja", script: "Jpan", content: a[:href]) }
|
136
|
+
end
|
137
|
+
end.compact
|
138
|
+
end
|
139
|
+
|
140
|
+
# @return [Array<Hash>]
|
141
|
+
def fetch_title
|
142
|
+
@doc.xpath('//p[@class="ecma-item-short-description"]').map do |t|
|
143
|
+
{ content: t.text.strip, language: "en", script: "Latn" }
|
144
|
+
end
|
145
|
+
end
|
146
|
+
|
147
|
+
# @return [Array<RelatonBib::FormattedString>]
|
148
|
+
def fetch_abstract
|
149
|
+
content = @doc.xpath('//div[@class="ecma-item-content"]/p').map do |a|
|
150
|
+
a.text.strip.squeeze(" ").gsub(/\r\n/, "")
|
151
|
+
end.join "\n"
|
152
|
+
return [] if content.empty?
|
153
|
+
|
154
|
+
[RelatonBib::FormattedString.new(content: content, language: "en", script: "Latn")]
|
155
|
+
end
|
156
|
+
|
157
|
+
# @return [Array<RelatonBib::BibliographicDate>]
|
158
|
+
def fetch_date
|
159
|
+
@doc.xpath('//p[@class="ecma-item-edition"]').map do |d|
|
160
|
+
date = d.text.split(", ").last
|
161
|
+
RelatonBib::BibliographicDate.new type: "published", on: date
|
162
|
+
end
|
163
|
+
end
|
164
|
+
|
165
|
+
# @return [Array<Hash>]
|
166
|
+
def fetch_relation # rubocop:disable Metrics/AbcSize, Metrics/MethodLength, Metrics/CyclomaticComplexity
|
167
|
+
@doc.xpath("//ul[@class='ecma-item-archives']/li").map do |rel|
|
168
|
+
ref, ed, date, vol = edition_id_parts rel.at("span").text
|
169
|
+
next if ed.nil? || ed.empty?
|
170
|
+
|
171
|
+
fref = RelatonBib::FormattedRef.new content: ref, language: "en", script: "Latn"
|
172
|
+
docid = RelatonBib::DocumentIdentifier.new(type: "ECMA", id: ref, primary: true)
|
173
|
+
link = rel.xpath("span/a").map { |l| RelatonBib::TypedUri.new type: "pdf", content: l[:href] }
|
174
|
+
edition = RelatonBib::Edition.new content: ed
|
175
|
+
extent = vol && [RelatonBib::Locality.new("volume", vol)]
|
176
|
+
bibitem = BibliographicItem.new(
|
177
|
+
docid: [docid], formattedref: fref, date: date, edition: edition,
|
178
|
+
link: link, extent: extent
|
179
|
+
)
|
180
|
+
{ type: "updates", bibitem: bibitem }
|
181
|
+
end.compact
|
182
|
+
end
|
183
|
+
|
184
|
+
#
|
185
|
+
# @return [RelatonBib::Edition, nil]
|
186
|
+
#
|
187
|
+
def fetch_edition
|
188
|
+
cnt = @doc.at('//p[@class="ecma-item-edition"]')&.text&.match(/^\d+(?=(?:st|nd|th|rd))/)&.to_s
|
189
|
+
RelatonBib::Edition.new(content: cnt) if cnt && !cnt.empty?
|
190
|
+
end
|
191
|
+
|
192
|
+
def contributor
|
193
|
+
org = RelatonBib::Organization.new name: "Ecma International"
|
194
|
+
[{ entity: org, role: [{ type: "publisher" }] }]
|
195
|
+
end
|
196
|
+
|
197
|
+
# @return [Array<RelatonBib::DocumentIdentifier>]
|
198
|
+
def fetch_mem_docid
|
199
|
+
code = "ECMA MEM/#{@hit.at('div[1]//p').text}"
|
200
|
+
fetch_docid code
|
201
|
+
end
|
202
|
+
|
203
|
+
def fetch_mem_date
|
204
|
+
date = @hit.at("div[2]//p").text
|
205
|
+
on = Date.strptime(date, "%B %Y").strftime "%Y-%m"
|
206
|
+
[RelatonBib::BibliographicDate.new(type: "published", on: on)]
|
207
|
+
end
|
208
|
+
|
209
|
+
def fetch_mem_title
|
210
|
+
year = @hit.at("div[1]//p").text
|
211
|
+
content = "\"Memento #{year}\" for year #{year}"
|
212
|
+
[{ content: content, language: "en", script: "Latn" }]
|
213
|
+
end
|
214
|
+
end
|
215
|
+
end
|
@@ -3,28 +3,74 @@
|
|
3
3
|
module RelatonEcma
|
4
4
|
# ECMA bibliography module
|
5
5
|
module EcmaBibliography
|
6
|
+
ENDPOINT = "https://raw.githubusercontent.com/relaton/relaton-data-ecma/master/"
|
7
|
+
|
6
8
|
class << self
|
7
|
-
#
|
8
|
-
#
|
9
|
-
|
10
|
-
|
9
|
+
#
|
10
|
+
# Search for a reference in the ECMA index.
|
11
|
+
#
|
12
|
+
# @param ref [String] the ECMA standard reference to look up (e.g. "ECMA-6")
|
13
|
+
#
|
14
|
+
# @return [Array<Hash>, false] matching index rows, or false if the reference cannot be parsed
|
15
|
+
#
|
16
|
+
def search(ref)
|
17
|
+
refparts = parse_ref ref
|
18
|
+
return false unless refparts
|
19
|
+
|
20
|
+
index = Relaton::Index.find_or_create :ECMA, url: "#{ENDPOINT}index.zip"
|
21
|
+
index.search { |row| match_ref refparts, row }
|
22
|
+
end
|
23
|
+
|
24
|
+
def parse_ref(ref)
|
25
|
+
%r{^
|
26
|
+
(?<id>ECMA(?:[\d-]+|\s\w+/\d+))
|
27
|
+
(?:\sed(?<ed>[\d.]+))?
|
28
|
+
(?:\svol(?<vol>\d+))?
|
29
|
+
}x.match ref
|
30
|
+
end
|
31
|
+
|
32
|
+
def match_ref(refparts, row)
|
33
|
+
row[:id][:id] == refparts[:id] &&
|
34
|
+
(refparts[:ed].nil? || row[:id][:ed] == refparts[:ed]) &&
|
35
|
+
(refparts[:vol].nil? || row[:id][:vol] == refparts[:vol])
|
11
36
|
end
|
12
37
|
|
13
38
|
# @param code [String] the ECMA standard Code to look up (e.g. "ECMA-6")
|
14
39
|
# @param year [String] not used
|
15
40
|
# @param opts [Hash] not used
|
16
|
-
# @return [
|
41
|
+
# @return [RelatonEcma::BibliographicItem] Relaton of reference
|
17
42
|
def get(code, _year = nil, _opts = {})
|
18
43
|
warn "[relaton-ecma] (\"#{code}\") fetching..."
|
19
|
-
result =
|
44
|
+
result = fetch_doc(code)
|
20
45
|
if result
|
21
46
|
warn "[relaton-ecma] (\"#{code}\") found #{result.docidentifier.first.id}"
|
47
|
+
# item
|
22
48
|
else
|
23
|
-
warn "[relaton-ecma] WARNING no match found online for #{code}. "\
|
49
|
+
warn "[relaton-ecma] WARNING no match found online for #{code}. " \
|
24
50
|
"The code must be exactly like it is on the standards website."
|
25
51
|
end
|
26
52
|
result
|
27
53
|
end
|
54
|
+
|
55
|
+
def compare_edition_volume(aaa, bbb)
|
56
|
+
comp = bbb[:id][:ed] <=> aaa[:id][:ed]
|
57
|
+
comp.zero? ? aaa[:id][:vol] <=> bbb[:id][:vol] : comp
|
58
|
+
end
|
59
|
+
|
60
|
+
def fetch_doc(code) # rubocop:disable Metrics/AbcSize
|
61
|
+
row = search(code).min { |a, b| compare_edition_volume a, b }
|
62
|
+
return unless row
|
63
|
+
|
64
|
+
url = "#{ENDPOINT}#{row[:file]}"
|
65
|
+
doc = OpenURI.open_uri url
|
66
|
+
hash = YAML.safe_load doc
|
67
|
+
hash["fetched"] = Date.today.to_s
|
68
|
+
BibliographicItem.from_hash hash
|
69
|
+
rescue OpenURI::HTTPError => e
|
70
|
+
return if e.io.status.first == "404"
|
71
|
+
|
72
|
+
raise RelatonBib::RequestError, "No document found for #{code} reference. #{e.message}"
|
73
|
+
end
|
28
74
|
end
|
29
75
|
end
|
30
76
|
end
|