relaton-ecma 1.13.0 → 1.14.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/workflows/rake.yml +0 -1
- data/.github/workflows/release.yml +22 -0
- data/Gemfile +7 -1
- data/README.adoc +81 -37
- data/grammars/basicdoc.rng +3 -28
- data/grammars/biblio-standoc.rng +164 -0
- data/grammars/biblio.rng +94 -21
- data/grammars/relaton-ecma-compile.rng +11 -0
- data/grammars/relaton-ecma.rng +4 -0
- data/lib/relaton_ecma/bibliographic_item.rb +12 -0
- data/lib/relaton_ecma/data_fetcher.rb +97 -0
- data/lib/relaton_ecma/data_parser.rb +215 -0
- data/lib/relaton_ecma/ecma_bibliography.rb +53 -7
- data/lib/relaton_ecma/hash_converter.rb +11 -0
- data/lib/relaton_ecma/processor.rb +18 -5
- data/lib/relaton_ecma/version.rb +1 -1
- data/lib/relaton_ecma/xml_parser.rb +13 -0
- data/lib/relaton_ecma.rb +6 -1
- data/relaton_ecma.gemspec +3 -8
- metadata +27 -63
- data/grammars/isodoc.rng +0 -2807
- data/grammars/reqt.rng +0 -223
- data/lib/relaton_ecma/scrapper.rb +0 -27
data/grammars/biblio.rng
CHANGED
@@ -33,9 +33,10 @@
|
|
33
33
|
<param name="pattern">([\+\-]?\d{4})((-?)((0[1-9]|1[0-2])((-?)([12]\d|0[1-9]|3[01]))?|W([0-4]\d|5[0-2])(-?[1-7])?|(00[1-9]|0[1-9]\d|[12]\d{2}|3([0-5]\d|6[1-6]))))?</param>
|
34
34
|
</data>
|
35
35
|
</define>
|
36
|
-
|
37
|
-
|
38
|
-
|
36
|
+
<!-- start = bibitem -->
|
37
|
+
<define name="BibData">
|
38
|
+
<ref name="BibliographicItem"/>
|
39
|
+
</define>
|
39
40
|
<define name="status">
|
40
41
|
<element name="status">
|
41
42
|
<ref name="stage"/>
|
@@ -73,8 +74,14 @@
|
|
73
74
|
<text/>
|
74
75
|
</element>
|
75
76
|
</define>
|
76
|
-
<define name="
|
77
|
+
<define name="locale">
|
77
78
|
<a:documentation>ISO-639</a:documentation>
|
79
|
+
<element name="locale">
|
80
|
+
<text/>
|
81
|
+
</element>
|
82
|
+
</define>
|
83
|
+
<define name="script">
|
84
|
+
<a:documentation>ISO-3166</a:documentation>
|
78
85
|
<element name="script">
|
79
86
|
<text/>
|
80
87
|
</element>
|
@@ -93,6 +100,9 @@
|
|
93
100
|
<!-- multiple languages and scripts possible: comma delimit them if so -->
|
94
101
|
<attribute name="language"/>
|
95
102
|
</optional>
|
103
|
+
<optional>
|
104
|
+
<attribute name="locale"/>
|
105
|
+
</optional>
|
96
106
|
<optional>
|
97
107
|
<attribute name="script"/>
|
98
108
|
</optional>
|
@@ -136,6 +146,9 @@
|
|
136
146
|
<!-- multiple languages and scripts possible: comma delimit them if so -->
|
137
147
|
<attribute name="language"/>
|
138
148
|
</optional>
|
149
|
+
<optional>
|
150
|
+
<attribute name="locale"/>
|
151
|
+
</optional>
|
139
152
|
<optional>
|
140
153
|
<attribute name="script"/>
|
141
154
|
</optional>
|
@@ -158,27 +171,30 @@
|
|
158
171
|
</define>
|
159
172
|
<define name="contributor">
|
160
173
|
<element name="contributor">
|
161
|
-
<
|
174
|
+
<oneOrMore>
|
162
175
|
<ref name="role"/>
|
163
|
-
</
|
176
|
+
</oneOrMore>
|
164
177
|
<ref name="ContributorInfo"/>
|
165
178
|
</element>
|
166
179
|
</define>
|
167
180
|
<define name="role">
|
168
181
|
<element name="role">
|
169
|
-
<
|
170
|
-
<
|
171
|
-
<
|
172
|
-
|
173
|
-
|
174
|
-
|
175
|
-
|
176
|
-
|
177
|
-
|
178
|
-
|
179
|
-
</
|
180
|
-
|
181
|
-
|
182
|
+
<attribute name="type">
|
183
|
+
<choice>
|
184
|
+
<value>author</value>
|
185
|
+
<value>performer</value>
|
186
|
+
<value>publisher</value>
|
187
|
+
<value>editor</value>
|
188
|
+
<value>adapter</value>
|
189
|
+
<value>translator</value>
|
190
|
+
<value>distributor</value>
|
191
|
+
<value>realizer</value>
|
192
|
+
<value>owner</value>
|
193
|
+
<value>authorizer</value>
|
194
|
+
<value>enabler</value>
|
195
|
+
<value>subject</value>
|
196
|
+
</choice>
|
197
|
+
</attribute>
|
182
198
|
<zeroOrMore>
|
183
199
|
<ref name="roledescription"/>
|
184
200
|
</zeroOrMore>
|
@@ -200,6 +216,9 @@
|
|
200
216
|
<optional>
|
201
217
|
<ref name="fullname"/>
|
202
218
|
</optional>
|
219
|
+
<zeroOrMore>
|
220
|
+
<ref name="credential"/>
|
221
|
+
</zeroOrMore>
|
203
222
|
<zeroOrMore>
|
204
223
|
<ref name="affiliation"/>
|
205
224
|
</zeroOrMore>
|
@@ -216,6 +235,11 @@
|
|
216
235
|
<ref name="FullNameType"/>
|
217
236
|
</element>
|
218
237
|
</define>
|
238
|
+
<define name="credential">
|
239
|
+
<element name="credential">
|
240
|
+
<text/>
|
241
|
+
</element>
|
242
|
+
</define>
|
219
243
|
<define name="FullNameType">
|
220
244
|
<choice>
|
221
245
|
<group>
|
@@ -289,7 +313,9 @@
|
|
289
313
|
<zeroOrMore>
|
290
314
|
<ref name="affiliationdescription"/>
|
291
315
|
</zeroOrMore>
|
292
|
-
<
|
316
|
+
<optional>
|
317
|
+
<ref name="organization"/>
|
318
|
+
</optional>
|
293
319
|
</element>
|
294
320
|
</define>
|
295
321
|
<define name="affiliationname">
|
@@ -500,6 +526,17 @@
|
|
500
526
|
</define>
|
501
527
|
<define name="localityStack">
|
502
528
|
<element name="localityStack">
|
529
|
+
<optional>
|
530
|
+
<attribute name="connective">
|
531
|
+
<choice>
|
532
|
+
<value>and</value>
|
533
|
+
<value>or</value>
|
534
|
+
<value>from</value>
|
535
|
+
<value>to</value>
|
536
|
+
<value/>
|
537
|
+
</choice>
|
538
|
+
</attribute>
|
539
|
+
</optional>
|
503
540
|
<zeroOrMore>
|
504
541
|
<ref name="locality"/>
|
505
542
|
</zeroOrMore>
|
@@ -512,6 +549,17 @@
|
|
512
549
|
</define>
|
513
550
|
<define name="sourceLocalityStack">
|
514
551
|
<element name="sourceLocalityStack">
|
552
|
+
<optional>
|
553
|
+
<attribute name="connective">
|
554
|
+
<choice>
|
555
|
+
<value>and</value>
|
556
|
+
<value>or</value>
|
557
|
+
<value>from</value>
|
558
|
+
<value>to</value>
|
559
|
+
<value/>
|
560
|
+
</choice>
|
561
|
+
</attribute>
|
562
|
+
</optional>
|
515
563
|
<zeroOrMore>
|
516
564
|
<ref name="sourceLocality"/>
|
517
565
|
</zeroOrMore>
|
@@ -614,6 +662,9 @@
|
|
614
662
|
<ref name="BibItemType"/>
|
615
663
|
</attribute>
|
616
664
|
</optional>
|
665
|
+
<optional>
|
666
|
+
<attribute name="schema-version"/>
|
667
|
+
</optional>
|
617
668
|
<optional>
|
618
669
|
<ref name="fetched"/>
|
619
670
|
</optional>
|
@@ -650,6 +701,9 @@
|
|
650
701
|
<zeroOrMore>
|
651
702
|
<ref name="language"/>
|
652
703
|
</zeroOrMore>
|
704
|
+
<zeroOrMore>
|
705
|
+
<ref name="locale"/>
|
706
|
+
</zeroOrMore>
|
653
707
|
<zeroOrMore>
|
654
708
|
<ref name="script"/>
|
655
709
|
</zeroOrMore>
|
@@ -705,6 +759,9 @@
|
|
705
759
|
<ref name="BibItemType"/>
|
706
760
|
</attribute>
|
707
761
|
</optional>
|
762
|
+
<optional>
|
763
|
+
<attribute name="schema-version"/>
|
764
|
+
</optional>
|
708
765
|
<optional>
|
709
766
|
<ref name="fetched"/>
|
710
767
|
</optional>
|
@@ -741,6 +798,9 @@
|
|
741
798
|
<zeroOrMore>
|
742
799
|
<ref name="language"/>
|
743
800
|
</zeroOrMore>
|
801
|
+
<zeroOrMore>
|
802
|
+
<ref name="locale"/>
|
803
|
+
</zeroOrMore>
|
744
804
|
<zeroOrMore>
|
745
805
|
<ref name="script"/>
|
746
806
|
</zeroOrMore>
|
@@ -854,6 +914,15 @@
|
|
854
914
|
<optional>
|
855
915
|
<attribute name="type"/>
|
856
916
|
</optional>
|
917
|
+
<optional>
|
918
|
+
<attribute name="language"/>
|
919
|
+
</optional>
|
920
|
+
<optional>
|
921
|
+
<attribute name="locale"/>
|
922
|
+
</optional>
|
923
|
+
<optional>
|
924
|
+
<attribute name="script"/>
|
925
|
+
</optional>
|
857
926
|
<data type="anyURI"/>
|
858
927
|
</define>
|
859
928
|
<define name="DateType">
|
@@ -882,6 +951,7 @@
|
|
882
951
|
<value>vote-started</value>
|
883
952
|
<value>vote-ended</value>
|
884
953
|
<value>announced</value>
|
954
|
+
<value>stable-until</value>
|
885
955
|
</choice>
|
886
956
|
</define>
|
887
957
|
<define name="bdate">
|
@@ -930,6 +1000,9 @@
|
|
930
1000
|
<optional>
|
931
1001
|
<attribute name="language"/>
|
932
1002
|
</optional>
|
1003
|
+
<optional>
|
1004
|
+
<attribute name="locale"/>
|
1005
|
+
</optional>
|
933
1006
|
<optional>
|
934
1007
|
<attribute name="script"/>
|
935
1008
|
</optional>
|
@@ -1253,7 +1326,7 @@
|
|
1253
1326
|
<value>commentaryOf</value>
|
1254
1327
|
<value>hasCommentary</value>
|
1255
1328
|
<value>related</value>
|
1256
|
-
<value>
|
1329
|
+
<value>hasComplement</value>
|
1257
1330
|
<value>complementOf</value>
|
1258
1331
|
<value>obsoletes</value>
|
1259
1332
|
<value>obsoletedBy</value>
|
@@ -0,0 +1,11 @@
|
|
1
|
+
<?xml version="1.0" encoding="UTF-8"?>
|
2
|
+
<grammar xmlns="http://relaxng.org/ns/structure/1.0">
|
3
|
+
<include href="basicdoc.rng"/>
|
4
|
+
<include href="relaton-ecma.rng"/>
|
5
|
+
<start>
|
6
|
+
<choice>
|
7
|
+
<ref name="bibitem"/>
|
8
|
+
<ref name="bibdata"/>
|
9
|
+
</choice>
|
10
|
+
</start>
|
11
|
+
</grammar>
|
@@ -0,0 +1,97 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require "English"
|
4
|
+
require "mechanize"
|
5
|
+
require "relaton_ecma"
|
6
|
+
|
7
|
+
module RelatonEcma
  #
  # Walks the Ecma International publication listings, turns every hit into
  # bibliographic items (via DataParser) and writes each item to a file in the
  # output directory, registering the file in the ECMA index.
  #
  class DataFetcher
    URL = "https://www.ecma-international.org/publications-and-standards/"

    #
    # @param [String] :output directory to output documents
    # @param [String] :format output format (xml, yaml, bibxml)
    #
    def initialize(output: "data", format: "yaml")
      @output = output
      @format = format
      # "bibxml" documents are stored with the plain "xml" extension
      @ext = format.sub(/^bib/, "")
      @files = []
      @index = Relaton::Index.find_or_create :ECMA
      @agent = Mechanize.new
      # pick any of the aliases Mechanize ships instead of assuming there are 21
      @agent.user_agent_alias = Mechanize::AGENT_ALIASES.keys.sample
    end

    #
    # Serialize a bibliographic item to a file and add it to the index.
    # A second item that maps to an already written file is reported with a
    # warning and skipped.
    #
    # @param bib [RelatonEcma::BibliographicItem]
    #
    def write_file(bib)
      file = "#{@output}/#{file_id(bib)}.#{@ext}"
      if @files.include? file
        warn "Duplicate file #{file}"
      else
        # render first so that a failing serialization does not leave the
        # path registered as written
        content = render_doc(bib)
        @files << file
        File.write file, content, encoding: "UTF-8"
        @index.add_or_update index_id(bib), file
      end
    end

    #
    # Build the index key of a bibliographic item.
    #
    # @param bib [RelatonEcma::BibliographicItem]
    #
    # @return [Hash] :id always, :ed and :vol only when the item has an
    #   edition / a "volume" extent
    #
    def index_id(bib)
      { id: bib.docidentifier[0].id }.tap do |i|
        i[:ed] = bib.edition.content if bib.edition
        extent = volume_extent(bib)
        i[:vol] = extent.reference_from if extent
      end
    end

    #
    # Serialize a bibliographic item in the configured format.
    #
    # @param bib [RelatonEcma::BibliographicItem]
    #
    # @return [String]
    #
    # @raise [ArgumentError] if the configured format is not yaml, xml or bibxml
    #
    def render_doc(bib)
      case @format
      when "yaml" then bib.to_hash.to_yaml
      when "xml" then bib.to_xml bibdata: true
      when "bibxml" then bib.to_bibxml
      else raise ArgumentError, "Unsupported format: #{@format.inspect}"
      end
    end

    #
    # Parse one hit of a listing page and write every item it yields.
    #
    # @param hit [Nokogiri::XML::Element]
    #
    def parse_page(hit)
      DataParser.new(hit).parse.each { |item| write_file item }
    end

    #
    # Fetch a listing page and process all hits found on it. An error raised
    # while processing a hit is reported with a warning and does not stop the
    # remaining hits.
    #
    # @param type [String] listing name appended to URL
    #
    def html_index(type)
      page = @agent.get "#{URL}#{type}/"
      hits = page.xpath(
        "//li/span[1]/a",
        "//div[contains(@class, 'entry-content-wrapper')][.//a[.='Download']]",
      )
      hits.each do |hit|
        parse_page(hit)
      rescue StandardError => e
        warn e.message
        warn e.backtrace
      end
    end

    #
    # Fetch data from Ecma website.
    #
    # @return [void]
    #
    def fetch
      t1 = Time.now
      puts "Started at: #{t1}"

      FileUtils.mkdir_p @output

      %w[standards technical-reports mementos].each { |type| html_index type }
      @index.save

      t2 = Time.now
      puts "Stopped at: #{t2}"
      puts "Done in: #{(t2 - t1).round} sec."
    end

    private

    # File name stem: identifier with "/" and whitespace replaced by "_",
    # followed by the edition (dots replaced by "_") and the volume, if any.
    def file_id(bib)
      id = bib.docidentifier[0].id.gsub(%r{[/\s]}, "_")
      id += "-#{bib.edition.content.tr('.', '_')}" if bib.edition
      extent = volume_extent(bib)
      id += "-#{extent.reference_from}" if extent
      id
    end

    # First extent of type "volume", or nil when the item has none.
    def volume_extent(bib)
      bib.extent.find { |e| e.type == "volume" }
    end
  end
end
|
@@ -0,0 +1,215 @@
|
|
1
|
+
module RelatonEcma
  #
  # Turns one hit of an Ecma listing page into bibliographic items: the
  # document itself plus one item per edition listed on the document page.
  #
  class DataParser
    # Attributes fetched for mementos (fetch_mem_*) and for documents (fetch_*)
    MATTRS = %i[docid title date link].freeze
    # Attributes fetched for documents. Parenthesized so that the resulting
    # array is frozen, not just the literal on the right-hand side.
    ATTRS = (MATTRS + %i[abstract relation edition]).freeze

    # Number of attempts get_page makes before giving up
    PAGE_ATTEMPTS = 3

    #
    # Initialize parser
    #
    # @param [Nokogiri::XML::Element] hit document hit
    #
    def initialize(hit)
      @hit = hit
      @bib = {
        type: "standard", language: ["en"], script: ["Latn"], place: ["Geneva"], doctype: "document"
      }
      @agent = Mechanize.new
    end

    #
    # Parse the hit. A hit with an href is a link to a document page, which is
    # downloaded and parsed; a hit without an href is parsed in place with the
    # fetch_mem_* methods.
    #
    # @return [Array<RelatonEcma::BibliographicItem>] the document followed by
    #   its editions
    #
    def parse
      if @hit[:href]
        # pick any of the aliases Mechanize ships instead of assuming there are 21
        @agent.user_agent_alias = Mechanize::AGENT_ALIASES.keys.sample
        @doc = get_page @hit[:href]
        ATTRS.each { |a| @bib[a] = send "fetch_#{a}" }
      else
        MATTRS.each { |a| @bib[a] = send "fetch_mem_#{a}" }
      end
      @bib[:contributor] = contributor
      [BibliographicItem.new(**@bib)] + parse_editions
    end

    #
    # Get page with retries. Waits 0, 1, 2... seconds before the successive
    # attempts and warns about every failure.
    #
    # @param [String] url url to fetch
    #
    # @return [Mechanize::Page] document
    #
    # @raise [StandardError] the error of the last attempt when all attempts
    #   failed (instead of returning the attempt counter)
    #
    def get_page(url)
      PAGE_ATTEMPTS.times do |n|
        sleep n
        return @agent.get url
      rescue StandardError => e
        warn e.message
        raise if n == PAGE_ATTEMPTS - 1
      end
    end

    #
    # Parse editions listed on the document page. Every edition item is built
    # from the attributes of the document with identifier, date, links,
    # edition and extent replaced by the ones of the edition. List entries
    # without a recognizable edition are skipped.
    #
    # @return [Array<RelatonEcma::BibliographicItem>] empty when no document
    #   page was fetched
    #
    def parse_editions
      return [] unless @doc

      @doc.xpath('//div[@id="main"]/div[1]/div/main/article/div/div/standard/div/ul/li').map do |hit|
        # a list entry without span/a carries no identifier text to parse
        id, ed, date, vol = edition_id_parts hit.at("./span", "./a")&.text.to_s
        next if ed.nil? || ed.empty?

        BibliographicItem.new(**@bib.merge(
          docid: (id.nil? || id.empty? ? @bib[:docid] : fetch_docid(id)),
          date: date,
          link: edition_link(hit) + edition_translation_link(ed),
          edition: RelatonBib::Edition.new(content: ed),
          extent: vol && [RelatonBib::Locality.new("volume", vol)],
        ))
      end.compact
    end

    #
    # Links of an edition list entry: "src" from a direct anchor, "pdf" from an
    # anchor inside a span.
    #
    # @param [Nokogiri::XML::Element] hit edition list entry
    #
    # @return [Array<RelatonBib::TypedUri>]
    #
    def edition_link(hit)
      { "src" => hit.at("./a"), "pdf" => hit.at("./span/a") }.map do |type, a|
        RelatonBib::TypedUri.new(type: type, content: a[:href]) if a
      end.compact
    end

    #
    # Parse identifier, edition, date and volume
    #
    # @param [String] text identifier text
    #
    # @return [Array<String,nil,Array<RelatonBib::BibliographicDate>>]
    #   identifier, edition, dates (empty when the text has no date) and
    #   volume; the strings are nil when the text does not match
    #
    def edition_id_parts(text) # rubocop:disable Metrics/MethodLength
      %r{^
        (?<id>\w+(?:[\d-]+|\sTR/\d+)),?\s
        (?:Volume\s(?<vol>[\d.]+),?\s)?
        (?<ed>[\d.]+)(?:st|nd|rd|th)?\sedition
        (?:[,.]\s(?<dt>\w+\s\d+))?
      }x =~ text
      date = [dt].compact.map do |d|
        on = Date.strptime(d, "%B %Y").strftime("%Y-%m")
        RelatonBib::BibliographicDate.new(type: "published", on: on)
      end
      [id, ed, date, vol]
    end

    #
    # @param [String, nil] id identifier, the text of the hit when omitted
    #
    # @return [Array<RelatonBib::DocumentIdentifier>]
    #
    def fetch_docid(id = nil)
      id ||= @hit.text
      [RelatonBib::DocumentIdentifier.new(type: "ECMA", id: id, primary: true)]
    end

    #
    # Links of the document: the page itself, the PDF and the translations of
    # the current edition.
    #
    # @return [Array<RelatonBib::TypedUri>]
    #
    def fetch_link # rubocop:disable Metrics/AbcSize
      link = []
      link << RelatonBib::TypedUri.new(type: "src", content: @hit[:href]) if @hit[:href]
      ref = @doc.at('//div[@class="ecma-item-content-wrapper"]/span/a',
                    '//div[@class="ecma-item-content-wrapper"]/a')
      link << RelatonBib::TypedUri.new(type: "pdf", content: ref[:href]) if ref
      # :link precedes :edition in ATTRS, so @bib[:edition] is not set yet
      # when this runs from #parse; read the edition from the page instead
      current = @bib[:edition] || fetch_edition
      link + edition_translation_link(current&.content)
    end

    #
    # @return [Array<RelatonBib::TypedUri>] PDF links found in the hit
    #
    def fetch_mem_link
      @hit.xpath("./div/section/div/p/a").map do |a|
        RelatonBib::TypedUri.new(type: "pdf", content: a[:href])
      end
    end

    #
    # @param [String, nil] edition edition number
    #
    # @return [Array<RelatonBib::TypedUri>] translation links of the edition
    #
    def edition_translation_link(edition)
      translation_link.select { |l| l[:ed] == edition }.map { |l| l[:link] }
    end

    #
    # Translations listed on the document page. Only Japanese versions are
    # recognized. The result is memoized.
    #
    # @return [Array<Hash>] hashes with :ed (edition number) and :link
    #
    def translation_link
      return [] unless @doc

      @translation_link ||= @doc.xpath("//main/article/div/div/standard/div[2]/ul/li").map do |l|
        a = l.at("span/a")
        id = l.at("span")&.text.to_s
        %r{\w+[\d-]+,\s(?<lang>\w+)\sversion,\s(?<ed>[\d.]+)(?:st|nd|rd|th)\sedition} =~ id
        # an entry without an anchor has nothing to link to
        next unless lang == "Japanese" && a

        { ed: ed, link: RelatonBib::TypedUri.new(type: "pdf", language: "ja", script: "Jpan", content: a[:href]) }
      end.compact
    end

    # @return [Array<Hash>]
    def fetch_title
      @doc.xpath('//p[@class="ecma-item-short-description"]').map do |t|
        { content: t.text.strip, language: "en", script: "Latn" }
      end
    end

    # @return [Array<RelatonBib::FormattedString>]
    def fetch_abstract
      content = @doc.xpath('//div[@class="ecma-item-content"]/p').map do |a|
        a.text.strip.squeeze(" ").gsub(/\r\n/, "")
      end.join "\n"
      return [] if content.empty?

      [RelatonBib::FormattedString.new(content: content, language: "en", script: "Latn")]
    end

    # @return [Array<RelatonBib::BibliographicDate>]
    def fetch_date
      @doc.xpath('//p[@class="ecma-item-edition"]').map do |d|
        # the date is whatever follows the last ", " of the edition line
        date = d.text.split(", ").last
        RelatonBib::BibliographicDate.new type: "published", on: date
      end
    end

    #
    # Archived editions of the document as "updates" relations. Entries
    # without a recognizable edition are skipped.
    #
    # @return [Array<Hash>]
    #
    def fetch_relation # rubocop:disable Metrics/AbcSize, Metrics/MethodLength
      @doc.xpath("//ul[@class='ecma-item-archives']/li").map do |rel|
        ref, ed, date, vol = edition_id_parts rel.at("span")&.text.to_s
        next if ed.nil? || ed.empty?

        fref = RelatonBib::FormattedRef.new content: ref, language: "en", script: "Latn"
        docid = RelatonBib::DocumentIdentifier.new(type: "ECMA", id: ref, primary: true)
        link = rel.xpath("span/a").map { |l| RelatonBib::TypedUri.new type: "pdf", content: l[:href] }
        edition = RelatonBib::Edition.new content: ed
        extent = vol && [RelatonBib::Locality.new("volume", vol)]
        bibitem = BibliographicItem.new(
          docid: [docid], formattedref: fref, date: date, edition: edition,
          link: link, extent: extent
        )
        { type: "updates", bibitem: bibitem }
      end.compact
    end

    #
    # @return [RelatonBib::Edition, nil] nil when the page has no edition line
    #   starting with an ordinal number
    #
    def fetch_edition
      cnt = @doc.at('//p[@class="ecma-item-edition"]')&.text&.match(/^\d+(?=(?:st|nd|th|rd))/)&.to_s
      RelatonBib::Edition.new(content: cnt) if cnt && !cnt.empty?
    end

    # @return [Array<Hash>] Ecma International as publisher
    def contributor
      org = RelatonBib::Organization.new name: "Ecma International"
      [{ entity: org, role: [{ type: "publisher" }] }]
    end

    # @return [Array<RelatonBib::DocumentIdentifier>]
    def fetch_mem_docid
      code = "ECMA MEM/#{@hit.at('div[1]//p').text}"
      fetch_docid code
    end

    # @return [Array<RelatonBib::BibliographicDate>]
    def fetch_mem_date
      date = @hit.at("div[2]//p").text
      on = Date.strptime(date, "%B %Y").strftime "%Y-%m"
      [RelatonBib::BibliographicDate.new(type: "published", on: on)]
    end

    # @return [Array<Hash>]
    def fetch_mem_title
      year = @hit.at("div[1]//p").text
      content = "\"Memento #{year}\" for year #{year}"
      [{ content: content, language: "en", script: "Latn" }]
    end
  end
end
|
@@ -3,28 +3,74 @@
|
|
3
3
|
module RelatonEcma
  # ECMA bibliography module
  module EcmaBibliography
    ENDPOINT = "https://raw.githubusercontent.com/relaton/relaton-data-ecma/master/"

    class << self
      #
      # Search the ECMA index for a reference.
      #
      # @param ref [String] the ECMA standard reference to look up (e.g. "ECMA-6")
      #
      # @return [Array<Hash>] matching index rows, empty when the reference
      #   cannot be parsed
      #
      def search(ref)
        refparts = parse_ref ref
        # keep the documented Array contract so callers can enumerate the result
        return [] unless refparts

        index = Relaton::Index.find_or_create :ECMA, url: "#{ENDPOINT}index.zip"
        index.search { |row| match_ref refparts, row }
      end

      #
      # Split a reference into identifier, edition and volume, e.g.
      # "ECMA-6 ed7 vol2".
      #
      # @param ref [String]
      #
      # @return [MatchData, nil] with named groups id, ed and vol; nil when
      #   the reference does not start with an ECMA identifier
      #
      def parse_ref(ref)
        %r{^
          (?<id>ECMA(?:[\d-]+|\s\w+/\d+))
          (?:\sed(?<ed>[\d.]+))?
          (?:\svol(?<vol>\d+))?
        }x.match ref
      end

      #
      # Check an index row against a parsed reference. Edition and volume are
      # compared only when the reference specifies them.
      #
      # @param refparts [MatchData] result of parse_ref
      # @param row [Hash] index row
      #
      # @return [Boolean]
      #
      def match_ref(refparts, row)
        row[:id][:id] == refparts[:id] &&
          (refparts[:ed].nil? || row[:id][:ed] == refparts[:ed]) &&
          (refparts[:vol].nil? || row[:id][:vol] == refparts[:vol])
      end

      # @param code [String] the ECMA standard Code to look up (e.g. "ECMA-6")
      # @param year [String] not used
      # @param opts [Hash] not used
      # @return [RelatonEcma::BibliographicItem, nil] Relaton of reference
      def get(code, _year = nil, _opts = {})
        warn "[relaton-ecma] (\"#{code}\") fetching..."
        result = fetch_doc(code)
        if result
          warn "[relaton-ecma] (\"#{code}\") found #{result.docidentifier.first.id}"
        else
          warn "[relaton-ecma] WARNING no match found online for #{code}. " \
               "The code must be exactly like it is on the standards website."
        end
        result
      end

      #
      # Comparator ordering index rows by edition, highest first, then by
      # volume, lowest first. Editions and volumes are compared as numbers, so
      # edition "14" ranks above "9", and a missing edition or volume counts
      # as 0 instead of making the comparison fail.
      #
      # @param aaa [Hash] index row
      # @param bbb [Hash] index row
      #
      # @return [Integer] -1, 0 or 1
      #
      def compare_edition_volume(aaa, bbb)
        comp = bbb[:id][:ed].to_f <=> aaa[:id][:ed].to_f
        comp.zero? ? aaa[:id][:vol].to_f <=> bbb[:id][:vol].to_f : comp
      end

      #
      # Download the document of the best matching index row (latest edition,
      # first volume).
      #
      # @param code [String] reference
      #
      # @return [RelatonEcma::BibliographicItem, nil] nil when nothing matches
      #   or the file is not found (HTTP 404)
      #
      # @raise [RelatonBib::RequestError] on any other HTTP error
      #
      def fetch_doc(code) # rubocop:disable Metrics/AbcSize
        row = search(code).min { |a, b| compare_edition_volume a, b }
        return unless row

        url = "#{ENDPOINT}#{row[:file]}"
        # block form closes the IO as soon as the content is read
        hash = YAML.safe_load OpenURI.open_uri(url, &:read)
        hash["fetched"] = Date.today.to_s
        BibliographicItem.from_hash hash
      rescue OpenURI::HTTPError => e
        return if e.io.status.first == "404"

        raise RelatonBib::RequestError, "No document found for #{code} reference. #{e.message}"
      end
    end
  end
end
|