relaton-ecma 1.14.0 → 1.14.2

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 2bfb56317935db58c7697daf05caf958a85995fa3d66533034a18cdeb671a178
4
- data.tar.gz: 63d485105638a55c444ceecf185296b159308ca54981f8564cb755422177f79e
3
+ metadata.gz: ccb93a32e1674146e6e2465d8a264f9322f12616be106f921f6176412ef1e22d
4
+ data.tar.gz: 163267a6fbffc14dc71eaf072e29856cd899a383055ea21f2126d7cd12675128
5
5
  SHA512:
6
- metadata.gz: 1b44febabc989d7c41f0561234c5715d83b0e393fa12f7b48ccfd749b20f39a8ffe334a27cef7eb875c4c14d08abe4807dfb098018b31cbb911c8f0b3a487e21
7
- data.tar.gz: 878ee9903834c5d832f5a23ffd09a43c6343a990ab91756e7a874866d7a505374e6e922e3ba6c7fd007b848238c6d771072826191d8cb0e1f7c11eb02d3a934c
6
+ metadata.gz: 56b043e9a77b57410600001a6cfbde0fe24d9b723d3d2911013e3ce933f3fe5c9297d7032e3725efbf150c77b3938cf59904e40cb63a9a3f9eb96a50e0b402f8
7
+ data.tar.gz: de9fce557065d77645e09d18a41f262b01558cfeb844e3007b46551936ea66261a3a37bc37afe8c6b6ecdbc431d93b0411e98862b93a69644e3d41ab3433f76e
@@ -5,6 +5,7 @@ name: rake
5
5
  on:
6
6
  push:
7
7
  branches: [ master, main ]
8
+ tags: [ v* ]
8
9
  pull_request:
9
10
 
10
11
  jobs:
@@ -10,8 +10,9 @@ on:
10
10
  Next release version. Possible values: x.y.z, major, minor, patch or pre|rc|etc
11
11
  required: true
12
12
  default: 'skip'
13
- push:
14
- tags: [ v* ]
13
+ repository_dispatch:
14
+ types: [ do-release ]
15
+
15
16
 
16
17
  jobs:
17
18
  release:
data/Gemfile CHANGED
@@ -5,3 +5,9 @@ gemspec
5
5
 
6
6
  gem "rake", "~> 13.0"
7
7
  gem "rspec", "~> 3.0"
8
+
9
+ gem "pry-byebug"
10
+ gem "ruby-jing"
11
+ gem "simplecov"
12
+ gem "vcr"
13
+ gem "webmock"
data/README.adoc CHANGED
@@ -29,25 +29,67 @@ Or install it yourself as:
29
29
 
30
30
  == Usage
31
31
 
32
- === Search document
32
+ === Fetch documents
33
+
34
+ Documents can be fetched by reference. The structure of the reference depends on the type of the document. There are three types of documents:
35
+
36
+ - ECMA standards
37
+ - ECMA technical reports
38
+ - ECMA mementos
39
+
40
+ ECMA standards have the following reference structure: `ECMA-{NUMBER}[ ed{EDITION}][ vol{VOLUME}]`. Where: `NUMBER` is a number of the standard, `EDITION` is an edition of the standard, and `VOLUME` is a volume of the standard. The `EDITION` and `VOLUME` are optional. If `EDITION` is not specified, the latest edition of the standard will be fetched. If `VOLUME` is not specified, the first volume of the standard will be fetched. +
41
+
42
+ ECMA technical reports have the following reference structure: `ECMA TR/{NUMBER}[ ed{EDITION}]`. Where: `NUMBER` is a number of the technical report, and `EDITION` is an edition of the technical report. The `EDITION` is optional. If `EDITION` is not specified, the latest edition of the technical report will be fetched. +
43
+
44
+ ECMA mementos have the following reference structure: `ECMA MEM/{YEAR}`. Where: `YEAR` is a year of the memento.
33
45
 
34
46
  [source,ruby]
35
47
  ----
36
48
  require 'relaton_ecma'
37
49
  => true
38
50
 
51
+ # fetch ECMA standard
39
52
  item = RelatonEcma::EcmaBibliography.get 'ECMA-6'
40
53
  [relaton-ecma] ("ECMA-6") fetching...
41
54
  [relaton-ecma] ("ECMA-6") found ECMA-6
42
55
  #<RelatonEcma::BibliographicItem:0x00007fc645b11c10
43
56
  ...
44
57
 
58
+ # fetch ECMA standard with edition and volume
59
+ RelatonEcma::EcmaBibliography.get "ECMA-269 ed3 vol2"
60
+ [relaton-ecma] ("ECMA-269 ed3 vol2") fetching...
61
+ [relaton-ecma] ("ECMA-269 ed3 vol2") found ECMA-269
62
+ => #<RelatonEcma::BibliographicItem:0x0000000106ac8210
63
+ ...
64
+
65
+ # fetch the last edition of ECMA standard
66
+ bib = RelatonEcma::EcmaBibliography.get "ECMA-269"
67
+ [relaton-ecma] ("ECMA-269") fetching...
68
+ [relaton-ecma] ("ECMA-269") found ECMA-269
69
+ => #<RelatonEcma::BibliographicItem:0x000000010a408480
70
+ ...
71
+
72
+ bib.edition.content
73
+ => "9"
74
+
75
+ # fetch the first volume of ECMA standard
76
+ bib = RelatonEcma::EcmaBibliography.get "ECMA-269 ed3"
77
+ [relaton-ecma] ("ECMA-269 ed3") fetching...
78
+ [relaton-ecma] ("ECMA-269 ed3") found ECMA-269
79
+ => #<RelatonEcma::BibliographicItem:0x000000010a3ed0e0
80
+ ...
81
+
82
+ bib.extent.first.reference_from
83
+ => "1"
84
+
85
+ # fetch ECMA technical report
45
86
  RelatonEcma::EcmaBibliography.get 'ECMA TR/18'
46
87
  [relaton-ecma] ("ECMA TR/18") fetching...
47
88
  [relaton-ecma] ("ECMA TR/18") found ECMA TR/18
48
89
  => #<RelatonEcma::BibliographicItem:0x00007fc645c00cc0
49
90
  ...
50
91
 
92
+ # fetch ECMA memento
51
93
  RelatonEcma::EcmaBibliography.get "ECMA MEM/2021"
52
94
  [relaton-ecma] ("ECMA MEM/2021") fetching...
53
95
  [relaton-ecma] ("ECMA MEM/2021") found ECMA MEM/2021
@@ -113,6 +155,25 @@ item = RelatonEcma::XMLParser.from_xml File.read("spec/fixtures/bibdata.xml")
113
155
  ...
114
156
  ----
115
157
 
158
+ === Fetch data
159
+
160
+ This gem uses a https://github.com/relaton/relaton-data-ecma[ecma-standards] prefetched dataset as a data source. The dataset contains documents from ECMA https://www.ecma-international.org/publications-and-standards/standards/[Standards], https://www.ecma-international.org/publications-and-standards/technical-reports/[Technical Reports], and https://www.ecma-international.org/publications-and-standards/mementos/[Mementos] pages.
161
+
162
+ The method `RelatonEcma::DataFetcher.new(output: "data", format: "yaml").fetch` fetches all the documents from the pages and saves them to the `./data` folder in YAML format.
163
+ Arguments:
164
+
165
+ - `output` - folder to save documents (default './data').
166
+ - `format` - the format in which the documents are saved. Possible formats are: `yaml`, `xml`, `bibxml` (default `yaml`).
167
+
168
+ [source,ruby]
169
+ ----
170
+ RelatonEcma::DataFetcher.new.fetch
171
+ Started at: 2022-06-23 09:36:55 +0200
172
+ Stopped at: 2022-06-23 09:36:58 +0200
173
+ Done in: 752 sec.
174
+ => nil
175
+ ----
176
+
116
177
  == Development
117
178
 
118
179
  After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake spec` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
@@ -522,7 +522,6 @@
522
522
  <value>tip</value>
523
523
  <value>important</value>
524
524
  <value>caution</value>
525
- <value>statement</value>
526
525
  </choice>
527
526
  </define>
528
527
  <define name="figure">
data/grammars/biblio.rng CHANGED
@@ -216,6 +216,9 @@
216
216
  <optional>
217
217
  <ref name="fullname"/>
218
218
  </optional>
219
+ <zeroOrMore>
220
+ <ref name="credential"/>
221
+ </zeroOrMore>
219
222
  <zeroOrMore>
220
223
  <ref name="affiliation"/>
221
224
  </zeroOrMore>
@@ -232,6 +235,11 @@
232
235
  <ref name="FullNameType"/>
233
236
  </element>
234
237
  </define>
238
+ <define name="credential">
239
+ <element name="credential">
240
+ <text/>
241
+ </element>
242
+ </define>
235
243
  <define name="FullNameType">
236
244
  <choice>
237
245
  <group>
@@ -305,7 +313,9 @@
305
313
  <zeroOrMore>
306
314
  <ref name="affiliationdescription"/>
307
315
  </zeroOrMore>
308
- <ref name="organization"/>
316
+ <optional>
317
+ <ref name="organization"/>
318
+ </optional>
309
319
  </element>
310
320
  </define>
311
321
  <define name="affiliationname">
@@ -1316,7 +1326,7 @@
1316
1326
  <value>commentaryOf</value>
1317
1327
  <value>hasCommentary</value>
1318
1328
  <value>related</value>
1319
- <value>complements</value>
1329
+ <value>hasComplement</value>
1320
1330
  <value>complementOf</value>
1321
1331
  <value>obsoletes</value>
1322
1332
  <value>obsoletedBy</value>
@@ -0,0 +1,97 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "English"
4
+ require "mechanize"
5
+ require "relaton_ecma"
6
+
7
+ module RelatonEcma
8
+ class DataFetcher
9
+ URL = "https://www.ecma-international.org/publications-and-standards/"
10
+
11
+ # @param [String] :output directory to output documents
12
+ # @param [String] :format output format (xml, yaml, bibxml)
13
+ def initialize(output: "data", format: "yaml")
14
+ @output = output
15
+ @format = format
16
+ @ext = format.sub(/^bib/, "")
17
+ @files = []
18
+ @index = Relaton::Index.find_or_create :ECMA
19
+ @agent = Mechanize.new
20
+ @agent.user_agent_alias = Mechanize::AGENT_ALIASES.keys[rand(21)]
21
+ end
22
+
23
+ # @param bib [RelatonEcma::BibliographicItem]
24
+ def write_file(bib) # rubocop:disable Metrics/AbcSize, Metrics/MethodLength
25
+ id = bib.docidentifier[0].id.gsub(%r{[/\s]}, "_")
26
+ id += "-#{bib.edition.content.gsub('.', '_')}" if bib.edition
27
+ extent = bib.extent.detect { |e| e.type == "volume" }
28
+ id += "-#{extent.reference_from}" if extent
29
+ file = "#{@output}/#{id}.#{@ext}"
30
+ if @files.include? file
31
+ warn "Duplicate file #{file}"
32
+ else
33
+ @files << file
34
+ File.write file, render_doc(bib), encoding: "UTF-8"
35
+ @index.add_or_update index_id(bib), file
36
+ end
37
+ end
38
+
39
+ def index_id(bib)
40
+ { id: bib.docidentifier[0].id }.tap do |i|
41
+ i[:ed] = bib.edition.content if bib.edition
42
+ extent = bib.extent.detect { |e| e.type == "volume" }
43
+ i[:vol] = extent.reference_from if extent
44
+ end
45
+ end
46
+
47
+ def render_doc(bib)
48
+ case @format
49
+ when "yaml" then bib.to_hash.to_yaml
50
+ when "xml" then bib.to_xml bibdata: true
51
+ when "bibxml" then bib.to_bibxml
52
+ end
53
+ end
54
+
55
+ # @param hit [Nokogiri::XML::Element]
56
+ def parse_page(hit) # rubocop:disable Metrics/AbcSize, Metrics/MethodLength
57
+ DataParser.new(hit).parse.each { |item| write_file item }
58
+ end
59
+
60
+ # @param type [String]
61
+ def html_index(type) # rubocop:disable Metrics/MethodLength
62
+ result = @agent.get "#{URL}#{type}/"
63
+ # @last_call_time = Time.now
64
+ result.xpath(
65
+ "//li/span[1]/a",
66
+ "//div[contains(@class, 'entry-content-wrapper')][.//a[.='Download']]",
67
+ ).each do |hit|
68
+ # workers << hit
69
+ parse_page(hit)
70
+ rescue StandardError => e
71
+ warn e.message
72
+ warn e.backtrace
73
+ end
74
+ end
75
+
76
+ #
77
+ # Fetch data from Ecma website.
78
+ #
79
+ # @return [void]
80
+ #
81
+ def fetch
82
+ t1 = Time.now
83
+ puts "Started at: #{t1}"
84
+
85
+ FileUtils.mkdir_p @output
86
+
87
+ html_index "standards"
88
+ html_index "technical-reports"
89
+ html_index "mementos"
90
+ @index.save
91
+
92
+ t2 = Time.now
93
+ puts "Stopped at: #{t2}"
94
+ puts "Done in: #{(t2 - t1).round} sec."
95
+ end
96
+ end
97
+ end
@@ -0,0 +1,215 @@
1
+ module RelatonEcma
2
+ class DataParser
3
+ MATTRS = %i[docid title date link].freeze
4
+ ATTRS = MATTRS + %i[abstract relation edition].freeze
5
+
6
+ #
7
+ # Initialize parser
8
+ #
9
+ # @param [Nokogiri::XML::Element] hit document hit
10
+ #
11
+ def initialize(hit)
12
+ @hit = hit
13
+ @bib = {
14
+ type: "standard", language: ["en"], script: ["Latn"], place: ["Geneva"], doctype: "document"
15
+ }
16
+ @agent = Mechanize.new
17
+ end
18
+
19
+ def parse # rubocop:disable Metrics/AbcSize,Metrics/MethodLength
20
+ if @hit[:href]
21
+ @agent.user_agent_alias = Mechanize::AGENT_ALIASES.keys[rand(21)]
22
+ @doc = get_page @hit[:href]
23
+ ATTRS.each { |a| @bib[a] = send "fetch_#{a}" }
24
+ else
25
+ MATTRS.each { |a| @bib[a] = send "fetch_mem_#{a}" }
26
+ end
27
+ @bib[:contributor] = contributor
28
+ items = [BibliographicItem.new(**@bib)]
29
+ items + parse_editions
30
+ end
31
+
32
+ #
33
+ # Get page with retries
34
+ #
35
+ # @param [String] url url to fetch
36
+ #
37
+ # @return [Mechanize::Page] document
38
+ #
39
+ def get_page(url)
40
+ 3.times do |n|
41
+ sleep n
42
+ doc = @agent.get url
43
+ return doc
44
+ rescue StandardError => e
45
+ warn e.message
46
+ end
47
+ end
48
+
49
+ #
50
+ # Parse editions
51
+ #
52
+ # @param [Mechanize::Page] doc document
53
+ # @param [Hash] bib bibliographic item the last edition
54
+ #
55
+ # @return [Array<RelatonEcma::BibliographicItem>] bibliographic items of the editions
56
+ #
57
+ def parse_editions # rubocop:disable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/MethodLength, Metrics/PerceivedComplexity
58
+ return [] unless @doc
59
+
60
+ docid = @bib[:docid]
61
+ @doc.xpath('//div[@id="main"]/div[1]/div/main/article/div/div/standard/div/ul/li').map do |hit|
62
+ id, ed, @bib[:date], vol = edition_id_parts hit.at("./span", "./a").text
63
+ @bib[:link] = edition_link(hit) + edition_translation_link(ed)
64
+ next if ed.nil? || ed.empty?
65
+
66
+ @bib[:docid] = id.nil? || id.empty? ? docid : fetch_docid(id)
67
+ @bib[:edition] = RelatonBib::Edition.new(content: ed)
68
+ @bib[:extent] = vol && [RelatonBib::Locality.new("volume", vol)]
69
+ BibliographicItem.new(**@bib)
70
+ end.compact
71
+ end
72
+
73
+ def edition_link(hit)
74
+ { "src" => hit.at("./a"), "pdf" => hit.at("./span/a") }.map do |type, a|
75
+ RelatonBib::TypedUri.new(type: type, content: a[:href]) if a
76
+ end.compact
77
+ end
78
+
79
+ #
80
+ # Parse edition and date
81
+ #
82
+ # @param [String] text identifier text
83
+ #
84
+ # @return [Array<String,nil,Array<RelatonBib::BibliographicDate>>] identifier, edition, dates, and volume
85
+ #
86
+ def edition_id_parts(text) # rubocop:disable Metrics/MethodLength
87
+ %r{^
88
+ (?<id>\w+(?:[\d-]+|\sTR/\d+)),?\s
89
+ (?:Volume\s(?<vol>[\d.]+),?\s)?
90
+ (?<ed>[\d.]+)(?:st|nd|rd|th)?\sedition
91
+ (?:[,.]\s(?<dt>\w+\s\d+))?
92
+ }x =~ text
93
+ date = [dt].compact.map do |d|
94
+ on = Date.strptime(d, "%B %Y").strftime("%Y-%m")
95
+ RelatonBib::BibliographicDate.new(type: "published", on: on)
96
+ end
97
+ [id, ed, date, vol]
98
+ end
99
+
100
+ # @return [Array<RelatonBib::DocumentIdentifier>]
101
+ def fetch_docid(id = nil)
102
+ id ||= @hit.text
103
+ [RelatonBib::DocumentIdentifier.new(type: "ECMA", id: id, primary: true)]
104
+ end
105
+
106
+ # @return [Array<RelatonBib::TypedUri>]
107
+ def fetch_link # rubocop:disable Metrics/AbcSize
108
+ link = []
109
+ link << RelatonBib::TypedUri.new(type: "src", content: @hit[:href]) if @hit[:href]
110
+ ref = @doc.at('//div[@class="ecma-item-content-wrapper"]/span/a',
111
+ '//div[@class="ecma-item-content-wrapper"]/a')
112
+ link << RelatonBib::TypedUri.new(type: "pdf", content: ref[:href]) if ref
113
+ link + edition_translation_link(@bib[:edition]&.content)
114
+ end
115
+
116
+ def fetch_mem_link
117
+ @hit.xpath("./div/section/div/p/a").map do |a|
118
+ RelatonBib::TypedUri.new(type: "pdf", content: a[:href])
119
+ end
120
+ end
121
+
122
+ def edition_translation_link(edition)
123
+ translation_link.select { |l| l[:ed] == edition }.map { |l| l[:link] }
124
+ end
125
+
126
+ def translation_link
127
+ return [] unless @doc
128
+
129
+ @translation_link ||= @doc.xpath("//main/article/div/div/standard/div[2]/ul/li").map do |l|
130
+ a = l.at("span/a")
131
+ id = l.at("span").text
132
+ %r{\w+[\d-]+,\s(?<lang>\w+)\sversion,\s(?<ed>[\d.]+)(?:st|nd|rd|th)\sedition} =~ id
133
+ case lang
134
+ when "Japanese"
135
+ { ed: ed, link: RelatonBib::TypedUri.new(type: "pdf", language: "ja", script: "Jpan", content: a[:href]) }
136
+ end
137
+ end.compact
138
+ end
139
+
140
+ # @return [Array<Hash>]
141
+ def fetch_title
142
+ @doc.xpath('//p[@class="ecma-item-short-description"]').map do |t|
143
+ { content: t.text.strip, language: "en", script: "Latn" }
144
+ end
145
+ end
146
+
147
+ # @return [Array<RelatonBib::FormattedString>]
148
+ def fetch_abstract
149
+ content = @doc.xpath('//div[@class="ecma-item-content"]/p').map do |a|
150
+ a.text.strip.squeeze(" ").gsub(/\r\n/, "")
151
+ end.join "\n"
152
+ return [] if content.empty?
153
+
154
+ [RelatonBib::FormattedString.new(content: content, language: "en", script: "Latn")]
155
+ end
156
+
157
+ # @return [Array<RelatonBib::BibliographicDate>]
158
+ def fetch_date
159
+ @doc.xpath('//p[@class="ecma-item-edition"]').map do |d|
160
+ date = d.text.split(", ").last
161
+ RelatonBib::BibliographicDate.new type: "published", on: date
162
+ end
163
+ end
164
+
165
+ # @return [Array<Hash>]
166
+ def fetch_relation # rubocop:disable Metrics/AbcSize, Metrics/MethodLength, Metrics/CyclomaticComplexity
167
+ @doc.xpath("//ul[@class='ecma-item-archives']/li").map do |rel|
168
+ ref, ed, date, vol = edition_id_parts rel.at("span").text
169
+ next if ed.nil? || ed.empty?
170
+
171
+ fref = RelatonBib::FormattedRef.new content: ref, language: "en", script: "Latn"
172
+ docid = RelatonBib::DocumentIdentifier.new(type: "ECMA", id: ref, primary: true)
173
+ link = rel.xpath("span/a").map { |l| RelatonBib::TypedUri.new type: "pdf", content: l[:href] }
174
+ edition = RelatonBib::Edition.new content: ed
175
+ extent = vol && [RelatonBib::Locality.new("volume", vol)]
176
+ bibitem = BibliographicItem.new(
177
+ docid: [docid], formattedref: fref, date: date, edition: edition,
178
+ link: link, extent: extent
179
+ )
180
+ { type: "updates", bibitem: bibitem }
181
+ end.compact
182
+ end
183
+
184
+ #
185
+ # @return [RelatonBib::Edition, nil]
186
+ #
187
+ def fetch_edition
188
+ cnt = @doc.at('//p[@class="ecma-item-edition"]')&.text&.match(/^\d+(?=(?:st|nd|th|rd))/)&.to_s
189
+ RelatonBib::Edition.new(content: cnt) if cnt && !cnt.empty?
190
+ end
191
+
192
+ def contributor
193
+ org = RelatonBib::Organization.new name: "Ecma International"
194
+ [{ entity: org, role: [{ type: "publisher" }] }]
195
+ end
196
+
197
+ # @return [Array<RelatonBib::DocumentIdentifier>]
198
+ def fetch_mem_docid
199
+ code = "ECMA MEM/#{@hit.at('div[1]//p').text}"
200
+ fetch_docid code
201
+ end
202
+
203
+ def fetch_mem_date
204
+ date = @hit.at("div[2]//p").text
205
+ on = Date.strptime(date, "%B %Y").strftime "%Y-%m"
206
+ [RelatonBib::BibliographicDate.new(type: "published", on: on)]
207
+ end
208
+
209
+ def fetch_mem_title
210
+ year = @hit.at("div[1]//p").text
211
+ content = "\"Memento #{year}\" for year #{year}"
212
+ [{ content: content, language: "en", script: "Latn" }]
213
+ end
214
+ end
215
+ end
@@ -3,11 +3,36 @@
3
3
  module RelatonEcma
4
4
  # ECMA bibliography module
5
5
  module EcmaBibliography
6
+ ENDPOINT = "https://raw.githubusercontent.com/relaton/relaton-data-ecma/master/"
7
+
6
8
  class << self
7
- # @param code [String] the ECMA standard Code to look up (e..g "ECMA-6")
8
- # @return [RelatonBib::BibliographicEcma]
9
- def search(code)
10
- Scrapper.scrape_page code
9
+ #
10
+ # Search for a reference in the ECMA index.
11
+ #
12
+ # @param ref [String] the ECMA standard reference to look up (e.g. "ECMA-6")
13
+ #
14
+ # @return [Array<Hash>]
15
+ #
16
+ def search(ref)
17
+ refparts = parse_ref ref
18
+ return false unless refparts
19
+
20
+ index = Relaton::Index.find_or_create :ECMA, url: "#{ENDPOINT}index.zip"
21
+ index.search { |row| match_ref refparts, row }
22
+ end
23
+
24
+ def parse_ref(ref)
25
+ %r{^
26
+ (?<id>ECMA(?:[\d-]+|\s\w+/\d+))
27
+ (?:\sed(?<ed>[\d.]+))?
28
+ (?:\svol(?<vol>\d+))?
29
+ }x.match ref
30
+ end
31
+
32
+ def match_ref(refparts, row)
33
+ row[:id][:id] == refparts[:id] &&
34
+ (refparts[:ed].nil? || row[:id][:ed] == refparts[:ed]) &&
35
+ (refparts[:vol].nil? || row[:id][:vol] == refparts[:vol])
11
36
  end
12
37
 
13
38
  # @param code [String] the ECMA standard Code to look up (e.g. "ECMA-6")
@@ -16,15 +41,36 @@ module RelatonEcma
16
41
  # @return [RelatonEcma::BibliographicItem] Relaton of reference
17
42
  def get(code, _year = nil, _opts = {})
18
43
  warn "[relaton-ecma] (\"#{code}\") fetching..."
19
- result = search code
44
+ result = fetch_doc(code)
20
45
  if result
21
46
  warn "[relaton-ecma] (\"#{code}\") found #{result.docidentifier.first.id}"
47
+ # item
22
48
  else
23
- warn "[relaton-ecma] WARNING no match found online for #{code}. "\
49
+ warn "[relaton-ecma] WARNING no match found online for #{code}. " \
24
50
  "The code must be exactly like it is on the standards website."
25
51
  end
26
52
  result
27
53
  end
54
+
55
+ def compare_edition_volume(aaa, bbb)
56
+ comp = bbb[:id][:ed] <=> aaa[:id][:ed]
57
+ comp.zero? ? aaa[:id][:vol] <=> bbb[:id][:vol] : comp
58
+ end
59
+
60
+ def fetch_doc(code) # rubocop:disable Metrics/AbcSize
61
+ row = search(code).min { |a, b| compare_edition_volume a, b }
62
+ return unless row
63
+
64
+ url = "#{ENDPOINT}#{row[:file]}"
65
+ doc = OpenURI.open_uri url
66
+ hash = YAML.safe_load doc
67
+ hash["fetched"] = Date.today.to_s
68
+ BibliographicItem.from_hash hash
69
+ rescue OpenURI::HTTPError => e
70
+ return if e.io.status.first == "404"
71
+
72
+ raise RelatonBib::RequestError, "No document found for #{code} reference. #{e.message}"
73
+ end
28
74
  end
29
75
  end
30
76
  end
@@ -7,6 +7,7 @@ module RelatonEcma
7
7
  @prefix = "ECMA"
8
8
  @defaultprefix = /^ECMA(-|\s)/
9
9
  @idtype = "ECMA"
10
+ @datasets = %w[ecma-standards]
10
11
  end
11
12
 
12
13
  # @param code [String]
@@ -17,6 +18,18 @@ module RelatonEcma
17
18
  ::RelatonEcma::EcmaBibliography.get(code, date, opts)
18
19
  end
19
20
 
21
+ #
22
+ # Fetch all the documents from a source
23
+ #
24
+ # @param [String] source source name (ecma-standards)
25
+ # @param [Hash] opts
26
+ # @option opts [String] :output directory to output documents
27
+ # @option opts [String] :format output format (xml, yaml, bibxml)
28
+ #
29
+ def fetch_data(_source, opts)
30
+ DataFetcher.new(**opts).fetch
31
+ end
32
+
20
33
  # @param xml [String]
21
34
  # @return [RelatonEcma::BibliographicItem]
22
35
  def from_xml(xml)
@@ -34,5 +47,12 @@ module RelatonEcma
34
47
  def grammar_hash
35
48
  @grammar_hash ||= ::RelatonEcma.grammar_hash
36
49
  end
50
+
51
+ #
52
+ # Remove index file
53
+ #
54
+ def remove_index_file
55
+ Relaton::Index.find_or_create(:ECMA, url: true).remove_file
56
+ end
37
57
  end
38
58
  end
@@ -1,3 +1,3 @@
1
1
  module RelatonEcma
2
- VERSION = "1.14.0".freeze
2
+ VERSION = "1.14.2".freeze
3
3
  end
data/lib/relaton_ecma.rb CHANGED
@@ -1,13 +1,15 @@
1
1
  require "nokogiri"
2
2
  require "open-uri"
3
3
  require "yaml"
4
+ require "relaton/index"
4
5
  require "relaton_bib"
5
6
  require "relaton_ecma/version"
6
7
  require "relaton_ecma/bibliographic_item"
7
8
  require "relaton_ecma/xml_parser"
8
9
  require "relaton_ecma/hash_converter"
9
- require "relaton_ecma/scrapper"
10
10
  require "relaton_ecma/ecma_bibliography"
11
+ require "relaton_ecma/data_fetcher"
12
+ require "relaton_ecma/data_parser"
11
13
 
12
14
  module RelatonEcma
13
15
  # Returns hash of XML grammar
data/relaton_ecma.gemspec CHANGED
@@ -27,15 +27,10 @@ Gem::Specification.new do |spec| # rubocop:disable Metrics/BlockLength
27
27
  spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
28
28
  spec.require_paths = ["lib"]
29
29
 
30
- # spec.add_development_dependency "debase"
31
30
  spec.add_development_dependency "equivalent-xml", "~> 0.6"
32
- spec.add_development_dependency "pry-byebug"
33
31
  spec.add_development_dependency "rake", "~> 10.0"
34
- # spec.add_development_dependency "ruby-debug-ide"
35
- spec.add_development_dependency "ruby-jing"
36
- spec.add_development_dependency "simplecov"
37
- spec.add_development_dependency "vcr"
38
- spec.add_development_dependency "webmock"
39
32
 
33
+ spec.add_dependency "mechanize", "~> 2.7"
40
34
  spec.add_dependency "relaton-bib", "~> 1.14.0"
35
+ spec.add_dependency "relaton-index", "~> 0.2.0"
41
36
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: relaton-ecma
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.14.0
4
+ version: 1.14.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ribose Inc.
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2022-12-03 00:00:00.000000000 Z
11
+ date: 2023-05-16 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: equivalent-xml
@@ -24,20 +24,6 @@ dependencies:
24
24
  - - "~>"
25
25
  - !ruby/object:Gem::Version
26
26
  version: '0.6'
27
- - !ruby/object:Gem::Dependency
28
- name: pry-byebug
29
- requirement: !ruby/object:Gem::Requirement
30
- requirements:
31
- - - ">="
32
- - !ruby/object:Gem::Version
33
- version: '0'
34
- type: :development
35
- prerelease: false
36
- version_requirements: !ruby/object:Gem::Requirement
37
- requirements:
38
- - - ">="
39
- - !ruby/object:Gem::Version
40
- version: '0'
41
27
  - !ruby/object:Gem::Dependency
42
28
  name: rake
43
29
  requirement: !ruby/object:Gem::Requirement
@@ -53,75 +39,47 @@ dependencies:
53
39
  - !ruby/object:Gem::Version
54
40
  version: '10.0'
55
41
  - !ruby/object:Gem::Dependency
56
- name: ruby-jing
42
+ name: mechanize
57
43
  requirement: !ruby/object:Gem::Requirement
58
44
  requirements:
59
- - - ">="
60
- - !ruby/object:Gem::Version
61
- version: '0'
62
- type: :development
63
- prerelease: false
64
- version_requirements: !ruby/object:Gem::Requirement
65
- requirements:
66
- - - ">="
67
- - !ruby/object:Gem::Version
68
- version: '0'
69
- - !ruby/object:Gem::Dependency
70
- name: simplecov
71
- requirement: !ruby/object:Gem::Requirement
72
- requirements:
73
- - - ">="
74
- - !ruby/object:Gem::Version
75
- version: '0'
76
- type: :development
77
- prerelease: false
78
- version_requirements: !ruby/object:Gem::Requirement
79
- requirements:
80
- - - ">="
81
- - !ruby/object:Gem::Version
82
- version: '0'
83
- - !ruby/object:Gem::Dependency
84
- name: vcr
85
- requirement: !ruby/object:Gem::Requirement
86
- requirements:
87
- - - ">="
45
+ - - "~>"
88
46
  - !ruby/object:Gem::Version
89
- version: '0'
90
- type: :development
47
+ version: '2.7'
48
+ type: :runtime
91
49
  prerelease: false
92
50
  version_requirements: !ruby/object:Gem::Requirement
93
51
  requirements:
94
- - - ">="
52
+ - - "~>"
95
53
  - !ruby/object:Gem::Version
96
- version: '0'
54
+ version: '2.7'
97
55
  - !ruby/object:Gem::Dependency
98
- name: webmock
56
+ name: relaton-bib
99
57
  requirement: !ruby/object:Gem::Requirement
100
58
  requirements:
101
- - - ">="
59
+ - - "~>"
102
60
  - !ruby/object:Gem::Version
103
- version: '0'
104
- type: :development
61
+ version: 1.14.0
62
+ type: :runtime
105
63
  prerelease: false
106
64
  version_requirements: !ruby/object:Gem::Requirement
107
65
  requirements:
108
- - - ">="
66
+ - - "~>"
109
67
  - !ruby/object:Gem::Version
110
- version: '0'
68
+ version: 1.14.0
111
69
  - !ruby/object:Gem::Dependency
112
- name: relaton-bib
70
+ name: relaton-index
113
71
  requirement: !ruby/object:Gem::Requirement
114
72
  requirements:
115
73
  - - "~>"
116
74
  - !ruby/object:Gem::Version
117
- version: 1.14.0
75
+ version: 0.2.0
118
76
  type: :runtime
119
77
  prerelease: false
120
78
  version_requirements: !ruby/object:Gem::Requirement
121
79
  requirements:
122
80
  - - "~>"
123
81
  - !ruby/object:Gem::Version
124
- version: 1.14.0
82
+ version: 0.2.0
125
83
  description: "RelatonEcma: retrieve ECMA Standards for bibliographic use \nusing the
126
84
  BibliographicItem model.\n"
127
85
  email:
@@ -148,10 +106,11 @@ files:
148
106
  - grammars/relaton-ecma.rng
149
107
  - lib/relaton_ecma.rb
150
108
  - lib/relaton_ecma/bibliographic_item.rb
109
+ - lib/relaton_ecma/data_fetcher.rb
110
+ - lib/relaton_ecma/data_parser.rb
151
111
  - lib/relaton_ecma/ecma_bibliography.rb
152
112
  - lib/relaton_ecma/hash_converter.rb
153
113
  - lib/relaton_ecma/processor.rb
154
- - lib/relaton_ecma/scrapper.rb
155
114
  - lib/relaton_ecma/version.rb
156
115
  - lib/relaton_ecma/xml_parser.rb
157
116
  - relaton_ecma.gemspec
@@ -175,7 +134,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
175
134
  - !ruby/object:Gem::Version
176
135
  version: '0'
177
136
  requirements: []
178
- rubygems_version: 3.2.3
137
+ rubygems_version: 3.4.9
179
138
  signing_key:
180
139
  specification_version: 4
181
140
  summary: 'RelatonIetf: retrieve ECMA Standards for bibliographic use using the BibliographicItem
@@ -1,29 +0,0 @@
1
- module RelatonEcma
2
- module Scrapper
3
- ENDPOINT = "https://raw.githubusercontent.com/relaton/relaton-data-ecma/master/data/".freeze
4
-
5
- class << self
6
- # @param code [String]
7
- # @return [RelatonBib::BibliographicItem]
8
- def scrape_page(code)
9
- url = "#{ENDPOINT}#{code.gsub(/[\/\s]/, '_').upcase}.yaml"
10
- parse_page url
11
- rescue OpenURI::HTTPError => e
12
- return if e.io.status.first == "404"
13
-
14
- raise RelatonBib::RequestError, "No document found for #{code} reference. #{e.message}"
15
- end
16
-
17
- private
18
-
19
- # @param url [String]
20
- # @retrurn [RelatonEcma::BibliographicItem]
21
- def parse_page(url)
22
- doc = OpenURI.open_uri url
23
- hash = YAML.safe_load(doc)
24
- hash["fetched"] = Date.today.to_s
25
- BibliographicItem.from_hash hash
26
- end
27
- end
28
- end
29
- end