relaton-iso 1.20.0 → 2.0.0.pre.alpha.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (69) hide show
  1. checksums.yaml +4 -4
  2. data/.rubocop.yml +1 -1
  3. data/Gemfile +1 -0
  4. data/README.adoc +134 -130
  5. data/bin/console +1 -1
  6. data/grammars/basicdoc.rng +2110 -0
  7. data/grammars/biblio-standoc.rng +287 -0
  8. data/grammars/biblio.rng +2097 -0
  9. data/grammars/relaton-iso-compile.rng +11 -0
  10. data/grammars/relaton-iso.rng +214 -0
  11. data/lib/relaton/iso/bibliography.rb +206 -0
  12. data/lib/relaton/iso/data_fetcher.rb +227 -0
  13. data/lib/relaton/iso/hash_parser_v1.rb +121 -0
  14. data/lib/relaton/iso/hit.rb +62 -0
  15. data/lib/relaton/iso/hit_collection.rb +117 -0
  16. data/lib/relaton/iso/item_data.rb +49 -0
  17. data/lib/relaton/iso/model/bibdata.rb +9 -0
  18. data/lib/relaton/iso/model/bibitem.rb +7 -0
  19. data/lib/relaton/iso/model/contributor.rb +7 -0
  20. data/lib/relaton/iso/model/contributor_info.rb +9 -0
  21. data/lib/relaton/iso/model/docidentifier.rb +128 -0
  22. data/lib/relaton/iso/model/doctype.rb +13 -0
  23. data/lib/relaton/iso/model/ext.rb +47 -0
  24. data/lib/relaton/iso/model/iso_project_group.rb +21 -0
  25. data/lib/relaton/iso/model/item.rb +17 -0
  26. data/lib/relaton/iso/model/item_base.rb +19 -0
  27. data/lib/relaton/iso/model/organization.rb +9 -0
  28. data/lib/relaton/iso/model/project_number.rb +22 -0
  29. data/lib/relaton/iso/model/relation.rb +9 -0
  30. data/lib/relaton/iso/model/stagename.rb +14 -0
  31. data/lib/relaton/iso/model/structured_identifier.rb +31 -0
  32. data/lib/relaton/iso/processor.rb +78 -0
  33. data/lib/relaton/iso/queue.rb +63 -0
  34. data/lib/relaton/iso/scraper.rb +591 -0
  35. data/lib/relaton/iso/util.rb +8 -0
  36. data/lib/relaton/iso/version.rb +7 -0
  37. data/lib/relaton/iso.rb +17 -0
  38. data/relaton_iso.gemspec +9 -7
  39. metadata +76 -46
  40. data/bin/bundle +0 -109
  41. data/bin/byebug +0 -27
  42. data/bin/coderay +0 -27
  43. data/bin/gdb_wrapper +0 -29
  44. data/bin/htmldiff +0 -27
  45. data/bin/httpclient +0 -29
  46. data/bin/ldiff +0 -27
  47. data/bin/nokogiri +0 -27
  48. data/bin/pry +0 -27
  49. data/bin/pubid-nist +0 -27
  50. data/bin/racc +0 -27
  51. data/bin/rackup +0 -29
  52. data/bin/rake +0 -27
  53. data/bin/rubocop +0 -27
  54. data/bin/ruby-parse +0 -27
  55. data/bin/ruby-rewrite +0 -27
  56. data/bin/safe_yaml +0 -29
  57. data/bin/thor +0 -27
  58. data/lib/relaton_iso/data_fetcher.rb +0 -246
  59. data/lib/relaton_iso/document_identifier.rb +0 -46
  60. data/lib/relaton_iso/hash_converter.rb +0 -15
  61. data/lib/relaton_iso/hit.rb +0 -59
  62. data/lib/relaton_iso/hit_collection.rb +0 -100
  63. data/lib/relaton_iso/iso_bibliography.rb +0 -202
  64. data/lib/relaton_iso/processor.rb +0 -67
  65. data/lib/relaton_iso/queue.rb +0 -61
  66. data/lib/relaton_iso/scrapper.rb +0 -553
  67. data/lib/relaton_iso/util.rb +0 -6
  68. data/lib/relaton_iso/version.rb +0 -5
  69. data/lib/relaton_iso.rb +0 -17
@@ -1,553 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- module RelatonIso
4
- # Scrapper.
5
- class Scrapper # rubocop:disable Metrics/ModuleLength
6
- DOMAIN = "https://www.iso.org"
7
-
8
- TYPES = {
9
- "TS" => "technical-specification",
10
- "DTS" => "technical-specification",
11
- "TR" => "technical-report",
12
- "DTR" => "technical-report",
13
- "PAS" => "publicly-available-specification",
14
- # "AWI" => "approvedWorkItem",
15
- # "CD" => "committeeDraft",
16
- # "FDIS" => "finalDraftInternationalStandard",
17
- # "NP" => "newProposal",
18
- # "DIS" => "draftInternationalStandard",
19
- # "WD" => "workingDraft",
20
- # "R" => "recommendation",
21
- "Guide" => "guide",
22
- "ISO" => "international-standard",
23
- "IEC" => "international-standard",
24
- "IWA" => "international-workshop-agreement",
25
- }.freeze
26
-
27
- STGABBR = {
28
- "00" => "NWIP",
29
- "10" => "AWI",
30
- "20" => "WD",
31
- "30" => "CD",
32
- "40" => "DIS",
33
- "50" => "FDIS",
34
- "60" => { "00" => "PRF", "60" => "FINAL" },
35
- }.freeze
36
-
37
- PUBLISHERS = {
38
- "IEC" => { name: "International Electrotechnical Commission",
39
- url: "www.iec.ch" },
40
- "ISO" => { name: "International Organization for Standardization",
41
- url: "www.iso.org" },
42
- "IEEE" => { name: "Institute of Electrical and Electronics Engineers",
43
- url: "www.ieee.org" },
44
- "SAE" => { name: "SAE International", url: "www.sae.org" },
45
- "CIE" => { name: " International Commission on Illumination",
46
- url: "cie.co.at" },
47
- "ASME" => { name: "American Society of Mechanical Engineers",
48
- url: "www.asme.org" },
49
- }.freeze
50
-
51
- # extend self
52
-
53
- def initialize(lang, errors)
54
- @lang = lang
55
- @errors = errors
56
- end
57
-
58
- # Parse page.
59
- # @param path [String] page path
60
- # @param lang [String, nil] language
61
- # @param errors [Hash] collection of parsing errors
62
- # @return [RelatonIsoBib::IsoBibliographicItem]
63
- def self.parse_page(path, lang: nil, errors: {})
64
- new(lang, errors).parse(path)
65
- end
66
-
67
- def parse(path) # rubocop:disable Metrics/AbcSize,Metrics/MethodLength
68
- @doc, url = get_page path
69
- titles, abstract, langs = fetch_titles_abstract
70
-
71
- RelatonIsoBib::IsoBibliographicItem.new(
72
- docid: fetch_relaton_docids,
73
- docnumber: fetch_docnumber,
74
- edition: edition,
75
- language: langs.map { |l| l[:lang] },
76
- script: langs.map { |l| script(l[:lang]) }.uniq,
77
- title: titles,
78
- doctype: fetch_type,
79
- docstatus: fetch_status,
80
- ics: fetch_ics,
81
- date: fetch_dates,
82
- contributor: fetch_contributors,
83
- editorialgroup: fetch_workgroup,
84
- abstract: abstract,
85
- copyright: fetch_copyright,
86
- link: fetch_link(url),
87
- relation: fetch_relations,
88
- place: ["Geneva"],
89
- structuredidentifier: fetch_structuredidentifier,
90
- )
91
- end
92
-
93
- def id
94
- return @id if defined?(@id)
95
-
96
- did = @doc.at("//h1/span[1]")
97
- @errors[:id] &&= did.nil?
98
- @id = did && did.text.split(" | ").first.strip
99
- end
100
-
101
- def pubid
102
- return @pubid if @pubid
103
-
104
- @pubid = Pubid::Iso::Identifier.parse(id)
105
- @pubid.root.edition ||= edition if @pubid.base
106
- @pubid
107
- rescue StandardError => e
108
- Util.error "Failed to parse pubid from #{id}: #{e.message}"
109
- end
110
-
111
- def edition
112
- return @edition if defined?(@edition)
113
-
114
- ed = @doc.at("//div[div[.='Edition']]/text()[last()]")
115
- @errors[:edition] &&= ed.nil?
116
- @edition = ed && ed.text.match(/\d+$/).to_s
117
- end
118
-
119
- #
120
- # Create document ids.
121
- #
122
- # @return [Array<RelatonBib::DocumentIdentifier>]
123
- #
124
- def fetch_relaton_docids
125
- pubid.stage ||= Pubid::Iso::Identifier.parse_stage(stage_code)
126
- [
127
- DocumentIdentifier.new(id: pubid, type: "ISO", primary: true),
128
- RelatonBib::DocumentIdentifier.new(id: isoref, type: "iso-reference"),
129
- DocumentIdentifier.new(id: pubid, type: "URN"),
130
- ]
131
- end
132
-
133
- #
134
- # Create ISO reference identifier with English language.
135
- #
136
- # @return [String] English reference identifier
137
- #
138
- def isoref
139
- params = pubid.to_h.reject { |k, _| k == :typed_stage }
140
- Pubid::Iso::Identifier.create(language: "en", **params).to_s(format: :ref_num_short)
141
- end
142
-
143
- private
144
-
145
- # Fetch titles and abstracts.
146
- # @return [Array<Array>]
147
- def fetch_titles_abstract # rubocop:disable Metrics/AbcSize,Metrics/MethodLength
148
- titles = RelatonBib::TypedTitleStringCollection.new
149
- abstract = []
150
- langs = languages.each_with_object([]) do |l, s|
151
- # Don't need to get page for en. We already have it.
152
- d = l[:path] ? get_page(l[:path])[0] : @doc
153
- unless d.at("//h5[@class='help-block'][.='недоступно на русском языке']")
154
- s << l
155
- titles += fetch_title(d, l[:lang])
156
-
157
- abstr = parse_abstract(d, l[:lang])
158
- abstract << abstr if abstr
159
- end
160
- end
161
- [titles, abstract, langs]
162
- end
163
-
164
- def parse_abstract(doc, lang)
165
- abstract_content = doc.xpath(
166
- "//div[@itemprop='description']/p|//div[@itemprop='description']/ul/li",
167
- ).map { |a| a.name == "li" ? "- #{a.text}" : a.text }.reject(&:empty?).join("\n")
168
- @errors[:abstract] &&= abstract_content.empty?
169
- return if abstract_content.empty?
170
-
171
- { content: abstract_content, language: lang, script: script(lang), format: "text/plain" }
172
- end
173
-
174
- # Returns available languages.
175
- # @return [Array<Hash>]
176
- def languages
177
- lgs = [{ lang: "en" }]
178
- @doc.css("li#lang-switcher ul li a").each do |lang_link|
179
- lang_path = lang_link.attr("href")
180
- l = lang_path.match(%r{^/(fr)/})
181
- lgs << { lang: l[1], path: lang_path } if l && (!@lang || l[1] != @lang)
182
- end
183
- @errors[:language] &&= lgs.size == 1
184
- lgs
185
- end
186
-
187
- # Get page.
188
- # @param path [String] page's path
189
- # @return [Array<Nokogiri::HTML::Document, String>]
190
- def get_page(path) # rubocop:disable Metrics/MethodLength
191
- try = 0
192
- begin
193
- resp, uri = get_redirection path
194
- doc = try_if_fail resp, uri
195
- [doc, uri.to_s]
196
- rescue SocketError, Timeout::Error, Errno::EINVAL, Errno::ECONNRESET,
197
- EOFError, Net::HTTPBadResponse, Net::HTTPHeaderSyntaxError,
198
- Net::ProtocolError, Errno::ETIMEDOUT
199
- try += 1
200
- raise RelatonBib::RequestError, "Could not access #{DOMAIN}#{path}" if try > 3
201
-
202
- sleep 1
203
- retry
204
- end
205
- end
206
-
207
- #
208
- # Get the page from the given path. If the page is redirected, get the
209
- # page from the new path.
210
- #
211
- # @param [String] path path to the page
212
- #
213
- # @return [Array<Net::HTTPOK, URI>] HTTP response and URI
214
- # @raise [RelatonBib::RequestError] if the page is not found
215
- #
216
- def get_redirection(path) # rubocop:disable Metrics/AbcSize,Metrics/MethodLength
217
- uri = URI(DOMAIN + path)
218
- try = 0
219
- begin
220
- get_response uri
221
- rescue Errno::EPIPE => e
222
- try += 1
223
- retry if check_try try, uri
224
- raise e
225
- end
226
- end
227
-
228
- def check_try(try, uri)
229
- if try < 3
230
- warn "Timeout fetching #{uri}, retrying..."
231
- sleep 1
232
- true
233
- end
234
- end
235
-
236
- def get_response(uri, try = 0)
237
- raise RelatonBib::RequestError, "#{uri} not found." if try > 3
238
-
239
- resp = Net::HTTP.get_response(uri)
240
- case resp.code
241
- when "200" then [resp, uri]
242
- when "301" then get_redirection(resp["location"])
243
- when "404" then raise RelatonBib::RequestError, "#{uri} not found."
244
- else
245
- sleep (2**try)
246
- get_response uri, try + 1
247
- end
248
- end
249
-
250
- #
251
- # The iso.org site fails to respond sometimes. This method tries to get
252
- # the response again.
253
- #
254
- # @param [Net::HTTPOK] resp HTTP response
255
- # @param [URI::HTTPS] uri URI of the page
256
- #
257
- # @return [Nokogiri::HTML4::Document] document
258
- # @raise [RelatonBib::RequestError] if the page could not be parsed
259
- #
260
- def try_if_fail(resp, uri)
261
- 10.times do
262
- doc = Nokogiri::HTML(resp.body)
263
- # stop trying if page has a document id
264
- return doc if item_ref(doc)
265
-
266
- resp = Net::HTTP.get_response(uri)
267
- end
268
- raise RelatonBib::RequestError, "Could not parse the page #{uri}"
269
- end
270
-
271
- #
272
- # Generate docnumber.
273
- #
274
- # @return [String] docnumber
275
- #
276
- def fetch_docnumber
277
- pubid.to_s.match(/\d+/)&.to_s
278
- end
279
-
280
- #
281
- # Parse structuredidentifier.
282
- #
283
- # @return [RelatonBib::StructuredIdentifier] structured identifier
284
- #
285
- def fetch_structuredidentifier # rubocop:disable Metrics/MethodLength
286
- RelatonIsoBib::StructuredIdentifier.new(
287
- project_number: "#{pubid.root.publisher} #{pubid.root.number}",
288
- part: pubid.root.part&.to_s, # &.sub(/^-/, ""),
289
- type: pubid.root.publisher,
290
- )
291
- end
292
-
293
- #
294
- # Parse ID from the document.
295
- #
296
- # @param [Nokogiri::HTML::Document] doc document to parse
297
- #
298
- # @return [String, nil] ID
299
- #
300
- def item_ref(doc)
301
- ref = doc.at("//main//section/div/div/div//h1/span[1]")
302
- @errors[:reference] &&= ref.nil?
303
- ref&.text&.strip
304
- end
305
-
306
- # Fetch status.
307
- # @return [RelatonBib::DocumentStatus]
308
- def fetch_status
309
- stg, substg = stage_code.split "."
310
- RelatonBib::DocumentStatus.new(stage: stg, substage: substg)
311
- end
312
-
313
- def stage_code
314
- return @stage_code if defined?(@stage_code)
315
-
316
- stc = @doc.at("//ul[@class='dropdown-menu']/li[@class='active']/a/span[@class='stage-code']")
317
- @errors[:stage] &&= stc.nil?
318
- @stage_code = stc&.text
319
- end
320
-
321
- # def stage(stg, substg)
322
- # abbr = STGABBR[stg].is_a?(Hash) ? STGABBR[stg][substg] : STGABBR[stg]
323
- # RelatonBib::DocumentStatus::Stage.new value: stg, abbreviation: abbr
324
- # end
325
-
326
- # Fetch workgroup.
327
- # @param doc [Nokogiri::HTML::Document]
328
- # @return [RelatonIsoBib::EditorialGroup, nil]
329
- def fetch_workgroup # rubocop:disable Metrics/MethodLength,Metrics/AbcSize,Metrics/CyclomaticComplexity,Metrics/PerceivedComplexity
330
- wg = @doc.at("//div[contains(., 'Technical Committe')]/following-sibling::span/a")
331
- @errors[:workgroup] &&= wg.nil?
332
- return unless wg
333
-
334
- workgroup = wg.text.split "/"
335
- type = workgroup[1]&.match(/^[A-Z]+/)&.to_s || "TC"
336
- # {
337
- # name: "International Organization for Standardization",
338
- # abbreviation: "ISO",
339
- # url: "www.iso.org",
340
- # }
341
- tc_numb = workgroup[1]&.match(/\d+/)&.to_s&.to_i
342
- tc_name = wg[:title]
343
- tc = RelatonBib::WorkGroup.new(name: tc_name, identifier: wg.text,
344
- type: type, number: tc_numb)
345
- RelatonIsoBib::EditorialGroup.new(technical_committee: [tc])
346
- end
347
-
348
- # Fetch relations.
349
- # @return [Array<Hash>]
350
- def fetch_relations
351
- types = ["Now", "Now under review"]
352
- rels = @doc.xpath(
353
- "//ul[@class='steps']/li", "//div[contains(@class, 'sub-step')]"
354
- ).reduce([]) do |a, r|
355
- type, date = relation_type(r.at("h4", "h5").text.strip)
356
- next a if types.include?(type)
357
-
358
- a + create_relations(r, type, date)
359
- end
360
- @errors[:relation] &&= rels.empty?
361
- rels
362
- end
363
-
364
- #
365
- # Parse relation type and dates.
366
- #
367
- # @param [String] type parsed type
368
- #
369
- # @return [Array<String,Array>] type and dates
370
- #
371
- def relation_type(type)
372
- date = []
373
- t = case type.strip
374
- when "Previously", "Will be replaced by" then "obsoletes"
375
- when /Corrigenda|Amendments|Revised by|Now confirmed|replaced by/
376
- on = @doc.xpath('//span[@class="stage-date"][contains(., "-")]').last
377
- date << { type: "circulated", on: on.text } if on
378
- "updates"
379
- else type
380
- end
381
- [t, date]
382
- end
383
-
384
- #
385
- # Create relations.
386
- #
387
- # @param [Nokogiri::HTML::Element] rel relation element
388
- # @param [String] type relation type
389
- # @param [Hash{Symbol=>String}] date relation document date
390
- # @option date [String] :type date type
391
- # @option date [String] :on date
392
- #
393
- # @return [Array<Hash>] Relations
394
- #
395
- def create_relations(rel, type, date)
396
- rel.css("a").map do |rid|
397
- docid = DocumentIdentifier.new(type: "ISO", id: rid.text, primary: true)
398
- fref = RelatonBib::FormattedRef.new(content: rid.text, format: "text/plain")
399
- bibitem = RelatonIsoBib::IsoBibliographicItem.new(
400
- docid: [docid], formattedref: fref, date: date,
401
- )
402
- { type: type, bibitem: bibitem }
403
- end
404
- end
405
-
406
- # Fetch type.
407
- # @return [String]
408
- def fetch_type
409
- %r{
410
- ^(?<prefix>ISO|IWA|IEC)
411
- (?:(?:/CIE|/IEC|/IEEE|/PRF|/NP|/SAE|/HL7|/DGuide)*\s|/)
412
- (?<type>TS|TR|PAS|AWI|CD|FDIS|NP|DIS|WD|R|DTS|DTR|ISP|PWI|Guide|(?=\d+))
413
- }x =~ id
414
- type = TYPES[type] || TYPES[prefix] || "international-standard"
415
- RelatonIsoBib::DocumentType.new(type: type)
416
- end
417
-
418
- # Fetch titles.
419
- # @param doc [Nokogiri::HTML::Document]
420
- # @param lang [String]
421
- # @return [Array<RelatonBib::TypedTitleString>]
422
- def fetch_title(doc, lang) # rubocop:disable Metrics/MethodLength,Metrics/AbcSize
423
- types = %w[title-intro title-main title-part]
424
- ttls = parse_titles(doc)
425
- title = RelatonBib::TypedTitleStringCollection.new
426
- ttls.each.with_index do |p, i|
427
- next unless p
428
-
429
- title << RelatonBib::TypedTitleString.new(
430
- type: types[i], content: p, language: lang, script: script(lang),
431
- )
432
- end.compact
433
- main = title.map { |t| t.title.content }.join " - "
434
- title << RelatonBib::TypedTitleString.new(type: "main", content: main, language: lang, script: script(lang))
435
- end
436
-
437
- def parse_titles(doc)
438
- # head = doc.at "//nav[contains(@class,'heading-condensed')]"
439
- ttls = doc.xpath("//h1[@class='stdTitle']/span[position()>1]").map(&:text)
440
- return ttls if @errors[:title] &&= ttls.empty?
441
-
442
- ttls[0, 1] = ttls[0].split(/\s(?:-|\u2014)\s/) # if ttls.size == 1
443
- case ttls.size
444
- when 0, 1 then [nil, ttls.first, nil]
445
- else RelatonBib::TypedTitleString.intro_or_part ttls
446
- end
447
- end
448
-
449
- # Return ISO script code.
450
- # @param lang [String]
451
- # @return [String]
452
- def script(lang)
453
- case lang
454
- when "en", "fr" then "Latn"
455
- # when "ru" then "Cyrl"
456
- end
457
- end
458
-
459
- # Fetch dates
460
- # @return [Array<Hash>]
461
- def fetch_dates # rubocop:disable Metrics/AbcSize,Metrics/MethodLength
462
- dates = []
463
- %r{^[^\s]+\s[\d-]+:(?<ref_date_str>\d{4})} =~ id
464
- pub_date_str = @doc.at("//span[@itemprop='releaseDate']")
465
- @errors[:date_pub] &&= pub_date_str.nil?
466
- if ref_date_str
467
- dates += parse_date_from_id ref_date_str, pub_date_str
468
- elsif pub_date_str
469
- dates << { type: "published", on: pub_date_str.text }
470
- end
471
- corr_data = @doc.at "//span[@itemprop='dateModified']"
472
- @errors[:date_corr] &&= corr_data.nil?
473
- dates << { type: "corrected", on: corr_data.text } if corr_data
474
- dates
475
- end
476
-
477
- def parse_date_from_id(ref_date_str, pub_date_str) # rubocop:disable Metrics/AbcSize,Metrics/MethodLength
478
- dates = []
479
- ref_date = Date.strptime ref_date_str, "%Y"
480
- if pub_date_str.nil?
481
- dates << { type: "published", on: ref_date_str }
482
- else
483
- pub_date = Date.strptime pub_date_str.text, "%Y"
484
- if pub_date.year > ref_date.year
485
- dates << { type: "published", on: ref_date_str }
486
- dates << { type: "updated", on: pub_date_str.text }
487
- else
488
- dates << { type: "published", on: pub_date_str.text }
489
- end
490
- end
491
- dates
492
- end
493
-
494
- def fetch_contributors
495
- id.sub(/\s.*/, "").split("/").reduce([]) do |mem, abbrev|
496
- publisher = PUBLISHERS[abbrev]
497
- next mem unless publisher
498
-
499
- publisher[:abbreviation] = abbrev
500
- mem << { entity: publisher, role: [type: "publisher"] }
501
- end
502
- end
503
-
504
- # Fetch ICS.
505
- # @return [Array<Hash>]
506
- def fetch_ics
507
- ics = @doc.xpath("//div[contains(., 'ICS')]/following-sibling::span/a").map do |i|
508
- code = i.text.match(/[\d.]+/).to_s.split "."
509
- { field: code[0], group: code[1], subgroup: code[2] }
510
- end
511
- @errors[:ics] &&= ics.empty?
512
- ics
513
- end
514
-
515
- #
516
- # Fetch links.
517
- #
518
- # @param url [String] document url
519
- #
520
- # @return [Array<Hash>]
521
- #
522
- def fetch_link(url) # rubocop:disable Metrics/AbcSize,Metrics/CyclomaticComplexity,Metrics/MethodLength
523
- links = [{ type: "src", content: url }]
524
- obp = @doc.at("//a[.='Read sample']")
525
- @errors[:link_obp] &&= obp.nil?
526
- links << { type: "obp", content: obp[:href] } if obp
527
- rss = @doc.at("//a[contains(@href, 'rss')]")
528
- @errors[:link_rss] &&= rss.nil?
529
- links << { type: "rss", content: DOMAIN + rss[:href] } if rss
530
- pub = @doc.at "//p[contains(., 'publicly available')]/a",
531
- "//p[contains(., 'can be downloaded from the')]/a"
532
- @errors[:link_pub] &&= pub.nil?
533
- links << { type: "pub", content: pub[:href] } if pub
534
- links
535
- end
536
-
537
- # Fetch copyright.
538
- # @return [Array<Hash>]
539
- def fetch_copyright # rubocop:disable Metrics/MethodLength
540
- ref = item_ref @doc
541
- owner_name = ref.match(/.*?(?=\s)/).to_s
542
- from = ref.match(/(?<=:)\d{4}/).to_s
543
- if from.empty?
544
- date = @doc.at(
545
- "//span[@itemprop='releaseDate']",
546
- "//ul[@id='stages']/li[contains(@class,'active')]/ul/li[@class='active']/a/span[@class='stage-date']",
547
- )
548
- from = date.text.match(/\d{4}/).to_s
549
- end
550
- [{ owner: [{ name: owner_name }], from: from }]
551
- end
552
- end
553
- end
@@ -1,6 +0,0 @@
1
- module RelatonIso
2
- module Util
3
- extend RelatonBib::Util
4
- PROGNAME = "relaton-iso".freeze
5
- end
6
- end
@@ -1,5 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- module RelatonIso
4
- VERSION = "1.20.0"
5
- end
data/lib/relaton_iso.rb DELETED
@@ -1,17 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- require "nokogiri"
4
- require "net/http"
5
- require "logger"
6
- require "pubid-iso"
7
- require "relaton/index"
8
- require "relaton_iso_bib"
9
- require "relaton_iso/version"
10
- require "relaton_iso/util"
11
- require "relaton_iso/hash_converter"
12
- require "relaton_iso/hit"
13
- require "relaton_iso/iso_bibliography"
14
- require "relaton_iso/document_identifier"
15
- # require "relaton_iso/index"
16
- require "relaton_iso/queue"
17
- require "relaton_iso/data_fetcher"