relaton-iso 1.20.0 → 2.0.0.pre.alpha.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (69) hide show
  1. checksums.yaml +4 -4
  2. data/.rubocop.yml +1 -1
  3. data/Gemfile +1 -0
  4. data/README.adoc +134 -130
  5. data/bin/console +1 -1
  6. data/grammars/basicdoc.rng +2110 -0
  7. data/grammars/biblio-standoc.rng +287 -0
  8. data/grammars/biblio.rng +2097 -0
  9. data/grammars/relaton-iso-compile.rng +11 -0
  10. data/grammars/relaton-iso.rng +214 -0
  11. data/lib/relaton/iso/bibliography.rb +206 -0
  12. data/lib/relaton/iso/data_fetcher.rb +227 -0
  13. data/lib/relaton/iso/hash_parser_v1.rb +121 -0
  14. data/lib/relaton/iso/hit.rb +62 -0
  15. data/lib/relaton/iso/hit_collection.rb +117 -0
  16. data/lib/relaton/iso/item_data.rb +49 -0
  17. data/lib/relaton/iso/model/bibdata.rb +9 -0
  18. data/lib/relaton/iso/model/bibitem.rb +7 -0
  19. data/lib/relaton/iso/model/contributor.rb +7 -0
  20. data/lib/relaton/iso/model/contributor_info.rb +9 -0
  21. data/lib/relaton/iso/model/docidentifier.rb +128 -0
  22. data/lib/relaton/iso/model/doctype.rb +13 -0
  23. data/lib/relaton/iso/model/ext.rb +47 -0
  24. data/lib/relaton/iso/model/iso_project_group.rb +21 -0
  25. data/lib/relaton/iso/model/item.rb +17 -0
  26. data/lib/relaton/iso/model/item_base.rb +19 -0
  27. data/lib/relaton/iso/model/organization.rb +9 -0
  28. data/lib/relaton/iso/model/project_number.rb +22 -0
  29. data/lib/relaton/iso/model/relation.rb +9 -0
  30. data/lib/relaton/iso/model/stagename.rb +14 -0
  31. data/lib/relaton/iso/model/structured_identifier.rb +31 -0
  32. data/lib/relaton/iso/processor.rb +78 -0
  33. data/lib/relaton/iso/queue.rb +63 -0
  34. data/lib/relaton/iso/scraper.rb +591 -0
  35. data/lib/relaton/iso/util.rb +8 -0
  36. data/lib/relaton/iso/version.rb +7 -0
  37. data/lib/relaton/iso.rb +17 -0
  38. data/relaton_iso.gemspec +9 -7
  39. metadata +76 -46
  40. data/bin/bundle +0 -109
  41. data/bin/byebug +0 -27
  42. data/bin/coderay +0 -27
  43. data/bin/gdb_wrapper +0 -29
  44. data/bin/htmldiff +0 -27
  45. data/bin/httpclient +0 -29
  46. data/bin/ldiff +0 -27
  47. data/bin/nokogiri +0 -27
  48. data/bin/pry +0 -27
  49. data/bin/pubid-nist +0 -27
  50. data/bin/racc +0 -27
  51. data/bin/rackup +0 -29
  52. data/bin/rake +0 -27
  53. data/bin/rubocop +0 -27
  54. data/bin/ruby-parse +0 -27
  55. data/bin/ruby-rewrite +0 -27
  56. data/bin/safe_yaml +0 -29
  57. data/bin/thor +0 -27
  58. data/lib/relaton_iso/data_fetcher.rb +0 -246
  59. data/lib/relaton_iso/document_identifier.rb +0 -46
  60. data/lib/relaton_iso/hash_converter.rb +0 -15
  61. data/lib/relaton_iso/hit.rb +0 -59
  62. data/lib/relaton_iso/hit_collection.rb +0 -100
  63. data/lib/relaton_iso/iso_bibliography.rb +0 -202
  64. data/lib/relaton_iso/processor.rb +0 -67
  65. data/lib/relaton_iso/queue.rb +0 -61
  66. data/lib/relaton_iso/scrapper.rb +0 -553
  67. data/lib/relaton_iso/util.rb +0 -6
  68. data/lib/relaton_iso/version.rb +0 -5
  69. data/lib/relaton_iso.rb +0 -17
@@ -0,0 +1,591 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Relaton
4
+ module Iso
5
+ # Scraper of ISO document pages.
6
+ class Scraper # rubocop:disable Metrics/ModuleLength
7
# Base URL of the ISO web site; page paths are appended to it.
DOMAIN = "https://www.iso.org"

# Document type names keyed by the type or publisher token found in a
# document reference (looked up in #fetch_type).
TYPES = {
  "TS" => "technical-specification",
  "DTS" => "technical-specification",
  "TR" => "technical-report",
  "DTR" => "technical-report",
  "PAS" => "publicly-available-specification",
  # "AWI" => "approvedWorkItem",
  # "CD" => "committeeDraft",
  # "FDIS" => "finalDraftInternationalStandard",
  # "NP" => "newProposal",
  # "DIS" => "draftInternationalStandard",
  # "WD" => "workingDraft",
  # "R" => "recommendation",
  "Guide" => "guide",
  "ISO" => "international-standard",
  "IEC" => "international-standard",
  "IWA" => "international-workshop-agreement",
}.freeze

# Stage abbreviations keyed by stage code. The entry for "60" is a nested
# hash keyed by a second code (presumably the substage - confirm).
STGABBR = {
  "00" => "NWIP",
  "10" => "AWI",
  "20" => "WD",
  "30" => "CD",
  "40" => "DIS",
  "50" => "FDIS",
  "60" => { "00" => "PRF", "60" => "FINAL" },
}.freeze

# Known publishers keyed by the abbreviation used in document references.
# Only the outer hash is frozen; the entries themselves are not.
PUBLISHERS = {
  "IEC" => { name: "International Electrotechnical Commission", uri: "www.iec.ch" },
  "ISO" => { name: "International Organization for Standardization", uri: "www.iso.org" },
  "IEEE" => { name: "Institute of Electrical and Electronics Engineers", uri: "www.ieee.org" },
  "SAE" => { name: "SAE International", uri: "www.sae.org" },
  # The name used to start with a stray space, which leaked into the output.
  "CIE" => { name: "International Commission on Illumination", uri: "cie.co.at" },
  "ASME" => { name: "American Society of Mechanical Engineers", uri: "www.asme.org" },
}.freeze
46
+
47
+ # extend self
48
+
49
# Create a scraper.
#
# @param lang [String, nil] language code, consulted by #languages when
#   language versions of the page are collected
# @param errors [Hash] parsing error flags; truthy entries are replaced
#   with whether the corresponding page element was missing
def initialize(lang, errors)
  @lang, @errors = lang, errors
end
53
+
54
# Parse a page with a freshly created scraper.
#
# @param path [String] page path
# @param lang [String, nil] language
# @param errors [Hash] collection of parsing errors
# @return [Object] whatever #parse returns (an ItemData built from the page)
def self.parse_page(path, lang: nil, errors: {})
  scraper = new(lang, errors)
  scraper.parse(path)
end
62
+
63
# Fetch the page at +path+ and build a bibliographic item from it.
#
# Stores the parsed document and its final URL in @doc and @url, which the
# fetch_* helpers read.
#
# @param path [String] page path, resolved against DOMAIN
# @return [ItemData] item assembled from the scraped values
def parse(path) # rubocop:disable Metrics/AbcSize,Metrics/MethodLength
  @doc, @url = get_page path
  titles, abstract, langs = fetch_titles_abstract
  lang_codes = langs.map { |entry| entry[:lang] }

  attributes = {
    id: id.gsub(/[^\w]/, ""),
    fetched: Date.today.to_s,
    type: "standard",
    docidentifier: fetch_relaton_docids,
    docnumber: fetch_docnumber,
    edition: edition,
    language: lang_codes,
    script: lang_codes.map { |code| script(code) }.uniq,
    title: titles,
    status: fetch_status,
    ics: fetch_ics,
    date: fetch_dates,
    contributor: fetch_contributors,
    abstract: abstract,
    copyright: fetch_copyright,
    source: fetch_source(@url),
    relation: fetch_relations,
    place: [Bib::Place.new(content:"Geneva")],
    structuredidentifier: fetch_structuredidentifier,
    ext: parse_ext,
  }
  ItemData.new(**attributes)
end
90
+
91
# Document reference shown in the page heading (memoized).
#
# @return [String, nil] text of the first <span> in the <h1>, cut at the
#   first " | " and stripped; nil when the element is missing
def id
  return @id if defined?(@id)

  heading = @doc.at("//h1/span[1]")
  @errors[:id] &&= heading.nil?
  @id = heading ? heading.text.split(" | ").first.strip : nil
end
98
+
99
# Parse the document reference (see #id) into a Pubid identifier.
#
# The identifier is memoized. When it has a base and its root carries no
# edition yet, the edition found on the page is copied to the root.
#
# @return [Pubid::Iso::Identifier] parsed identifier; when parsing raises,
#   the error is logged and the result of Util.error is returned instead
def pubid # rubocop:disable Metrics/AbcSize
  return @pubid if @pubid

  @pubid = ::Pubid::Iso::Identifier.parse(id)
  # #edition is nil when the page has no edition element; that must not be
  # reported as a failure to parse the identifier.
  @pubid.root.edition ||= edition&.content if @pubid.base
  @pubid
rescue StandardError => e
  Util.error "Failed to parse pubid from #{id}: #{e.message}"
end
108
+
109
# Copy of the parsed identifier used for the URN docidentifier.
#
# @return [Object] duplicate of #pubid whose stage, when not already set,
#   is parsed from the page's stage code
def urn
  pubid.dup.tap do |copy|
    copy.stage ||= ::Pubid::Iso::Identifier.parse_stage(stage_code)
  end
end
114
+
115
# Edition read from the page (memoized).
#
# @return [Bib::Edition, nil] edition whose content is the trailing digits
#   of the "Edition" field; nil when the field is missing
def edition
  return @edition if defined?(@edition)

  field = @doc.at("//div[div[.='Edition']]/text()[last()]")
  @errors[:edition] &&= field.nil?
  @edition = field ? Bib::Edition.new(content: field.text.match(/\d+$/).to_s) : nil
end
122
+
123
#
# Create document ids.
#
# @return [Array<Docidentifier>] primary "ISO" id, "iso-reference" id and
#   "URN" id, in that order
#
def fetch_relaton_docids
  primary = Docidentifier.new(content: pubid, type: "ISO", primary: true)
  reference = Docidentifier.new(content: pubid, type: "iso-reference")
  urn_id = Docidentifier.new(content: urn, type: "URN")
  [primary, reference, urn_id]
end
135
+
136
#
# Create ISO reference identifier with English language.
#
# The identifier is rebuilt from the parsed one without its :typed_stage
# value and rendered in the :ref_num_short format.
#
# @return [String] English reference identifier
#
def isoref
  attributes = pubid.value.to_h.select { |key, _| key != :typed_stage }
  english = ::Pubid::Iso::Identifier.create(language: "en", **attributes)
  english.to_s(format: :ref_num_short)
end
145
+
146
+ private
147
+
148
# Fetch titles and abstracts for every available language.
#
# @return [Array<Array>] three arrays: titles, abstracts and the language
#   entries (see #languages) that were actually used
def fetch_titles_abstract # rubocop:disable Metrics/AbcSize,Metrics/MethodLength
  titles = []
  abstracts = []
  available = []
  languages.each do |entry|
    # The entry without a path is the page already loaded into @doc.
    page = entry[:path] ? get_page(entry[:path]).first : @doc
    # Skip pages carrying the "not available in Russian" notice.
    next if page.at("//h5[@class='help-block'][.='недоступно на русском языке']")

    available << entry
    titles.concat fetch_title(page, entry[:lang])
    summary = parse_abstract(page, entry[:lang])
    abstracts << summary if summary
  end
  [titles, abstracts, available]
end
166
+
167
# Build the abstract from the page description.
#
# Paragraphs and list items of the description are joined with newlines;
# list items are prefixed with "- " and empty lines are dropped.
#
# @param doc [Nokogiri::HTML::Document] page to read
# @param lang [String] language of the page
# @return [Bib::LocalizedMarkedUpString, nil] abstract, or nil when the
#   description is empty
def parse_abstract(doc, lang)
  nodes = doc.xpath(
    "//div[@itemprop='description']/p|//div[@itemprop='description']/ul/li",
  )
  lines = nodes.map { |node| node.name == "li" ? "- #{node.text}" : node.text }
  text = lines.reject(&:empty?).join("\n")
  missing = text.empty?
  @errors[:abstract] &&= missing
  return if missing

  Bib::LocalizedMarkedUpString.new(content: text, language: lang, script: script(lang))
end
176
+
177
# Returns available languages.
#
# English is always listed first, without a path. A language-switcher link
# whose href starts with "/fr/" adds French with that href as its path.
#
# NOTE(review): a link is skipped when its language equals @lang - confirm
# this exclusion is intended.
#
# @return [Array<Hash>] entries with :lang and, except for English, :path
def languages
  found = [{ lang: "en" }]
  @doc.css("li#lang-switcher ul li a").each do |anchor|
    href = anchor.attr("href")
    matched = href.match(%r{^/(fr)/})
    next if matched.nil? || (@lang && matched[1] == @lang)

    found << { lang: matched[1], path: href }
  end
  @errors[:language] &&= found.size == 1
  found
end
189
+
190
# Get page.
#
# Network-level failures are retried after a one-second pause; the fourth
# failure raises.
#
# @param path [String] page's path
# @return [Array<Nokogiri::HTML::Document, String>] parsed page and its URL
# @raise [Relaton::RequestError] if the page cannot be accessed
def get_page(path) # rubocop:disable Metrics/MethodLength
  attempts = 0
  begin
    response, location = get_redirection path
    [try_if_fail(response, location), location.to_s]
  rescue SocketError, Timeout::Error, Errno::EINVAL, Errno::ECONNRESET,
         EOFError, Net::HTTPBadResponse, Net::HTTPHeaderSyntaxError,
         Net::ProtocolError, Errno::ETIMEDOUT
    attempts += 1
    if attempts <= 3
      sleep 1
      retry
    end
    raise Relaton::RequestError, "Could not access #{DOMAIN}#{path}"
  end
end
209
+
210
#
# Get the page from the given path. If the page is redirected, get the
# page from the new path.
#
# @param [String] path path to the page; it is resolved against DOMAIN, so
#   an absolute URL (e.g. from a Location header) is used as is
#
# @return [Array<Net::HTTPOK, URI>] HTTP response and URI
# @raise [Relaton::RequestError] if the page is not found
#
def get_redirection(path) # rubocop:disable Metrics/AbcSize,Metrics/MethodLength
  # URI.join copes with relative paths and with absolute URLs, whereas
  # plain concatenation produced a broken URL for the latter.
  uri = URI.join(DOMAIN, path)
  try = 0
  begin
    get_response uri
  rescue Errno::EPIPE => e
    try += 1
    # check_try pauses and returns true while attempts remain.
    retry if check_try try, uri
    raise e
  end
end
230
+
231
# Decide whether another attempt should be made.
#
# @param try [Integer] number of attempts made so far
# @param uri [URI, String] address being fetched, used in the warning
# @return [true, nil] true after warning and pausing one second while
#   fewer than three attempts were made, otherwise nil
def check_try(try, uri)
  return unless try < 3

  warn "Timeout fetching #{uri}, retrying..."
  sleep 1
  true
end
238
+
239
# Request +uri+ and return the response for a 200 status.
#
# A 301 is followed through #get_redirection and a 404 raises at once. Any
# other status is retried after an exponentially growing pause
# (2**attempt seconds) until attempt 3 has been made.
#
# NOTE(review): only 301 is treated as a redirect; other 3xx codes go
# through the retry path - confirm that is intended.
#
# @param uri [URI] address to request
# @param try [Integer] number of the first attempt
# @return [Array<Net::HTTPResponse, URI>] response and the URI it came from
# @raise [Relaton::RequestError] on 404 or when the attempts are used up
def get_response(uri, try = 0)
  try.upto(3) do |attempt|
    resp = Net::HTTP.get_response(uri)
    case resp.code
    when "200" then return [resp, uri]
    when "301" then return get_redirection(resp["location"])
    when "404" then raise Relaton::RequestError, "#{uri} not found."
    end
    sleep(2**attempt)
  end
  raise Relaton::RequestError, "#{uri} not found."
end
252
+
253
#
# The iso.org site fails to respond sometimes. This method parses the
# response and, while the page has no document reference, requests the
# page again (at most ten rounds).
#
# @param [Net::HTTPOK] resp HTTP response
# @param [URI::HTTPS] uri URI of the page
#
# @return [Nokogiri::HTML4::Document] document
# @raise [Relaton::RequestError] if the page could not be parsed
#
def try_if_fail(resp, uri)
  response = resp
  10.times do
    page = Nokogiri::HTML(response.body)
    # A page with a document reference is considered complete.
    return page if item_ref(page)

    response = Net::HTTP.get_response(uri)
  end
  raise Relaton::RequestError, "Could not parse the page #{uri}"
end
273
+
274
#
# Generate docnumber.
#
# @return [String, nil] first run of digits in the string form of #pubid,
#   nil when there is none
#
def fetch_docnumber
  pubid.to_s[/\d+/]
end
282
+
283
#
# Parse structuredidentifier.
#
# The project number is the base name of the page URL without extension;
# the type is the publisher of the parsed identifier's root.
#
# @return [StructuredIdentifier] structured identifier
#
def fetch_structuredidentifier # rubocop:disable Metrics/MethodLength
  StructuredIdentifier.new(
    project_number: ProjectNumber.new(content: File.basename(@url, ".*")),
    type: pubid.root.publisher,
  )
end
292
+
293
#
# Parse ID from the document.
#
# @param [Nokogiri::HTML::Document] doc document to parse
#
# @return [String, nil] stripped text of the heading's first <span>, nil
#   when the element is missing
#
def item_ref(doc)
  heading = doc.at("//main//section/div/div/div//h1/span[1]")
  @errors[:reference] &&= heading.nil?
  return if heading.nil?

  heading.text&.strip
end
305
+
306
# Fetch status.
#
# The stage code has the form "stage.substage"; the substage is optional.
#
# @return [Bib::Status, nil] status, or nil when the page has no stage code
#   (#stage_code returns nil in that case and records it in the errors)
def fetch_status
  code = stage_code
  return if code.nil? || code.empty?

  stg, sbstg = code.split(".")
  stage = Bib::Status::Stage.new(content: stg)
  substage = Bib::Status::Stage.new(content: sbstg) if sbstg
  Bib::Status.new(stage: stage, substage: substage)
end
314
+
315
# Stage code of the active entry in the page's stage menu (memoized).
#
# @return [String, nil] text of the active stage-code element, nil when
#   the element is missing
def stage_code
  return @stage_code if defined?(@stage_code)

  active = @doc.at("//ul[@class='dropdown-menu']/li[@class='active']/a/span[@class='stage-code']")
  @errors[:stage] &&= active.nil?
  @stage_code = active.nil? ? nil : active.text
end
322
+
323
+ # def stage(stg, substg)
324
+ # abbr = STGABBR[stg].is_a?(Hash) ? STGABBR[stg][substg] : STGABBR[stg]
325
+ # RelatonBib::DocumentStatus::Stage.new value: stg, abbreviation: abbr
326
+ # end
327
+
328
# Fetch relations.
#
# Every life-cycle step of the page contributes one relation per link it
# contains, except steps headed "Now" or "Now under review".
#
# @return [Array<Relation>] relations built by #create_relations
def fetch_relations
  skipped = ["Now", "Now under review"]
  steps = @doc.xpath(
    "//ul[@class='steps']/li", "//div[contains(@class, 'sub-step')]"
  )
  relations = steps.flat_map do |step|
    type, date = relation_type(step.at("h4", "h5").text.strip)
    skipped.include?(type) ? [] : create_relations(step, type, date)
  end
  @errors[:relation] &&= relations.empty?
  relations
end
343
+
344
#
# Parse relation type and dates.
#
# "Previously" and "Will be replaced by" map to "obsoletes". Headings that
# mention corrigenda, amendments, revisions, confirmation or replacement
# map to "updates" and get a "circulated" date taken from the last stage
# date on the page that contains a "-". Any other heading is returned as
# given.
#
# @param [String] type parsed type
#
# @return [Array<String,Array>] type and dates
#
def relation_type(type)
  heading = type.strip
  return ["obsoletes", []] if ["Previously", "Will be replaced by"].include?(heading)
  return [type, []] unless /Corrigenda|Amendments|Revised by|Now confirmed|replaced by/.match?(heading)

  dates = []
  last_stage = @doc.xpath('//span[@class="stage-date"][contains(., "-")]').last
  dates << Bib::Date.new(type: "circulated", at: last_stage.text) if last_stage
  ["updates", dates]
end
363
+
364
#
# Create relations.
#
# One relation is created for every link inside the element; the link
# text becomes both the primary "ISO" docidentifier and the formattedref
# of the related item.
#
# @param [Nokogiri::HTML::Element] rel relation element
# @param [String] type relation type
# @param [Array] date dates of the related document (see #relation_type)
#
# @return [Array<Relation>] Relations
#
def create_relations(rel, type, date)
  rel.css("a").map do |link|
    identifier = Docidentifier.new(type: "ISO", content: link.text, primary: true)
    related = ItemData.new(docidentifier: [identifier], formattedref: link.text, date: date)
    Relation.new(type: type, bibitem: related)
  end
end
382
+
383
# Fetch titles.
#
# The parts returned by #parse_titles become "title-intro", "title-main"
# and "title-part" titles (missing parts are skipped), followed by a
# "main" title that joins the present parts with " - ".
#
# @param doc [Nokogiri::HTML::Document]
# @param lang [String]
# @return [Array<Bib::Title>]
def fetch_title(doc, lang) # rubocop:disable Metrics/MethodLength,Metrics/AbcSize
  types = %w[title-intro title-main title-part]
  parts = parse_titles(doc).zip(types).filter_map do |text, type|
    Bib::Title.new(type: type, content: text, language: lang, script: script(lang)) if text
  end
  joined = parts.map(&:content).join(" - ")
  parts << Bib::Title.new(type: "main", content: joined, language: lang, script: script(lang))
end
399
+
400
# Split the page title into intro, main and part.
#
# The title is read from the <span> elements after the first one in the
# "stdTitle" heading; the first of them is further split on " - " or an
# em dash surrounded by whitespace.
#
# @param doc [Nokogiri::HTML::Document] page to read
# @return [Array<String, nil>] intro, main and part; an empty array when
#   the page has no title elements
def parse_titles(doc)
  ttls = doc.xpath("//h1[@class='stdTitle']/span[position()>1]").map(&:text)
  @errors[:title] &&= ttls.empty?
  # Return early regardless of the error flag: without title elements
  # there is nothing to split.
  return ttls if ttls.empty?

  ttls[0, 1] = ttls[0].split(/\s(?:-|\u2014)\s/)
  case ttls.size
  when 0, 1 then [nil, ttls.first, nil]
  else intro_or_part ttls
  end
end
411
+
412
# Arrange title pieces as intro, main and part.
#
# When the second piece starts with "Part N:" or "Partie N:" there is no
# intro and everything from the second piece on forms the part; otherwise
# the first two pieces are intro and main and the rest, if any, the part.
#
# @param ttls [Array<String>]
# @return [Array<String, nil>]
def intro_or_part(ttls)
  return [nil, ttls[0], ttls[1..].join(" -- ")] if /^(Part|Partie) \d+:/.match?(ttls[1])

  rest = ttls.slice(2..-1)
  [ttls[0], ttls[1], rest.any? ? rest.join(" -- ") : nil]
end
423
+
424
# Return ISO script code.
# @param lang [String]
# @return [String, nil] "Latn" for "en" and "fr", "Cyrl" for "ru", nil for
#   any other language
def script(lang)
  { "en" => "Latn", "fr" => "Latn", "ru" => "Cyrl" }[lang]
end
433
+
434
# Fetch dates
#
# When the document reference ends in ":YYYY" the dates are derived by
# #parse_date_from_id; otherwise the release date, if present, becomes the
# "published" date. A modification date on the page adds a "corrected"
# date.
#
# @return [Array<Bib::Date>]
def fetch_dates # rubocop:disable Metrics/AbcSize,Metrics/MethodLength
  dates = []
  found = %r{^[^\s]+\s[\d-]+:(?<ref_date_str>\d{4})}.match(id)
  ref_year = found && found[:ref_date_str]

  released = @doc.at("//span[@itemprop='releaseDate']")
  @errors[:date_pub] &&= released.nil?
  if ref_year
    dates.concat parse_date_from_id(ref_year, released)
  elsif released
    dates << Bib::Date.new(type: "published", at: released.text)
  end

  modified = @doc.at "//span[@itemprop='dateModified']"
  @errors[:date_corr] &&= modified.nil?
  dates << Bib::Date.new(type: "corrected", at: modified.text) if modified
  dates
end
451
+
452
# Derive dates from the year in the document reference and the release
# date on the page.
#
# Without a release date the reference year is the "published" date. When
# the release year is later than the reference year, the reference year is
# "published" and the release date "updated"; otherwise the release date
# is "published".
#
# @param ref_date_str [String] year taken from the document reference
# @param pub_date_str [#text, nil] release date element of the page
# @return [Array<Bib::Date>]
def parse_date_from_id(ref_date_str, pub_date_str) # rubocop:disable Metrics/AbcSize,Metrics/MethodLength
  ref_year = ::Date.strptime(ref_date_str, "%Y").year
  return [Bib::Date.new(type: "published", at: ref_date_str)] if pub_date_str.nil?

  pub_year = ::Date.strptime(pub_date_str.text, "%Y").year
  if pub_year <= ref_year
    [Bib::Date.new(type: "published", at: pub_date_str.text)]
  else
    [
      Bib::Date.new(type: "published", at: ref_date_str),
      Bib::Date.new(type: "updated", at: pub_date_str.text),
    ]
  end
end
468
+
469
# Build publisher contributors from the document reference.
#
# The part of the reference before the first whitespace is split on "/";
# every piece that is a key of PUBLISHERS yields one contributor with the
# "publisher" role. The PUBLISHERS entries are only read, never modified.
#
# @return [Array<Bib::Contributor>]
def fetch_contributors # rubocop:disable Metrics/AbcSize,Metrics/MethodLength
  id.sub(/\s.*/, "").split("/").filter_map do |abbrev|
    publisher = PUBLISHERS[abbrev]
    next unless publisher

    name = Bib::TypedLocalizedString.new(content: publisher[:name])
    abbreviation = Bib::LocalizedString.new(content: abbrev)
    uri = Bib::Uri.new(content: publisher[:uri]) if publisher[:uri]
    # compact keeps the list free of nil should an entry have no URI.
    org = Bib::Organization.new(name: [name], abbreviation: abbreviation, uri: [uri].compact)
    role = Bib::Contributor::Role.new(type: "publisher")
    Bib::Contributor.new(organization: org, role: [role])
  end
end
483
+
484
# Fetch ICS.
#
# The code is the first run of digits and dots in each ICS link; its
# description is looked up with Isoics.
#
# @return [Array<Bib::ICS>]
def fetch_ics
  links = @doc.xpath("//div[contains(., 'ICS')]/following-sibling::span/a")
  entries = links.map do |link|
    code = link.text[/[\d.]+/].to_s
    Bib::ICS.new(code: code, text: Isoics.fetch(code).description)
  end
  @errors[:ics] &&= entries.empty?
  entries
end
496
+
497
#
# Fetch source.
#
# Besides the page itself ("src") the list contains, when the page has
# them, the "Read sample" link ("obp"), the RSS link prefixed with DOMAIN
# ("rss") and the link to a publicly available copy ("pub").
#
# @param url [String] document url
#
# @return [Array<Bib::Uri>]
#
def fetch_source(url) # rubocop:disable Metrics/AbcSize,Metrics/CyclomaticComplexity,Metrics/MethodLength
  links = [Bib::Uri.new(type: "src", content: url)]

  sample = @doc.at("//a[.='Read sample']")
  @errors[:link_obp] &&= sample.nil?
  links << Bib::Uri.new(type: "obp", content: sample[:href]) if sample

  feed = @doc.at("//a[contains(@href, 'rss')]")
  @errors[:link_rss] &&= feed.nil?
  links << Bib::Uri.new(type: "rss", content: DOMAIN + feed[:href]) if feed

  public_copy = @doc.at(
    "//p[contains(., 'publicly available')]/a",
    "//p[contains(., 'can be downloaded from the')]/a",
  )
  @errors[:link_pub] &&= public_copy.nil?
  links << Bib::Uri.new(type: "pub", content: public_copy[:href]) if public_copy

  links
end
518
+
519
# Fetch copyright.
#
# The owner is the part of the document reference before the first
# whitespace. The year is the four digits following ":" in the reference
# or, failing that, the first four digits of the release date or of the
# active stage date.
#
# @return [Array<Bib::Copyright>] one copyright, or an empty array when no
#   year can be determined
def fetch_copyright # rubocop:disable Metrics/MethodLength,Metrics/AbcSize
  ref = item_ref(@doc).to_s
  owner_name = ref.match(/.*?(?=\s)/).to_s
  from = ref.match(/(?<=:)\d{4}/).to_s
  if from.empty?
    date = @doc.at(
      "//span[@itemprop='releaseDate']",
      "//ul[@id='stages']/li[contains(@class,'active')]/ul/li[@class='active']/a/span[@class='stage-date']",
    )
    # Neither element may be present on the page.
    date_text = date ? date.text.to_s : ""
    from = date_text.match(/\d{4}/).to_s
  end
  return [] if from.empty?

  name = Bib::TypedLocalizedString.new content: owner_name
  org = Bib::Organization.new name: [name]
  contrib = Bib::ContributionInfo.new organization: org
  [Bib::Copyright.new(owner: [contrib], from: from)]
end
537
+
538
# Build the ISO-specific extension of the item.
#
# Only doctype, flavor, editorialgroup, ics and structuredidentifier are
# filled from the page; the remaining attributes are passed as nil.
#
# @return [Ext]
def parse_ext # rubocop:disable Metrics/MethodLength
  doctype = fetch_type
  editorialgroup = fetch_editorialgroup
  ics = fetch_ics
  structuredidentifier = fetch_structuredidentifier

  Ext.new(
    doctype: doctype,
    flavor: "iso",
    editorialgroup: editorialgroup,
    approvalgroup: nil,
    ics: ics,
    structuredidentifier: structuredidentifier,
    stagename: nil,
    updates_document_type: nil,
    fast_track: nil,
    price_code: nil,
  )
end
552
+
553
# Fetch type.
#
# The document reference is matched for a publisher prefix and a type
# token. The doctype is looked up in TYPES by the type token, then by the
# prefix, and defaults to "international-standard".
#
# @return [Doctype]
def fetch_type
  pattern = %r{
    ^(?<prefix>ISO|IWA|IEC)
    (?:(?:/CIE|/IEC|/IEEE|/PRF|/NP|/SAE|/HL7|/DGuide)*\s|/)
    (?<type>TS|TR|PAS|AWI|CD|FDIS|NP|DIS|WD|R|DTS|DTR|ISP|PWI|Guide|(?=\d+))
  }x
  found = pattern.match(id)
  known = found && (TYPES[found[:type]] || TYPES[found[:prefix]])
  Doctype.new(content: known || "international-standard")
end
564
+
565
#
# Fetch editorialgroup.
#
# The committee link text is split on "/"; the second piece supplies the
# group type (its leading capital letters, "TC" by default) and number
# (its first run of digits). The link's title attribute is the group name.
#
# @return [ISOProjectGroup, nil] group with one technical committee, or
#   nil when the page has no committee link
#
def fetch_editorialgroup # rubocop:disable Metrics/MethodLength,Metrics/AbcSize,Metrics/CyclomaticComplexity,Metrics/PerceivedComplexity
  link = @doc.at("//div[contains(., 'Technical Committe')]/following-sibling::span/a")
  @errors[:editorialgroup] &&= link.nil?
  return if link.nil?

  committee = link.text.split("/")[1]
  group = Bib::WorkGroup.new(
    content: link[:title],
    identifier: link.text,
    type: committee&.slice(/^[A-Z]+/) || "TC",
    number: committee&.slice(/\d+/)&.to_i,
  )
  ISOProjectGroup.new(technical_committee: [group])
end
589
+ end
590
+ end
591
+ end
@@ -0,0 +1,8 @@
1
module Relaton
  module Iso
    # Utility helpers of relaton-iso. The module takes its methods from
    # Relaton::Bib::Util by extending it.
    module Util
      extend Relaton::Bib::Util

      # Program name of this gem (presumably picked up by the extended
      # helpers to tag their output - confirm in Relaton::Bib::Util).
      PROGNAME = "relaton-iso".freeze
    end
  end
end
@@ -0,0 +1,7 @@
1
+ # frozen_string_literal: true
2
+
3
module Relaton
  module Iso
    # Version of the relaton-iso gem, read by the gemspec. The registry
    # lists this release as 2.0.0.pre.alpha.1.
    VERSION = "2.0.0-alpha.1".freeze
  end
end
@@ -0,0 +1,17 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "net/http"
4
+ require "pubid/iso"
5
+ require "relaton/index"
6
+ require "isoics"
7
+ require "relaton/bib"
8
+ require "relaton/core"
9
+ require_relative "iso/version"
10
+ require_relative "iso/util"
11
+ require_relative "iso/item_data"
12
+ require_relative "iso/model/item"
13
+ require_relative "iso/model/relation"
14
+ require_relative "iso/model/bibitem"
15
+ require_relative "iso/model/bibdata"
16
+ require_relative "iso/hit_collection"
17
+ require_relative "iso/bibliography"
data/relaton_iso.gemspec CHANGED
@@ -2,17 +2,17 @@
2
2
 
3
3
  lib = File.expand_path("lib", __dir__)
4
4
  $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
5
- require "relaton_iso/version"
5
+ require "relaton/iso/version"
6
6
 
7
7
  Gem::Specification.new do |spec|
8
8
  spec.name = "relaton-iso"
9
- spec.version = RelatonIso::VERSION
9
+ spec.version = Relaton::Iso::VERSION
10
10
  spec.authors = ["Ribose Inc."]
11
11
  spec.email = ["open.source@ribose.com"]
12
12
 
13
- spec.summary = "RelatonIso: retrieve ISO Standards for bibliographic " \
13
+ spec.summary = "Relaton::Iso: retrieve ISO Standards for bibliographic " \
14
14
  "use using the IsoBibliographicItem model"
15
- spec.description = "RelatonIso: retrieve ISO Standards for bibliographic " \
15
+ spec.description = "Relaton::Iso: retrieve ISO Standards for bibliographic " \
16
16
  "use using the IsoBibliographicItem model"
17
17
 
18
18
  spec.homepage = "https://github.com/relaton/relaton-iso"
@@ -24,9 +24,11 @@ Gem::Specification.new do |spec|
24
24
  spec.bindir = "exe"
25
25
  spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
26
26
  spec.require_paths = ["lib"]
27
- spec.required_ruby_version = Gem::Requirement.new(">= 2.7.0")
27
+ spec.required_ruby_version = Gem::Requirement.new(">= 3.1.0")
28
28
 
29
- spec.add_dependency "pubid", "~> 0.1.1"
29
+ spec.add_dependency "isoics", "~> 0.1.6"
30
+ spec.add_dependency "pubid-iso", "~> 0.8.0"
31
+ spec.add_dependency "relaton-bib", "~> 2.0.0-alpha.1"
32
+ spec.add_dependency "relaton-core", "~> 0.0.4"
30
33
  spec.add_dependency "relaton-index", "~> 0.2.12"
31
- spec.add_dependency "relaton-iso-bib", "~> 1.20.0"
32
34
  end