relaton-omg 1.18.0 → 1.18.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: cfc5b10fea7afe2778adf2b0a5b681b3155305acf823eb5f08d29ac07d6d95ef
4
- data.tar.gz: 74f40b36ac9b0ad1c8d979e7b21a1249234a37aba35d3791c4ab44325d75e798
3
+ metadata.gz: 3aee35ace5d33ef6a4058a8da3c76d59af173315a7ed32cb5967c0dfb9fd8296
4
+ data.tar.gz: a1138366be2d18d01354d29db3a8fc0870c4c73cbbbe3ac67ffa3700ae85f9bc
5
5
  SHA512:
6
- metadata.gz: 115a462f0e2c13e91cb89882ef4f5bed4d557b0a7332aa10706229d4091ada41439c47265cea58ea342e117ccb794d4d093ce3fb289352f9440c1de31dd03662
7
- data.tar.gz: dfde51a15b0753757b6c9f6776ca803c7a2cd58d0e9677e7b821f238f4f7d521b921804b95418ec47e782796a3ac00d3eeec7b3f88373b34035f62fc6a5f78fd
6
+ metadata.gz: b5194c85224823ac10a951ed66de9c558659751b967d909d53cbbee37ad60eb750f92b60c079fd1f125c324e23dbb651072a40eefd77018d50776331f7228b37
7
+ data.tar.gz: 1b9ef36597bec1b10285d0e5f3583615aa774f0b901d98d3042477f6c9368c20e9cf60d4f47d00564f0effc99c9d4d5bd236ab0c62e5ee70531151da899cabc3
@@ -7,7 +7,7 @@ module RelatonOmg
7
7
  # @param text [String] the OMG standard reference
8
8
  # @return [RelatonOmg::OmgBibliographicItem]
9
9
  def search(text)
10
- Scrapper.scrape_page text
10
+ Scraper.scrape_page text
11
11
  end
12
12
 
13
13
  # @param code [String] the OMG standard reference
@@ -0,0 +1,136 @@
1
+ require "nokogiri"
2
+
3
module RelatonOmg
  # Fetches a specification page from the OMG web site and turns the
  # metadata found on it into the attribute hash used to build an
  # OmgBibliographicItem.
  class Scraper
    URL_PATTERN = "https://www.omg.org/spec/".freeze

    # @param acronym [String] specification acronym (first token after "OMG")
    # @param version [String, nil] version part of the reference; digits and
    #   dots, optionally followed by " beta" and a single digit
    # @param spec [String, nil] trailing word of the reference, if any
    def initialize(acronym, version = nil, spec = nil)
      @acronym = acronym
      @version = version
      @spec = spec
    end

    # Parse a reference of the form "OMG <acronym>[ <version>][ <spec>]",
    # download the matching page and build a bibliographic item from it.
    #
    # @param ref [String] the OMG standard reference
    # @return [RelatonOmg::OmgBibliographicItem, nil] nil when the reference
    #   does not match, the page could not be loaded, or no link was found
    # @raise [RelatonBib::RequestError] when the page request fails (see #get_doc)
    def self.scrape_page(ref)
      # \A/\Z anchor the whole string (a single trailing newline is still
      # tolerated); ^/$ would accept a matching line buried in other text.
      %r{\AOMG (?<acronym>[^\s]+)(?:[\s/](?<version>[\d.]+(?:\sbeta(?:\s\d)?)?))?(?:[\s/](?<spec>\w+))?\Z} =~ ref
      return unless acronym

      scraper = new(acronym, version, spec)
      return if scraper.get_doc.nil? || scraper.fetch_link.empty?

      OmgBibliographicItem.new(**scraper.item)
    end

    # Build the page URL from the acronym and version, download it and keep
    # the parsed document in @doc for the fetch_* methods.
    #
    # @return [Object, nil] the parsed document, or nil when the URL is
    #   invalid or the server answers 404
    # @raise [RelatonBib::RequestError] on any other HTTP error or when
    #   opening the connection times out
    def get_doc
      @url = "#{URL_PATTERN}#{@acronym}/"
      # "1.0 beta 2" becomes the path "1.0/beta/2"
      @url += @version.tr(" ", "/") if @version
      @doc = Nokogiri::HTML(OpenURI.open_uri(@url, open_timeout: 10))
    rescue URI::InvalidURIError
      nil
    rescue OpenURI::HTTPError => e
      return if e.io.status[0] == "404"

      raise RelatonBib::RequestError, "Unable to access #{@url} (#{e.io.status.join(' ')})"
    rescue Net::OpenTimeout => e
      # A timeout carries no response, so there is no #io to read a status from.
      raise RelatonBib::RequestError, "Unable to access #{@url} (#{e.message})"
    end

    # @return [Hash] keyword arguments for OmgBibliographicItem.new
    def item
      {
        id: fetch_id,
        fetched: Date.today.to_s,
        docid: fetch_docid,
        title: fetch_title,
        abstract: fetch_abstract,
        version: fetch_version,
        date: fetch_date,
        docstatus: fetch_status,
        link: fetch_link,
        relation: fetch_relation,
        keyword: fetch_keyword,
        license: fetch_license,
      }
    end

    # @return [String] acronym, page version and spec joined without separators
    def fetch_id
      "#{@acronym}#{doc_version}#{@spec}"
    end

    # @return [Array<RelatonBib::TypedTitleString>] single "main" title; the
    #   spec, when given, is appended after a colon
    def fetch_title
      content = @doc.at('//dt[.="Title:"]/following-sibling::dd').text
      content += ": #{@spec}" if @spec
      title = RelatonBib::FormattedString.new content: content, language: "en", script: "Latn"
      [RelatonBib::TypedTitleString.new(type: "main", title: title)]
    end

    # @return [Array<RelatonBib::DocumentIdentifier>] single primary "OMG"
    #   identifier made of acronym, page version and spec separated by spaces
    def fetch_docid
      id = [@acronym]
      id << doc_version if doc_version
      id << @spec if @spec
      [RelatonBib::DocumentIdentifier.new(id: id.join(" "), type: "OMG", primary: true)]
    end

    # @return [Array<Hash>] single abstract taken from the first paragraph of
    #   the "document-metadata" section
    def fetch_abstract
      content = @doc.at('//section[@id="document-metadata"]/div/div/p').text
      [{ content: content, language: "en", script: "Latn" }]
    end

    # @return [Array<RelatonBib::BibliographicItem::Version>]
    def fetch_version
      [RelatonBib::BibliographicItem::Version.new(pub_date, doc_version)]
    end

    # Version text shown on the page; looked up once and then reused.
    #
    # @return [String]
    def doc_version
      @doc_version ||= @doc.at('//dt[.="Version:"]/following-sibling::dd/p/span').text
    end

    # @return [Array<Hash>] single "published" date
    def fetch_date
      [type: "published", on: pub_date.to_s]
    end

    # @return [Date] parsed "Publication Date:" entry of the page
    def pub_date
      Date.parse @doc.at('//dt[.="Publication Date:"]/following-sibling::dd').text.strip
    end

    # @return [RelatonBib::DocumentStatus] stage is the first word of the
    #   "Document Status:" entry
    def fetch_status
      status = @doc.at('//dt[.="Document Status:"]/following-sibling::dd')
      stage = status.text.strip.match(/\w+/).to_s
      RelatonBib::DocumentStatus.new(stage: stage)
    end

    # Links found on the page. The result is memoized because it is needed
    # twice per lookup: once by .scrape_page to decide whether the page is
    # usable and once by #item.
    #
    # @return [Array<Hash>] hashes with :type ("src" or "pdf") and :content
    def fetch_link
      @links ||= @spec ? spec_links : document_links
    end

    # Every row of the "History" table whose version differs from the page
    # version becomes an "obsoletes" relation.
    #
    # @return [Array<Hash>] hashes with :type and :bibitem
    def fetch_relation
      rows = @doc.xpath('//h2[.="History"]/following-sibling::section/div/table/tbody/tr')
      rows.each_with_object([]) do |row, relations|
        ver = row.at("td").text
        next if ver == doc_version

        # Fifth "/"-separated piece of the link; for an absolute URL shaped
        # like URL_PATTERN that is the acronym segment.
        acronym = row.at("td[3]/a")[:href].split("/")[4]
        fref = RelatonBib::FormattedRef.new content: "OMG #{acronym} #{ver}"
        bibitem = OmgBibliographicItem.new formattedref: fref
        relations << { type: "obsoletes", bibitem: bibitem }
      end
    end

    # @return [Array<String>] texts of the "Categories:" list entries
    def fetch_keyword
      @doc.xpath('//dt[.="Categories:"]/following-sibling::dd/ul/li/a/em').map(&:text)
    end

    # @return [Array<String>] leading run of word characters, spaces and
    #   hyphens of every "IPR Mode" entry
    def fetch_license
      @doc.xpath(
        '//dt/span/a[contains(., "IPR Mode")]/../../following-sibling::dd/span',
      ).map { |l| l.text.match(/[\w\s-]+/).to_s.strip }
    end

    private

    # Link to the PDF of the requested spec below the page URL.
    def spec_links
      # @url ends with "/" when no version was given; drop it so the
      # generated href does not contain "//".
      a = @doc.at("//a[@href='#{@url.chomp('/')}/#{@spec}/PDF']")
      a ? [{ type: "src", content: a[:href] }] : []
    end

    # "This Document" link plus the download link of the page, when present.
    def document_links
      links = []
      a = @doc.at('//dt[.="This Document:"]/following-sibling::dd/a')
      links << { type: "src", content: a[:href] } if a
      pdf = @doc.at('//a[@class="download-document"]')
      links << { type: "pdf", content: pdf[:href] } if pdf
      links
    end
  end
end
@@ -1,3 +1,3 @@
1
1
  module RelatonOmg
2
- VERSION = "1.18.0".freeze
2
+ VERSION = "1.18.1".freeze
3
3
  end
data/lib/relaton_omg.rb CHANGED
@@ -3,7 +3,7 @@ require "relaton_bib"
3
3
  require "relaton_omg/version"
4
4
  require "relaton_omg/config"
5
5
  require "relaton_omg/util"
6
- require "relaton_omg/scrapper"
6
+ require "relaton_omg/scraper"
7
7
  require "relaton_omg/omg_bibliography"
8
8
  require "relaton_omg/omg_bibliographic_item"
9
9
  require "relaton_omg/xml_parser"
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: relaton-omg
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.18.0
4
+ version: 1.18.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ribose Inc.
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2024-01-08 00:00:00.000000000 Z
11
+ date: 2024-06-27 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: relaton-bib
@@ -56,7 +56,7 @@ files:
56
56
  - lib/relaton_omg/omg_bibliographic_item.rb
57
57
  - lib/relaton_omg/omg_bibliography.rb
58
58
  - lib/relaton_omg/processor.rb
59
- - lib/relaton_omg/scrapper.rb
59
+ - lib/relaton_omg/scraper.rb
60
60
  - lib/relaton_omg/util.rb
61
61
  - lib/relaton_omg/version.rb
62
62
  - lib/relaton_omg/xml_parser.rb
@@ -82,7 +82,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
82
82
  - !ruby/object:Gem::Version
83
83
  version: '0'
84
84
  requirements: []
85
- rubygems_version: 3.3.26
85
+ rubygems_version: 3.3.27
86
86
  signing_key:
87
87
  specification_version: 4
88
88
  summary: 'RelatonOmg: retrieve OMG Standards for bibliographic using the IsoBibliographicItem
@@ -1,121 +0,0 @@
1
- require "nokogiri"
2
-
3
- module RelatonOmg
4
- module Scrapper
5
- URL_PATTERN = "https://www.omg.org/spec/".freeze
6
-
7
- class << self
8
- def scrape_page(ref) # rubocop:disable Metrics/AbcSize, Metrics/MethodLength
9
- %r{OMG (?<acronym>[^\s]+)\s?(?<version>.*)} =~ ref
10
- return unless acronym
11
-
12
- url = URL_PATTERN + acronym
13
- url += "/#{version}" if version
14
- doc = Nokogiri::HTML OpenURI.open_uri(url, open_timeout: 10)
15
- OmgBibliographicItem.new(**item(doc, acronym))
16
- rescue OpenURI::HTTPError, URI::InvalidURIError, Net::OpenTimeout => e
17
- return if e.is_a?(URI::InvalidURIError) || e.io.status[0] == "404"
18
-
19
- raise RelatonBib::RequestError, "Unable acces #{url} (#{e.io.status.join(' ')})"
20
- end
21
-
22
- private
23
-
24
- def item(doc, acronym) # rubocop:disable Metrics/MethodLength
25
- {
26
- id: fetch_id(doc, acronym),
27
- fetched: Date.today.to_s,
28
- docid: fetch_docid(doc, acronym),
29
- title: fetch_title(doc),
30
- abstract: fetch_abstract(doc),
31
- version: fetch_version(doc),
32
- date: fetch_date(doc),
33
- docstatus: fetch_status(doc),
34
- link: fetch_link(doc),
35
- relation: fetch_relation(doc),
36
- keyword: fetch_keyword(doc),
37
- license: fetch_license(doc),
38
- }
39
- end
40
-
41
- def fetch_id(doc, acronym)
42
- acronym + version(doc)
43
- end
44
-
45
- def fetch_title(doc)
46
- content = doc.at('//dt[.="Title:"]/following-sibling::dd').text
47
- title = RelatonBib::FormattedString.new content: content, language: "en", script: "Latn"
48
- [RelatonBib::TypedTitleString.new(type: "main", title: title)]
49
- end
50
-
51
- def fetch_docid(doc, acronym)
52
- id = [acronym]
53
- if (ver = version(doc))
54
- id << ver
55
- end
56
- [RelatonBib::DocumentIdentifier.new(id: id.join(" "), type: "OMG", primary: true)]
57
- end
58
-
59
- def fetch_abstract(doc)
60
- content = doc.at('//section[@id="document-metadata"]/div/div/p').text
61
- [{ content: content, language: "en", script: "Latn" }]
62
- end
63
-
64
- def fetch_version(doc)
65
- [RelatonBib::BibliographicItem::Version.new(pub_date(doc), version(doc))]
66
- end
67
-
68
- def version(doc)
69
- doc.at('//dt[.="Version:"]/following-sibling::dd/p/span').text
70
- end
71
-
72
- def fetch_date(doc)
73
- [type: "published", on: pub_date(doc).to_s]
74
- end
75
-
76
- def pub_date(doc)
77
- Date.parse doc.at('//dt[.="Publication Date:"]/following-sibling::dd').text.strip
78
- end
79
-
80
- def fetch_status(doc)
81
- status = doc.at('//dt[.="Document Status:"]/following-sibling::dd')
82
- stage = status.text.strip.match(/\w+/).to_s
83
- RelatonBib::DocumentStatus.new(stage: stage)
84
- end
85
-
86
- def fetch_link(doc)
87
- links = []
88
- a = doc.at('//dt[.="This Document:"]/following-sibling::dd/a')
89
- links << { type: "src", content: a[:href] } if a
90
- pdf = doc.at('//a[@class="download-document"]')
91
- links << { type: "pdf", content: pdf[:href] } if pdf
92
- links
93
- end
94
-
95
- def fetch_relation(doc) # rubocop:disable Metrics/MethodLength
96
- current_version = version(doc)
97
- v = doc.xpath('//h2[.="History"]/following-sibling::section/div/table/tbody/tr')
98
- v.reduce([]) do |mem, row|
99
- ver = row.at("td").text
100
- unless ver == current_version
101
- acronym = row.at("td[3]/a")[:href].split("/")[4]
102
- fref = RelatonBib::FormattedRef.new content: "OMG #{acronym} #{ver}"
103
- bibitem = OmgBibliographicItem.new formattedref: fref
104
- mem << { type: "obsoletes", bibitem: bibitem }
105
- end
106
- mem
107
- end
108
- end
109
-
110
- def fetch_keyword(doc)
111
- doc.xpath('//dt[.="Categories:"]/following-sibling::dd/ul/li/a/em').map &:text
112
- end
113
-
114
- def fetch_license(doc)
115
- doc.xpath(
116
- '//dt/span/a[contains(., "IPR Mode")]/../../following-sibling::dd/span',
117
- ).map { |l| l.text.match(/[\w\s-]+/).to_s.strip }
118
- end
119
- end
120
- end
121
- end