relaton-omg 1.18.0 → 1.18.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: cfc5b10fea7afe2778adf2b0a5b681b3155305acf823eb5f08d29ac07d6d95ef
4
- data.tar.gz: 74f40b36ac9b0ad1c8d979e7b21a1249234a37aba35d3791c4ab44325d75e798
3
+ metadata.gz: 3aee35ace5d33ef6a4058a8da3c76d59af173315a7ed32cb5967c0dfb9fd8296
4
+ data.tar.gz: a1138366be2d18d01354d29db3a8fc0870c4c73cbbbe3ac67ffa3700ae85f9bc
5
5
  SHA512:
6
- metadata.gz: 115a462f0e2c13e91cb89882ef4f5bed4d557b0a7332aa10706229d4091ada41439c47265cea58ea342e117ccb794d4d093ce3fb289352f9440c1de31dd03662
7
- data.tar.gz: dfde51a15b0753757b6c9f6776ca803c7a2cd58d0e9677e7b821f238f4f7d521b921804b95418ec47e782796a3ac00d3eeec7b3f88373b34035f62fc6a5f78fd
6
+ metadata.gz: b5194c85224823ac10a951ed66de9c558659751b967d909d53cbbee37ad60eb750f92b60c079fd1f125c324e23dbb651072a40eefd77018d50776331f7228b37
7
+ data.tar.gz: 1b9ef36597bec1b10285d0e5f3583615aa774f0b901d98d3042477f6c9368c20e9cf60d4f47d00564f0effc99c9d4d5bd236ab0c62e5ee70531151da899cabc3
@@ -7,7 +7,7 @@ module RelatonOmg
7
7
  # @param code [String] the OMG standard reference
8
8
  # @return [RelatonOmg::OmgBibliographicItem]
9
9
  def search(text)
10
- Scrapper.scrape_page text
10
+ Scraper.scrape_page text
11
11
  end
12
12
 
13
13
  # @param code [String] the OMG standard reference
@@ -0,0 +1,136 @@
1
+ require "nokogiri"
2
+
3
module RelatonOmg
  # Downloads one OMG specification page and turns the metadata found on it
  # into the attribute hash accepted by OmgBibliographicItem.
  class Scraper
    URL_PATTERN = "https://www.omg.org/spec/".freeze

    # @param acronym [String] specification acronym, first path segment of the URL
    # @param version [String, nil] version part of the reference, e.g. "2.5"
    #   or "1.0 beta 1"
    # @param spec [String, nil] optional trailing word of the reference; selects
    #   the "<url>/<spec>/PDF" link and is appended to the title and identifier
    def initialize(acronym, version = nil, spec = nil)
      @acronym = acronym
      @version = version
      @spec = spec
    end

    # Parses a reference of the form "OMG <acronym> [<version>] [<spec>]" and
    # builds a bibliographic item from the corresponding page.
    #
    # @param ref [String] reference text
    # @return [RelatonOmg::OmgBibliographicItem, nil] nil when the reference
    #   does not match, the page cannot be found, or no link is present on it
    # @raise [RelatonBib::RequestError] when the page request fails
    def self.scrape_page(ref)
      # Named captures of a regexp literal on the left of =~ become locals.
      %r{^OMG (?<acronym>[^\s]+)(?:[\s/](?<version>[\d.]+(?:\sbeta(?:\s\d)?)?))?(?:[\s/](?<spec>\w+))?$} =~ ref
      return unless acronym

      scraper = new(acronym, version, spec)
      return if scraper.get_doc.nil? || scraper.fetch_link.empty?

      OmgBibliographicItem.new(**scraper.item)
    end

    # Fetches and parses the specification page, remembering it in @doc.
    #
    # @return [Nokogiri::HTML::Document, nil] nil for an invalid URI or a 404
    # @raise [RelatonBib::RequestError] on any other HTTP error or on a
    #   connection timeout
    def get_doc
      @url = "#{URL_PATTERN}#{@acronym}/"
      # "1.0 beta 1" lives under ".../1.0/beta/1"
      @url += @version.gsub(" ", "/") if @version
      page = OpenURI.open_uri(@url, open_timeout: 10)
      @doc = Nokogiri::HTML(page)
    rescue URI::InvalidURIError
      nil
    rescue OpenURI::HTTPError => e
      return if e.io.status[0] == "404"

      raise RelatonBib::RequestError, "Unable to access #{@url} (#{e.io.status.join(' ')})"
    rescue Net::OpenTimeout => e
      # A timeout carries no response object, so only its message is reported.
      raise RelatonBib::RequestError, "Unable to access #{@url} (#{e.message})"
    end

    # @return [Hash] keyword arguments for OmgBibliographicItem.new
    def item
      {
        id: fetch_id,
        fetched: Date.today.to_s,
        docid: fetch_docid,
        title: fetch_title,
        abstract: fetch_abstract,
        version: fetch_version,
        date: fetch_date,
        docstatus: fetch_status,
        link: fetch_link,
        relation: fetch_relation,
        keyword: fetch_keyword,
        license: fetch_license,
      }
    end

    # @return [String] acronym, page version and spec concatenated without separators
    def fetch_id
      "#{@acronym}#{doc_version}#{@spec}"
    end

    # @return [Array<RelatonBib::TypedTitleString>] the page title, suffixed
    #   with ": <spec>" when a spec was requested
    def fetch_title
      content = @doc.at('//dt[.="Title:"]/following-sibling::dd').text
      content += ": #{@spec}" if @spec
      title = RelatonBib::FormattedString.new content: content, language: "en", script: "Latn"
      [RelatonBib::TypedTitleString.new(type: "main", title: title)]
    end

    # @return [Array<RelatonBib::DocumentIdentifier>] one primary "OMG"
    #   identifier made of acronym, version and spec joined by spaces
    def fetch_docid
      parts = [@acronym, doc_version, @spec].compact
      [RelatonBib::DocumentIdentifier.new(id: parts.join(" "), type: "OMG", primary: true)]
    end

    # @return [Array<Hash>] the first paragraph of the document-metadata section
    def fetch_abstract
      content = @doc.at('//section[@id="document-metadata"]/div/div/p').text
      [{ content: content, language: "en", script: "Latn" }]
    end

    # @return [Array<RelatonBib::BibliographicItem::Version>]
    def fetch_version
      [RelatonBib::BibliographicItem::Version.new(pub_date, doc_version)]
    end

    # Version string shown on the page (memoized once found).
    #
    # @return [String, nil] nil when the page has no "Version:" entry
    def doc_version
      @doc_version ||= @doc.at('//dt[.="Version:"]/following-sibling::dd/p/span')&.text
    end

    # @return [Array<Hash>] a single "published" date entry
    def fetch_date
      [type: "published", on: pub_date.to_s]
    end

    # @return [Date] parsed "Publication Date:" entry of the page
    def pub_date
      Date.parse @doc.at('//dt[.="Publication Date:"]/following-sibling::dd').text.strip
    end

    # @return [RelatonBib::DocumentStatus] stage is the first word of the
    #   "Document Status:" entry
    def fetch_status
      status = @doc.at('//dt[.="Document Status:"]/following-sibling::dd')
      stage = status.text.strip.match(/\w+/).to_s
      RelatonBib::DocumentStatus.new(stage: stage)
    end

    # Collects the links of the document (memoized; scrape_page and item both
    # call this).
    #
    # @return [Array<Hash>] hashes with :type ("src" or "pdf") and :content (href)
    def fetch_link
      return @links if @links

      @links = []
      if @spec
        # NOTE(review): without a version @url already ends in "/", so this
        # href contains "//" - confirm against the page markup.
        a = @doc.at("//a[@href='#{@url}/#{@spec}/PDF']")
        @links << { type: "src", content: a[:href] } if a
      else
        a = @doc.at('//dt[.="This Document:"]/following-sibling::dd/a')
        @links << { type: "src", content: a[:href] } if a
        pdf = @doc.at('//a[@class="download-document"]')
        @links << { type: "pdf", content: pdf[:href] } if pdf
      end
      @links
    end

    # Builds an "obsoletes" relation for every row of the History table whose
    # version differs from the page's own version.
    #
    # @return [Array<Hash>] hashes with :type and :bibitem
    def fetch_relation
      rows = @doc.xpath('//h2[.="History"]/following-sibling::section/div/table/tbody/tr')
      rows.each_with_object([]) do |row, relations|
        ver = row.at("td").text
        next if ver == doc_version

        # The acronym is the fifth "/"-separated segment of the row's link.
        acronym = row.at("td[3]/a")[:href].split("/")[4]
        fref = RelatonBib::FormattedRef.new content: "OMG #{acronym} #{ver}"
        bibitem = OmgBibliographicItem.new formattedref: fref
        relations << { type: "obsoletes", bibitem: bibitem }
      end
    end

    # @return [Array<String>] texts of the "Categories:" list entries
    def fetch_keyword
      @doc.xpath('//dt[.="Categories:"]/following-sibling::dd/ul/li/a/em').map(&:text)
    end

    # @return [Array<String>] leading word/space/hyphen run of each "IPR Mode" value
    def fetch_license
      @doc.xpath(
        '//dt/span/a[contains(., "IPR Mode")]/../../following-sibling::dd/span',
      ).map { |l| l.text.match(/[\w\s-]+/).to_s.strip }
    end
  end
end
@@ -1,3 +1,3 @@
1
1
  module RelatonOmg
2
- VERSION = "1.18.0".freeze
2
+ VERSION = "1.18.1".freeze
3
3
  end
data/lib/relaton_omg.rb CHANGED
@@ -3,7 +3,7 @@ require "relaton_bib"
3
3
  require "relaton_omg/version"
4
4
  require "relaton_omg/config"
5
5
  require "relaton_omg/util"
6
- require "relaton_omg/scrapper"
6
+ require "relaton_omg/scraper"
7
7
  require "relaton_omg/omg_bibliography"
8
8
  require "relaton_omg/omg_bibliographic_item"
9
9
  require "relaton_omg/xml_parser"
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: relaton-omg
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.18.0
4
+ version: 1.18.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ribose Inc.
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2024-01-08 00:00:00.000000000 Z
11
+ date: 2024-06-27 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: relaton-bib
@@ -56,7 +56,7 @@ files:
56
56
  - lib/relaton_omg/omg_bibliographic_item.rb
57
57
  - lib/relaton_omg/omg_bibliography.rb
58
58
  - lib/relaton_omg/processor.rb
59
- - lib/relaton_omg/scrapper.rb
59
+ - lib/relaton_omg/scraper.rb
60
60
  - lib/relaton_omg/util.rb
61
61
  - lib/relaton_omg/version.rb
62
62
  - lib/relaton_omg/xml_parser.rb
@@ -82,7 +82,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
82
82
  - !ruby/object:Gem::Version
83
83
  version: '0'
84
84
  requirements: []
85
- rubygems_version: 3.3.26
85
+ rubygems_version: 3.3.27
86
86
  signing_key:
87
87
  specification_version: 4
88
88
  summary: 'RelatonOmg: retrieve OMG Standards for bibliographic using the IsoBibliographicItem
@@ -1,121 +0,0 @@
1
- require "nokogiri"
2
-
3
- module RelatonOmg
4
- module Scrapper
5
- URL_PATTERN = "https://www.omg.org/spec/".freeze
6
-
7
- class << self
8
- def scrape_page(ref) # rubocop:disable Metrics/AbcSize, Metrics/MethodLength
9
- %r{OMG (?<acronym>[^\s]+)\s?(?<version>.*)} =~ ref
10
- return unless acronym
11
-
12
- url = URL_PATTERN + acronym
13
- url += "/#{version}" if version
14
- doc = Nokogiri::HTML OpenURI.open_uri(url, open_timeout: 10)
15
- OmgBibliographicItem.new(**item(doc, acronym))
16
- rescue OpenURI::HTTPError, URI::InvalidURIError, Net::OpenTimeout => e
17
- return if e.is_a?(URI::InvalidURIError) || e.io.status[0] == "404"
18
-
19
- raise RelatonBib::RequestError, "Unable acces #{url} (#{e.io.status.join(' ')})"
20
- end
21
-
22
- private
23
-
24
- def item(doc, acronym) # rubocop:disable Metrics/MethodLength
25
- {
26
- id: fetch_id(doc, acronym),
27
- fetched: Date.today.to_s,
28
- docid: fetch_docid(doc, acronym),
29
- title: fetch_title(doc),
30
- abstract: fetch_abstract(doc),
31
- version: fetch_version(doc),
32
- date: fetch_date(doc),
33
- docstatus: fetch_status(doc),
34
- link: fetch_link(doc),
35
- relation: fetch_relation(doc),
36
- keyword: fetch_keyword(doc),
37
- license: fetch_license(doc),
38
- }
39
- end
40
-
41
- def fetch_id(doc, acronym)
42
- acronym + version(doc)
43
- end
44
-
45
- def fetch_title(doc)
46
- content = doc.at('//dt[.="Title:"]/following-sibling::dd').text
47
- title = RelatonBib::FormattedString.new content: content, language: "en", script: "Latn"
48
- [RelatonBib::TypedTitleString.new(type: "main", title: title)]
49
- end
50
-
51
- def fetch_docid(doc, acronym)
52
- id = [acronym]
53
- if (ver = version(doc))
54
- id << ver
55
- end
56
- [RelatonBib::DocumentIdentifier.new(id: id.join(" "), type: "OMG", primary: true)]
57
- end
58
-
59
- def fetch_abstract(doc)
60
- content = doc.at('//section[@id="document-metadata"]/div/div/p').text
61
- [{ content: content, language: "en", script: "Latn" }]
62
- end
63
-
64
- def fetch_version(doc)
65
- [RelatonBib::BibliographicItem::Version.new(pub_date(doc), version(doc))]
66
- end
67
-
68
- def version(doc)
69
- doc.at('//dt[.="Version:"]/following-sibling::dd/p/span').text
70
- end
71
-
72
- def fetch_date(doc)
73
- [type: "published", on: pub_date(doc).to_s]
74
- end
75
-
76
- def pub_date(doc)
77
- Date.parse doc.at('//dt[.="Publication Date:"]/following-sibling::dd').text.strip
78
- end
79
-
80
- def fetch_status(doc)
81
- status = doc.at('//dt[.="Document Status:"]/following-sibling::dd')
82
- stage = status.text.strip.match(/\w+/).to_s
83
- RelatonBib::DocumentStatus.new(stage: stage)
84
- end
85
-
86
- def fetch_link(doc)
87
- links = []
88
- a = doc.at('//dt[.="This Document:"]/following-sibling::dd/a')
89
- links << { type: "src", content: a[:href] } if a
90
- pdf = doc.at('//a[@class="download-document"]')
91
- links << { type: "pdf", content: pdf[:href] } if pdf
92
- links
93
- end
94
-
95
- def fetch_relation(doc) # rubocop:disable Metrics/MethodLength
96
- current_version = version(doc)
97
- v = doc.xpath('//h2[.="History"]/following-sibling::section/div/table/tbody/tr')
98
- v.reduce([]) do |mem, row|
99
- ver = row.at("td").text
100
- unless ver == current_version
101
- acronym = row.at("td[3]/a")[:href].split("/")[4]
102
- fref = RelatonBib::FormattedRef.new content: "OMG #{acronym} #{ver}"
103
- bibitem = OmgBibliographicItem.new formattedref: fref
104
- mem << { type: "obsoletes", bibitem: bibitem }
105
- end
106
- mem
107
- end
108
- end
109
-
110
- def fetch_keyword(doc)
111
- doc.xpath('//dt[.="Categories:"]/following-sibling::dd/ul/li/a/em').map &:text
112
- end
113
-
114
- def fetch_license(doc)
115
- doc.xpath(
116
- '//dt/span/a[contains(., "IPR Mode")]/../../following-sibling::dd/span',
117
- ).map { |l| l.text.match(/[\w\s-]+/).to_s.strip }
118
- end
119
- end
120
- end
121
- end