relaton-omg 1.18.0 → 1.19.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.adoc +9 -16
- data/grammars/basicdoc.rng +3 -0
- data/lib/relaton_omg/omg_bibliography.rb +4 -4
- data/lib/relaton_omg/scraper.rb +136 -0
- data/lib/relaton_omg/util.rb +1 -4
- data/lib/relaton_omg/version.rb +1 -1
- data/lib/relaton_omg.rb +1 -2
- data/relaton_omg.gemspec +2 -1
- metadata +20 -7
- data/lib/relaton_omg/config.rb +0 -10
- data/lib/relaton_omg/scrapper.rb +0 -121
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 399c47b05dd8f22085cdcd9427ddb625390e107b937a260f98e744453dcdc24f
|
4
|
+
data.tar.gz: 4171e703a7376a19beea4d733af030ea36afb997bd92b22fec657eaef1bda857
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 341972ebf1cf824c3d1590e230d5ba118663219226a1ebf42f32c4dc13f19b8f231d2ea423ae2198f298dd493d6fe37f3f702db09c674b8561e53689ff249e48
|
7
|
+
data.tar.gz: 6798497edd40b952dc52d8bd63760841582d75dedc0de7f6db98aba9a976db6457b277ad2e94f7cec17c2b6e854c1b012838054d085d0dfe103271afa21ef934
|
data/README.adoc
CHANGED
@@ -31,20 +31,6 @@ Or install it yourself as:
|
|
31
31
|
|
32
32
|
== Usage
|
33
33
|
|
34
|
-
=== Configuration
|
35
|
-
|
36
|
-
Configuration is optional. The available option is `logger` which is a `Logger` instance. By default, the logger is `Logger.new($stderr)` with `Logger::WARN` level. To change the logger level, use `RelatonOmg.configure` block.
|
37
|
-
|
38
|
-
[source,ruby]
|
39
|
-
----
|
40
|
-
require 'relaton_omg'
|
41
|
-
=> true
|
42
|
-
|
43
|
-
RelatonOmg.configure do |config|
|
44
|
-
config.logger.level = Logger::DEBUG
|
45
|
-
end
|
46
|
-
----
|
47
|
-
|
48
34
|
=== Search document
|
49
35
|
|
50
36
|
Reference format is `OMG + {ACRONYM} + {VERSION}`
|
@@ -54,6 +40,9 @@ Reference format is `OMG + {ACRONYM} + {VERSION}`
|
|
54
40
|
|
55
41
|
[source,ruby]
|
56
42
|
----
|
43
|
+
require 'relaton_omg'
|
44
|
+
=> true
|
45
|
+
|
57
46
|
item = RelatonOmg::OmgBibliography.get 'OMG AMI4CCM 1.0'
|
58
47
|
[relaton-omg] (OMG AMI4CCM 1.0) Fetching from www.omg.org ...
|
59
48
|
[relaton-omg] (OMG AMI4CCM 1.0) Found: `AMI4CCM 1.0`
|
@@ -72,7 +61,7 @@ RelatonOmg::OmgBibliography.get 'OMG 1111'
|
|
72
61
|
[source,ruby]
|
73
62
|
----
|
74
63
|
item.to_xml
|
75
|
-
=> "<bibitem id="AMI4CCM1.0" schema-version="v1.2.
|
64
|
+
=> "<bibitem id="AMI4CCM1.0" schema-version="v1.2.9">
|
76
65
|
<fetched>2022-12-05</fetched>
|
77
66
|
<title type="main" format="text/plain" language="en" script="Latn">Asynchronous Method Invocation for CCM</title>
|
78
67
|
<uri type="src">https://www.omg.org/spec/AMI4CCM/1.0/About-AMI4CCM</uri>
|
@@ -106,7 +95,7 @@ item = RelatonOmg::OmgBibliographicItem.from_xml 'spec/fixtures/omg_ami4ccm_1_0.
|
|
106
95
|
[source,ruby]
|
107
96
|
----
|
108
97
|
hash = YAML.load_file 'spec/fixtures/omg_ami4ccm_1_0.yaml'
|
109
|
-
=> {"schema-version"=>"v1.2.
|
98
|
+
=> {"schema-version"=>"v1.2.9",
|
110
99
|
"id"=>"AMI4CCM1.0",
|
111
100
|
...
|
112
101
|
|
@@ -123,6 +112,10 @@ item = RelatonOmg::OmgBibliographicItem.from_yaml 'spec/fixtures/omg_ami4ccm_1_0
|
|
123
112
|
...
|
124
113
|
----
|
125
114
|
|
115
|
+
=== Logging
|
116
|
+
|
117
|
+
RelatonOmg uses the relaton-logger gem for logging. By default, it logs to STDOUT. To change the log levels and add other loggers, read the https://github.com/relaton/relaton-logger#usage[relaton-logger] documentation.
|
118
|
+
|
126
119
|
== Contributing
|
127
120
|
|
128
121
|
Bug reports and pull requests are welcome on GitHub at https://github.com/relaton/relaton-omg.
|
data/grammars/basicdoc.rng
CHANGED
data/lib/relaton_omg/omg_bibliography.rb
CHANGED
@@ -7,7 +7,7 @@ module RelatonOmg
|
|
7
7
|
# @param text [String] the OMG standard reference
|
8
8
|
# @return [RelatonOmg::OmgBibliographicItem]
|
9
9
|
def search(text)
|
10
|
-
|
10
|
+
Scraper.scrape_page text
|
11
11
|
end
|
12
12
|
|
13
13
|
# @param code [String] the OMG standard reference
|
@@ -15,12 +15,12 @@ module RelatonOmg
|
|
15
15
|
# @param opts [Hash] options
|
16
16
|
# @return [RelatonOmg::OmgBibliographicItem]
|
17
17
|
def get(code, _year = nil, _opts = {})
|
18
|
-
Util.
|
18
|
+
Util.info "Fetching from www.omg.org ...", key: code
|
19
19
|
result = search code
|
20
20
|
if result
|
21
|
-
Util.
|
21
|
+
Util.info "Found: `#{result.docidentifier.first.id}`", key: code
|
22
22
|
else
|
23
|
-
Util.
|
23
|
+
Util.info "Not found.", key: code
|
24
24
|
end
|
25
25
|
result
|
26
26
|
end
|
data/lib/relaton_omg/scraper.rb
ADDED
@@ -0,0 +1,136 @@
|
|
1
|
+
require "nokogiri"
|
2
|
+
|
3
|
+
module RelatonOmg
|
4
|
+
class Scraper
|
5
|
+
URL_PATTERN = "https://www.omg.org/spec/".freeze
|
6
|
+
|
7
|
+
def initialize(acronym, version = nil, spec = nil)
|
8
|
+
@acronym = acronym
|
9
|
+
@version = version
|
10
|
+
@spec = spec
|
11
|
+
end
|
12
|
+
|
13
|
+
def self.scrape_page(ref)
|
14
|
+
%r{^OMG (?<acronym>[^\s]+)(?:[\s/](?<version>[\d.]+(?:\sbeta(?:\s\d)?)?))?(?:[\s/](?<spec>\w+))?$} =~ ref
|
15
|
+
return unless acronym
|
16
|
+
|
17
|
+
scraper = new(acronym, version, spec)
|
18
|
+
doc = scraper.get_doc
|
19
|
+
return if doc.nil? || scraper.fetch_link.empty?
|
20
|
+
|
21
|
+
OmgBibliographicItem.new(**scraper.item)
|
22
|
+
end
|
23
|
+
|
24
|
+
def get_doc
|
25
|
+
@url = "#{URL_PATTERN}#{@acronym}/"
|
26
|
+
@url += @version.gsub(' ', '/') if @version
|
27
|
+
@doc = Nokogiri::HTML OpenURI.open_uri(@url, open_timeout: 10)
|
28
|
+
rescue OpenURI::HTTPError, URI::InvalidURIError, Net::OpenTimeout => e
|
29
|
+
return if e.is_a?(URI::InvalidURIError) || e.io.status[0] == "404"
|
30
|
+
|
31
|
+
raise RelatonBib::RequestError, "Unable acces #{@url} (#{e.io.status.join(' ')})"
|
32
|
+
end
|
33
|
+
|
34
|
+
def item
|
35
|
+
{
|
36
|
+
id: fetch_id,
|
37
|
+
fetched: Date.today.to_s,
|
38
|
+
docid: fetch_docid,
|
39
|
+
title: fetch_title,
|
40
|
+
abstract: fetch_abstract,
|
41
|
+
version: fetch_version,
|
42
|
+
date: fetch_date,
|
43
|
+
docstatus: fetch_status,
|
44
|
+
link: fetch_link,
|
45
|
+
relation: fetch_relation,
|
46
|
+
keyword: fetch_keyword,
|
47
|
+
license: fetch_license,
|
48
|
+
}
|
49
|
+
end
|
50
|
+
|
51
|
+
def fetch_id
|
52
|
+
"#{@acronym}#{doc_version}#{@spec}"
|
53
|
+
end
|
54
|
+
|
55
|
+
def fetch_title
|
56
|
+
content = @doc.at('//dt[.="Title:"]/following-sibling::dd').text
|
57
|
+
content += ": #{@spec}" if @spec
|
58
|
+
title = RelatonBib::FormattedString.new content: content, language: "en", script: "Latn"
|
59
|
+
[RelatonBib::TypedTitleString.new(type: "main", title: title)]
|
60
|
+
end
|
61
|
+
|
62
|
+
def fetch_docid
|
63
|
+
id = [@acronym]
|
64
|
+
id << doc_version if doc_version
|
65
|
+
id << @spec if @spec
|
66
|
+
[RelatonBib::DocumentIdentifier.new(id: id.join(" "), type: "OMG", primary: true)]
|
67
|
+
end
|
68
|
+
|
69
|
+
def fetch_abstract
|
70
|
+
content = @doc.at('//section[@id="document-metadata"]/div/div/p').text
|
71
|
+
[{ content: content, language: "en", script: "Latn" }]
|
72
|
+
end
|
73
|
+
|
74
|
+
def fetch_version
|
75
|
+
[RelatonBib::BibliographicItem::Version.new(pub_date, doc_version)]
|
76
|
+
end
|
77
|
+
|
78
|
+
def doc_version
|
79
|
+
@doc_version ||= @doc.at('//dt[.="Version:"]/following-sibling::dd/p/span').text
|
80
|
+
end
|
81
|
+
|
82
|
+
def fetch_date
|
83
|
+
[type: "published", on: pub_date.to_s]
|
84
|
+
end
|
85
|
+
|
86
|
+
def pub_date
|
87
|
+
Date.parse @doc.at('//dt[.="Publication Date:"]/following-sibling::dd').text.strip
|
88
|
+
end
|
89
|
+
|
90
|
+
def fetch_status
|
91
|
+
status = @doc.at('//dt[.="Document Status:"]/following-sibling::dd')
|
92
|
+
stage = status.text.strip.match(/\w+/).to_s
|
93
|
+
RelatonBib::DocumentStatus.new(stage: stage)
|
94
|
+
end
|
95
|
+
|
96
|
+
def fetch_link
|
97
|
+
return @link if @link
|
98
|
+
|
99
|
+
@links = []
|
100
|
+
if @spec
|
101
|
+
a = @doc.at("//a[@href='#{@url}/#{@spec}/PDF']")
|
102
|
+
@links << { type: "src", content: a[:href] } if a
|
103
|
+
else
|
104
|
+
a = @doc.at('//dt[.="This Document:"]/following-sibling::dd/a')
|
105
|
+
@links << { type: "src", content: a[:href] } if a
|
106
|
+
pdf = @doc.at('//a[@class="download-document"]')
|
107
|
+
@links << { type: "pdf", content: pdf[:href] } if pdf
|
108
|
+
end
|
109
|
+
@links
|
110
|
+
end
|
111
|
+
|
112
|
+
def fetch_relation
|
113
|
+
v = @doc.xpath('//h2[.="History"]/following-sibling::section/div/table/tbody/tr')
|
114
|
+
v.reduce([]) do |mem, row|
|
115
|
+
ver = row.at("td").text
|
116
|
+
unless ver == doc_version
|
117
|
+
acronym = row.at("td[3]/a")[:href].split("/")[4]
|
118
|
+
fref = RelatonBib::FormattedRef.new content: "OMG #{acronym} #{ver}"
|
119
|
+
bibitem = OmgBibliographicItem.new formattedref: fref
|
120
|
+
mem << { type: "obsoletes", bibitem: bibitem }
|
121
|
+
end
|
122
|
+
mem
|
123
|
+
end
|
124
|
+
end
|
125
|
+
|
126
|
+
def fetch_keyword
|
127
|
+
@doc.xpath('//dt[.="Categories:"]/following-sibling::dd/ul/li/a/em').map &:text
|
128
|
+
end
|
129
|
+
|
130
|
+
def fetch_license
|
131
|
+
@doc.xpath(
|
132
|
+
'//dt/span/a[contains(., "IPR Mode")]/../../following-sibling::dd/span',
|
133
|
+
).map { |l| l.text.match(/[\w\s-]+/).to_s.strip }
|
134
|
+
end
|
135
|
+
end
|
136
|
+
end
|
data/lib/relaton_omg/util.rb
CHANGED
data/lib/relaton_omg/version.rb
CHANGED
data/lib/relaton_omg.rb
CHANGED
@@ -1,9 +1,8 @@
|
|
1
1
|
require "nokogiri"
|
2
2
|
require "relaton_bib"
|
3
3
|
require "relaton_omg/version"
|
4
|
-
require "relaton_omg/config"
|
5
4
|
require "relaton_omg/util"
|
6
|
-
require "relaton_omg/
|
5
|
+
require "relaton_omg/scraper"
|
7
6
|
require "relaton_omg/omg_bibliography"
|
8
7
|
require "relaton_omg/omg_bibliographic_item"
|
9
8
|
require "relaton_omg/xml_parser"
|
data/relaton_omg.gemspec
CHANGED
@@ -26,5 +26,6 @@ Gem::Specification.new do |spec|
|
|
26
26
|
spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
|
27
27
|
spec.require_paths = ["lib"]
|
28
28
|
|
29
|
-
spec.add_dependency "
|
29
|
+
spec.add_dependency "base64"
|
30
|
+
spec.add_dependency "relaton-bib", "~> 1.19.0"
|
30
31
|
end
|
metadata
CHANGED
@@ -1,29 +1,43 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: relaton-omg
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.
|
4
|
+
version: 1.19.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ribose Inc.
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2024-
|
11
|
+
date: 2024-07-04 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: base64
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - ">="
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '0'
|
20
|
+
type: :runtime
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - ">="
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '0'
|
13
27
|
- !ruby/object:Gem::Dependency
|
14
28
|
name: relaton-bib
|
15
29
|
requirement: !ruby/object:Gem::Requirement
|
16
30
|
requirements:
|
17
31
|
- - "~>"
|
18
32
|
- !ruby/object:Gem::Version
|
19
|
-
version: 1.
|
33
|
+
version: 1.19.0
|
20
34
|
type: :runtime
|
21
35
|
prerelease: false
|
22
36
|
version_requirements: !ruby/object:Gem::Requirement
|
23
37
|
requirements:
|
24
38
|
- - "~>"
|
25
39
|
- !ruby/object:Gem::Version
|
26
|
-
version: 1.
|
40
|
+
version: 1.19.0
|
27
41
|
description: 'RelatonOmg: retrieve OMG Standards for bibliographic using the IsoBibliographicItem
|
28
42
|
model'
|
29
43
|
email:
|
@@ -51,12 +65,11 @@ files:
|
|
51
65
|
- grammars/relaton-omg-compile.rng
|
52
66
|
- grammars/relaton-omg.rng
|
53
67
|
- lib/relaton_omg.rb
|
54
|
-
- lib/relaton_omg/config.rb
|
55
68
|
- lib/relaton_omg/hash_converter.rb
|
56
69
|
- lib/relaton_omg/omg_bibliographic_item.rb
|
57
70
|
- lib/relaton_omg/omg_bibliography.rb
|
58
71
|
- lib/relaton_omg/processor.rb
|
59
|
-
- lib/relaton_omg/
|
72
|
+
- lib/relaton_omg/scraper.rb
|
60
73
|
- lib/relaton_omg/util.rb
|
61
74
|
- lib/relaton_omg/version.rb
|
62
75
|
- lib/relaton_omg/xml_parser.rb
|
@@ -82,7 +95,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
82
95
|
- !ruby/object:Gem::Version
|
83
96
|
version: '0'
|
84
97
|
requirements: []
|
85
|
-
rubygems_version: 3.3.
|
98
|
+
rubygems_version: 3.3.27
|
86
99
|
signing_key:
|
87
100
|
specification_version: 4
|
88
101
|
summary: 'RelatonOmg: retrieve OMG Standards for bibliographic using the IsoBibliographicItem
|
data/lib/relaton_omg/config.rb
DELETED
data/lib/relaton_omg/scrapper.rb
DELETED
@@ -1,121 +0,0 @@
|
|
1
|
-
require "nokogiri"
|
2
|
-
|
3
|
-
module RelatonOmg
|
4
|
-
module Scrapper
|
5
|
-
URL_PATTERN = "https://www.omg.org/spec/".freeze
|
6
|
-
|
7
|
-
class << self
|
8
|
-
def scrape_page(ref) # rubocop:disable Metrics/AbcSize, Metrics/MethodLength
|
9
|
-
%r{OMG (?<acronym>[^\s]+)\s?(?<version>.*)} =~ ref
|
10
|
-
return unless acronym
|
11
|
-
|
12
|
-
url = URL_PATTERN + acronym
|
13
|
-
url += "/#{version}" if version
|
14
|
-
doc = Nokogiri::HTML OpenURI.open_uri(url, open_timeout: 10)
|
15
|
-
OmgBibliographicItem.new(**item(doc, acronym))
|
16
|
-
rescue OpenURI::HTTPError, URI::InvalidURIError, Net::OpenTimeout => e
|
17
|
-
return if e.is_a?(URI::InvalidURIError) || e.io.status[0] == "404"
|
18
|
-
|
19
|
-
raise RelatonBib::RequestError, "Unable acces #{url} (#{e.io.status.join(' ')})"
|
20
|
-
end
|
21
|
-
|
22
|
-
private
|
23
|
-
|
24
|
-
def item(doc, acronym) # rubocop:disable Metrics/MethodLength
|
25
|
-
{
|
26
|
-
id: fetch_id(doc, acronym),
|
27
|
-
fetched: Date.today.to_s,
|
28
|
-
docid: fetch_docid(doc, acronym),
|
29
|
-
title: fetch_title(doc),
|
30
|
-
abstract: fetch_abstract(doc),
|
31
|
-
version: fetch_version(doc),
|
32
|
-
date: fetch_date(doc),
|
33
|
-
docstatus: fetch_status(doc),
|
34
|
-
link: fetch_link(doc),
|
35
|
-
relation: fetch_relation(doc),
|
36
|
-
keyword: fetch_keyword(doc),
|
37
|
-
license: fetch_license(doc),
|
38
|
-
}
|
39
|
-
end
|
40
|
-
|
41
|
-
def fetch_id(doc, acronym)
|
42
|
-
acronym + version(doc)
|
43
|
-
end
|
44
|
-
|
45
|
-
def fetch_title(doc)
|
46
|
-
content = doc.at('//dt[.="Title:"]/following-sibling::dd').text
|
47
|
-
title = RelatonBib::FormattedString.new content: content, language: "en", script: "Latn"
|
48
|
-
[RelatonBib::TypedTitleString.new(type: "main", title: title)]
|
49
|
-
end
|
50
|
-
|
51
|
-
def fetch_docid(doc, acronym)
|
52
|
-
id = [acronym]
|
53
|
-
if (ver = version(doc))
|
54
|
-
id << ver
|
55
|
-
end
|
56
|
-
[RelatonBib::DocumentIdentifier.new(id: id.join(" "), type: "OMG", primary: true)]
|
57
|
-
end
|
58
|
-
|
59
|
-
def fetch_abstract(doc)
|
60
|
-
content = doc.at('//section[@id="document-metadata"]/div/div/p').text
|
61
|
-
[{ content: content, language: "en", script: "Latn" }]
|
62
|
-
end
|
63
|
-
|
64
|
-
def fetch_version(doc)
|
65
|
-
[RelatonBib::BibliographicItem::Version.new(pub_date(doc), version(doc))]
|
66
|
-
end
|
67
|
-
|
68
|
-
def version(doc)
|
69
|
-
doc.at('//dt[.="Version:"]/following-sibling::dd/p/span').text
|
70
|
-
end
|
71
|
-
|
72
|
-
def fetch_date(doc)
|
73
|
-
[type: "published", on: pub_date(doc).to_s]
|
74
|
-
end
|
75
|
-
|
76
|
-
def pub_date(doc)
|
77
|
-
Date.parse doc.at('//dt[.="Publication Date:"]/following-sibling::dd').text.strip
|
78
|
-
end
|
79
|
-
|
80
|
-
def fetch_status(doc)
|
81
|
-
status = doc.at('//dt[.="Document Status:"]/following-sibling::dd')
|
82
|
-
stage = status.text.strip.match(/\w+/).to_s
|
83
|
-
RelatonBib::DocumentStatus.new(stage: stage)
|
84
|
-
end
|
85
|
-
|
86
|
-
def fetch_link(doc)
|
87
|
-
links = []
|
88
|
-
a = doc.at('//dt[.="This Document:"]/following-sibling::dd/a')
|
89
|
-
links << { type: "src", content: a[:href] } if a
|
90
|
-
pdf = doc.at('//a[@class="download-document"]')
|
91
|
-
links << { type: "pdf", content: pdf[:href] } if pdf
|
92
|
-
links
|
93
|
-
end
|
94
|
-
|
95
|
-
def fetch_relation(doc) # rubocop:disable Metrics/MethodLength
|
96
|
-
current_version = version(doc)
|
97
|
-
v = doc.xpath('//h2[.="History"]/following-sibling::section/div/table/tbody/tr')
|
98
|
-
v.reduce([]) do |mem, row|
|
99
|
-
ver = row.at("td").text
|
100
|
-
unless ver == current_version
|
101
|
-
acronym = row.at("td[3]/a")[:href].split("/")[4]
|
102
|
-
fref = RelatonBib::FormattedRef.new content: "OMG #{acronym} #{ver}"
|
103
|
-
bibitem = OmgBibliographicItem.new formattedref: fref
|
104
|
-
mem << { type: "obsoletes", bibitem: bibitem }
|
105
|
-
end
|
106
|
-
mem
|
107
|
-
end
|
108
|
-
end
|
109
|
-
|
110
|
-
def fetch_keyword(doc)
|
111
|
-
doc.xpath('//dt[.="Categories:"]/following-sibling::dd/ul/li/a/em').map &:text
|
112
|
-
end
|
113
|
-
|
114
|
-
def fetch_license(doc)
|
115
|
-
doc.xpath(
|
116
|
-
'//dt/span/a[contains(., "IPR Mode")]/../../following-sibling::dd/span',
|
117
|
-
).map { |l| l.text.match(/[\w\s-]+/).to_s.strip }
|
118
|
-
end
|
119
|
-
end
|
120
|
-
end
|
121
|
-
end
|