relaton-omg 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,6 @@
1
module RelatonOmg
  # Hash converter for OMG bibliographic items.
  #
  # Nothing is overridden here: every class-level method, including the
  # +hash_to_bib+ used by OmgBibliographicItem.from_hash, is inherited
  # unchanged from RelatonBib::HashConverter.
  class HashConverter < RelatonBib::HashConverter; end
end
@@ -0,0 +1,55 @@
1
module RelatonOmg
  # Bibliographic item describing an OMG specification.
  #
  # Adds an optional +doctype+ attribute on top of the generic
  # RelatonBib::BibliographicItem and provides constructors that build an
  # item from a YAML file, a Hash, or an XML file.
  class OmgBibliographicItem < RelatonBib::BibliographicItem
    # @return [String, NilClass] document type, when one was supplied
    attr_reader :doctype

    # @param args [Hash] item attributes; +:doctype+ is consumed here and
    #   every remaining key is forwarded to the parent initializer
    # @option args [String, NilClass] :doctype
    def initialize(**args)
      @doctype = args.delete :doctype
      # Bare `super` forwards the (already trimmed) keyword arguments.
      super
    end

    class << self
      # Builds an item from a YAML file.
      #
      # @param file [String] path to YAML file
      # @return [RelatonOmg::OmgBibliographicItem]
      def from_yaml(file)
        from_hash YAML.load_file(file)
      end

      # Builds an item from a Hash by running it through
      # RelatonOmg::HashConverter.hash_to_bib.
      #
      # @param hash [Hash]
      # @return [RelatonOmg::OmgBibliographicItem]
      def from_hash(hash)
        # The initializer accepts keyword arguments only, so the converted
        # hash must be splatted: a positional Hash is deprecated on Ruby 2.7
        # and raises ArgumentError on Ruby 3.
        new(**RelatonOmg::HashConverter.hash_to_bib(hash))
      end

      # Builds an item from an XML file, read as UTF-8.
      #
      # @param file [String] path to XML file
      # @return [RelatonOmg::OmgBibliographicItem]
      def from_xml(file)
        XMLParser.from_xml File.read(file, encoding: "UTF-8")
      end
    end

    # Serializes the item to XML through the parent implementation.
    #
    # @param builder [Object, NilClass] XML builder handed to the parent
    # @param opts [Hash]
    # @option opts [Symbol, NilClass] :date_format (:short), :full
    def to_xml(builder = nil, **opts)
      opts[:date_format] ||= :short
      super builder, **opts do |_b|
        # Intentionally empty: no OMG-specific elements (such as an `ext`
        # block carrying the doctype) are emitted yet.
      end
    end

    # Hash representation of the item; includes "doctype" only when set.
    #
    # @return [Hash]
    def to_hash
      hash = super
      hash["doctype"] = doctype if doctype
      hash
    end
  end
end
@@ -0,0 +1,22 @@
1
+ # frozen_string_literal:true
2
+
3
module RelatonOmg
  # Entry points for looking up OMG specifications by reference.
  module OmgBibliography
    # Scrapes the OMG specification page for the given reference.
    #
    # @param text [String] the OMG standard reference
    # @return [RelatonOmg::OmgBibliographicItem, NilClass] whatever
    #   Scrapper.scrape_page returns for the reference
    def self.search(text)
      Scrapper.scrape_page(text)
    end

    # Fetches a specification by reference. The year and options are
    # accepted for interface compatibility but are not used.
    #
    # @param code [String] the OMG standard reference
    # @param _year [String, NilClass] ignored
    # @param _opts [Hash] ignored
    # @return [RelatonOmg::OmgBibliographicItem, NilClass]
    def self.get(code, _year = nil, _opts = {})
      search(code)
    end
  end
end
@@ -0,0 +1,40 @@
1
+ require "relaton/processor"
2
+ require "relaton_omg/xml_parser"
3
+
4
module RelatonOmg
  # Relaton processor that exposes the OMG flavour: it delegates document
  # lookup, XML parsing and hash conversion to the RelatonOmg classes.
  class Processor < Relaton::Processor
    # Sets the identifiers under which this processor is registered.
    def initialize
      @idtype = "OMG"
      @prefix = "OMG"
      @defaultprefix = /^OMG /
      @short = :relaton_omg
    end

    # Looks up an OMG specification by reference.
    #
    # @param code [String]
    # @param date [String, NilClass] year
    # @param opts [Hash]
    # @return [RelatonOmg::OmgBibliographicItem, NilClass]
    def get(code, date, opts)
      OmgBibliography.get(code, date, opts)
    end

    # Parses an XML string into a bibliographic item.
    #
    # @param xml [String]
    # @return [RelatonOmg::OmgBibliographicItem]
    def from_xml(xml)
      XMLParser.from_xml(xml)
    end

    # Builds a bibliographic item from a Hash.
    #
    # @param hash [Hash]
    # @return [RelatonOmg::OmgBibliographicItem]
    def hash_to_bib(hash)
      OmgBibliographicItem.from_hash(hash)
    end

    # Returns hash of XML grammar; computed once and memoized.
    #
    # @return [String]
    def grammar_hash
      @grammar_hash ||= RelatonOmg.grammar_hash
    end
  end
end
@@ -0,0 +1,124 @@
1
+ require "nokogiri"
2
+
3
module RelatonOmg
  # Scrapes specification pages under https://www.omg.org/spec/ and turns
  # the metadata found there into the attributes of an OmgBibliographicItem.
  module Scrapper
    URL_PATTERN = "https://www.omg.org/spec/"

    class << self
      # Fetches and parses the page for a reference such as "OMG UML 2.5".
      #
      # @param ref [String] reference in the form "OMG <acronym> [<version>]"
      # @return [RelatonOmg::OmgBibliographicItem, NilClass] nil when the
      #   reference does not match or the page answers 404
      # @raise [RelatonBib::RequestError] on any other HTTP error
      def scrape_page(ref)
        # A regexp literal on the left of =~ assigns the named captures to
        # the local variables `acronym` and `version`.
        %r{OMG (?<acronym>[^\s]+)\s?(?<version>.*)} =~ ref
        return unless acronym

        url = page_url(acronym, version)
        # Kernel#open no longer accepts URIs on Ruby 3; go through OpenURI.
        doc = Nokogiri::HTML OpenURI.open_uri(URI(url))
        # The item initializer takes keyword arguments only.
        OmgBibliographicItem.new(**item(doc, acronym))
      rescue OpenURI::HTTPError => e
        status = e.io.status
        if status[0] == "404"
          warn %{[relaton-omg] no document found for "#{ref}" reference.}
          return
        end

        raise RelatonBib::RequestError, "Unable to access #{url} (#{status.join(' ')})"
      end

      private

      # Builds the page URL; the version segment is added only when present
      # (the regexp yields an empty string, not nil, for a missing version).
      #
      # @param acronym [String]
      # @param version [String, NilClass]
      # @return [String]
      def page_url(acronym, version)
        url = URL_PATTERN + acronym
        version.to_s.empty? ? url : "#{url}/#{version}"
      end

      # Collects every scraped attribute into the Hash handed to the item.
      #
      # @param doc [Nokogiri::HTML::Document]
      # @param acronym [String]
      # @return [Hash]
      def item(doc, acronym)
        {
          id: fetch_id(doc, acronym),
          fetched: Date.today.to_s,
          docid: fetch_docid(doc, acronym),
          title: fetch_title(doc),
          abstract: fetch_abstract(doc),
          version: fetch_version(doc),
          date: fetch_date(doc),
          docstatus: fetch_status(doc),
          link: fetch_link(doc),
          relation: fetch_relation(doc),
          keyword: fetch_keyword(doc),
          license: fetch_license(doc),
        }
      end

      # @return [String] acronym immediately followed by the page version
      def fetch_id(doc, acronym)
        acronym + version(doc)
      end

      # @return [Array<RelatonBib::TypedTitleString>] the "Title:" entry
      def fetch_title(doc)
        content = doc.at('//dt[.="Title:"]/following-sibling::dd').text
        title = RelatonBib::FormattedString.new content: content, language: "en", script: "Latn"
        [RelatonBib::TypedTitleString.new(type: "main", title: title)]
      end

      # @return [Array<RelatonBib::DocumentIdentifier>] "<acronym> <version>",
      #   or the acronym alone when the page version is blank
      def fetch_docid(doc, acronym)
        parts = [acronym]
        ver = version(doc)
        # Skipping a blank version avoids an identifier with a trailing space.
        parts << ver unless ver.empty?
        [RelatonBib::DocumentIdentifier.new(id: parts.join(" "), type: "OMG")]
      end

      # @return [Array<Hash>] first paragraph of the document-metadata section
      def fetch_abstract(doc)
        content = doc.at('//section[@id="document-metadata"]/p').text
        [{ content: content, language: "en", script: "Latn" }]
      end

      # @return [RelatonBib::BibliographicItem::Version]
      def fetch_version(doc)
        RelatonBib::BibliographicItem::Version.new pub_date(doc), [version(doc)]
      end

      # @return [String] stripped text of the "Version:" entry
      def version(doc)
        doc.at('//dt[.="Version:"]/following-sibling::dd/p/text()').text.strip
      end

      # @return [Array<Hash>] single "published" date entry
      def fetch_date(doc)
        [type: "published", on: pub_date(doc).to_s]
      end

      # @return [Date] parsed "Publication Date:" entry
      def pub_date(doc)
        Date.parse doc.at('//dt[.="Publication Date:"]/following-sibling::dd').text.strip
      end

      # @return [RelatonBib::DocumentStatus] first word of "Document Status:"
      def fetch_status(doc)
        status = doc.at('//dt[.="Document Status:"]/following-sibling::dd')
        stage = status.text.strip.match(/\w+/).to_s
        RelatonBib::DocumentStatus.new(stage: stage)
      end

      # @return [Array<Hash>] "src" and "pdf" links, each only when present
      def fetch_link(doc)
        links = []
        a = doc.at('//dt[.="This Document:"]/following-sibling::dd/a')
        links << { type: "src", content: a[:href] } if a
        pdf = doc.at('//a[@class="download-document"]')
        links << { type: "pdf", content: pdf[:href] } if pdf
        links
      end

      # Builds an "obsoletes" relation for every row of the History table
      # whose version differs from the version of the page itself.
      #
      # @return [Array<Hash>]
      def fetch_relation(doc)
        current_version = version(doc)
        rows = doc.xpath('//h2[.="History"]/following-sibling::section/div/table/tbody/tr')
        rows.each_with_object([]) do |row, relations|
          ver = row.at("td").text
          next if ver == current_version

          # The acronym is the fifth "/"-separated segment of the row's link.
          acronym = row.at("td[3]/a")[:href].split("/")[4]
          fref = RelatonBib::FormattedRef.new content: "OMG #{acronym} #{ver}"
          bibitem = OmgBibliographicItem.new formattedref: fref
          relations << { type: "obsoletes", bibitem: bibitem }
        end
      end

      # @return [Array<String>] names listed under "Categories:"
      def fetch_keyword(doc)
        doc.xpath('//dt[.="Categories:"]/following-sibling::dd/ul/li/a/em').map(&:text)
      end

      # @return [Array<String>] leading word/space/hyphen run of each
      #   "IPR Mode" entry
      def fetch_license(doc)
        doc.xpath(
          '//dt/span/a[contains(., "IPR Mode")]/../../following-sibling::dd/span',
        ).map { |l| l.text.match(/[\w\s-]+/).to_s.strip }
      end
    end
  end
end
@@ -0,0 +1,3 @@
1
module RelatonOmg
  # Gem version; frozen so the shared constant cannot be mutated in place.
  VERSION = "0.1.0".freeze
end
@@ -0,0 +1,25 @@
1
+ require "nokogiri"
2
+
3
module RelatonOmg
  # Parses Relaton XML into an OmgBibliographicItem, reusing the generic
  # RelatonBib::XMLParser for attribute extraction.
  class XMLParser < RelatonBib::XMLParser
    class << self
      # Parses an XML string whose root element is +bibitem+ or +bibdata+.
      # Namespaces are stripped before the root element is looked up.
      #
      # NOTE(review): when neither root element is present `doc.at` returns
      # nil and that nil is handed to item_data - confirm how the parent
      # implementation reacts.
      #
      # @param xml [String]
      # @return [RelatonOmg::OmgBibliographicItem]
      def from_xml(xml)
        doc = Nokogiri::XML(xml)
        doc.remove_namespaces!
        bibitem = doc.at("/bibitem|/bibdata")
        # The item initializer takes keyword arguments only, so the data
        # hash is splatted (a positional Hash raises on Ruby 3).
        RelatonOmg::OmgBibliographicItem.new(**item_data(bibitem))
      end

      private

      # Returns the parent's item data unchanged; kept as an override so
      # OMG-specific fields (e.g. a doctype from `ext`) can be added later.
      #
      # @param bibitem [Nokogiri::XML::Element]
      # @return [Hash]
      def item_data(bibitem)
        super
      end
    end
  end
end
@@ -0,0 +1,19 @@
1
require "digest/md5"
require "open-uri"
require "nokogiri"
require "relaton_bib"
require "relaton_omg/version"
require "relaton_omg/scrapper"
require "relaton_omg/omg_bibliography"
require "relaton_omg/omg_bibliographic_item"
require "relaton_omg/xml_parser"
require "relaton_omg/hash_converter"
9
+
10
module RelatonOmg
  # Returns hash of XML grammar.
  #
  # Reads every entry of the +grammars+ directory that sits one level above
  # this file's directory, in sorted path order, concatenates the contents
  # and returns the MD5 hex digest of the result.
  #
  # @return [String]
  def self.grammar_hash
    pattern = File.join(File.expand_path("..", __dir__), "grammars", "*")
    contents = Dir[pattern].sort.map { |path| File.read(path) }
    Digest::MD5.hexdigest(contents.join)
  end
end
@@ -0,0 +1,41 @@
1
require_relative "lib/relaton_omg/version"

# Gem specification for relaton-omg.
Gem::Specification.new do |spec|
  spec.name          = "relaton-omg"
  spec.version       = RelatonOmg::VERSION
  spec.authors       = ["Ribose Inc."]
  spec.email         = ["open.source@ribose.com"]

  # The first literal ends with a space: adjacent literals are joined
  # verbatim, and without it the text read "...bibliographic useusing...".
  spec.summary       = "RelatonOmg: retrieve OMG Standards for bibliographic use " \
                       "using the IsoBibliographicItem model"
  spec.description   = "RelatonOmg: retrieve OMG Standards for bibliographic use " \
                       "using the IsoBibliographicItem model"
  # The repository is named after the gem ("relaton-omg", not "relaton-ogn").
  spec.homepage      = "https://github.com/relaton/relaton-omg"
  spec.license       = "BSD-2-Clause"
  spec.required_ruby_version = Gem::Requirement.new(">= 2.4.0")

  spec.metadata["homepage_uri"] = spec.homepage
  spec.metadata["source_code_uri"] = spec.homepage

  # Specify which files should be added to the gem when it is released.
  # The `git ls-files -z` loads the files in the RubyGem that have been added into git.
  spec.files = Dir.chdir(File.expand_path("..", __FILE__)) do
    `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
  end
  spec.bindir        = "exe"
  spec.executables   = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
  spec.require_paths = ["lib"]

  spec.add_development_dependency "byebug"
  spec.add_development_dependency "debase"
  spec.add_development_dependency "equivalent-xml", "~> 0.6"
  spec.add_development_dependency "pry-byebug"
  spec.add_development_dependency "rake", "~> 10.0"
  spec.add_development_dependency "ruby-debug-ide"
  spec.add_development_dependency "ruby-jing"
  spec.add_development_dependency "simplecov"
  spec.add_development_dependency "vcr"
  spec.add_development_dependency "webmock"

  spec.add_dependency "relaton-bib", "~> 0.7.0"
end
metadata ADDED
@@ -0,0 +1,224 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: relaton-omg
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - Ribose Inc.
8
+ autorequire:
9
+ bindir: exe
10
+ cert_chain: []
11
+ date: 2020-02-17 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: byebug
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ">="
18
+ - !ruby/object:Gem::Version
19
+ version: '0'
20
+ type: :development
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ">="
25
+ - !ruby/object:Gem::Version
26
+ version: '0'
27
+ - !ruby/object:Gem::Dependency
28
+ name: debase
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ">="
32
+ - !ruby/object:Gem::Version
33
+ version: '0'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ">="
39
+ - !ruby/object:Gem::Version
40
+ version: '0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: equivalent-xml
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - "~>"
46
+ - !ruby/object:Gem::Version
47
+ version: '0.6'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - "~>"
53
+ - !ruby/object:Gem::Version
54
+ version: '0.6'
55
+ - !ruby/object:Gem::Dependency
56
+ name: pry-byebug
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - ">="
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - ">="
67
+ - !ruby/object:Gem::Version
68
+ version: '0'
69
+ - !ruby/object:Gem::Dependency
70
+ name: rake
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - "~>"
74
+ - !ruby/object:Gem::Version
75
+ version: '10.0'
76
+ type: :development
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - "~>"
81
+ - !ruby/object:Gem::Version
82
+ version: '10.0'
83
+ - !ruby/object:Gem::Dependency
84
+ name: ruby-debug-ide
85
+ requirement: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - ">="
88
+ - !ruby/object:Gem::Version
89
+ version: '0'
90
+ type: :development
91
+ prerelease: false
92
+ version_requirements: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - ">="
95
+ - !ruby/object:Gem::Version
96
+ version: '0'
97
+ - !ruby/object:Gem::Dependency
98
+ name: ruby-jing
99
+ requirement: !ruby/object:Gem::Requirement
100
+ requirements:
101
+ - - ">="
102
+ - !ruby/object:Gem::Version
103
+ version: '0'
104
+ type: :development
105
+ prerelease: false
106
+ version_requirements: !ruby/object:Gem::Requirement
107
+ requirements:
108
+ - - ">="
109
+ - !ruby/object:Gem::Version
110
+ version: '0'
111
+ - !ruby/object:Gem::Dependency
112
+ name: simplecov
113
+ requirement: !ruby/object:Gem::Requirement
114
+ requirements:
115
+ - - ">="
116
+ - !ruby/object:Gem::Version
117
+ version: '0'
118
+ type: :development
119
+ prerelease: false
120
+ version_requirements: !ruby/object:Gem::Requirement
121
+ requirements:
122
+ - - ">="
123
+ - !ruby/object:Gem::Version
124
+ version: '0'
125
+ - !ruby/object:Gem::Dependency
126
+ name: vcr
127
+ requirement: !ruby/object:Gem::Requirement
128
+ requirements:
129
+ - - ">="
130
+ - !ruby/object:Gem::Version
131
+ version: '0'
132
+ type: :development
133
+ prerelease: false
134
+ version_requirements: !ruby/object:Gem::Requirement
135
+ requirements:
136
+ - - ">="
137
+ - !ruby/object:Gem::Version
138
+ version: '0'
139
+ - !ruby/object:Gem::Dependency
140
+ name: webmock
141
+ requirement: !ruby/object:Gem::Requirement
142
+ requirements:
143
+ - - ">="
144
+ - !ruby/object:Gem::Version
145
+ version: '0'
146
+ type: :development
147
+ prerelease: false
148
+ version_requirements: !ruby/object:Gem::Requirement
149
+ requirements:
150
+ - - ">="
151
+ - !ruby/object:Gem::Version
152
+ version: '0'
153
+ - !ruby/object:Gem::Dependency
154
+ name: relaton-bib
155
+ requirement: !ruby/object:Gem::Requirement
156
+ requirements:
157
+ - - "~>"
158
+ - !ruby/object:Gem::Version
159
+ version: 0.7.0
160
+ type: :runtime
161
+ prerelease: false
162
+ version_requirements: !ruby/object:Gem::Requirement
163
+ requirements:
164
+ - - "~>"
165
+ - !ruby/object:Gem::Version
166
+ version: 0.7.0
167
+ description: 'RelatonOmg: retrieve OMG Standards for bibliographic use using the IsoBibliographicItem
168
+ model'
169
+ email:
170
+ - open.source@ribose.com
171
+ executables: []
172
+ extensions: []
173
+ extra_rdoc_files: []
174
+ files:
175
+ - ".github/workflows/macos.yml"
176
+ - ".github/workflows/ubuntu.yml"
177
+ - ".github/workflows/windows.yml"
178
+ - ".gitignore"
179
+ - ".rspec"
180
+ - ".rubocop.yml"
181
+ - Gemfile
182
+ - LICENSE.txt
183
+ - README.adoc
184
+ - Rakefile
185
+ - bin/console
186
+ - bin/rspec
187
+ - bin/setup
188
+ - grammars/biblio.rng
189
+ - lib/relaton_omg.rb
190
+ - lib/relaton_omg/hash_converter.rb
191
+ - lib/relaton_omg/omg_bibliographic_item.rb
192
+ - lib/relaton_omg/omg_bibliography.rb
193
+ - lib/relaton_omg/processor.rb
194
+ - lib/relaton_omg/scrapper.rb
195
+ - lib/relaton_omg/version.rb
196
+ - lib/relaton_omg/xml_parser.rb
197
+ - relaton_omg.gemspec
198
+ homepage: https://github.com/relaton/relaton-ogn
199
+ licenses:
200
+ - BSD-2-Clause
201
+ metadata:
202
+ homepage_uri: https://github.com/relaton/relaton-ogn
203
+ source_code_uri: https://github.com/relaton/relaton-ogn
204
+ post_install_message:
205
+ rdoc_options: []
206
+ require_paths:
207
+ - lib
208
+ required_ruby_version: !ruby/object:Gem::Requirement
209
+ requirements:
210
+ - - ">="
211
+ - !ruby/object:Gem::Version
212
+ version: 2.4.0
213
+ required_rubygems_version: !ruby/object:Gem::Requirement
214
+ requirements:
215
+ - - ">="
216
+ - !ruby/object:Gem::Version
217
+ version: '0'
218
+ requirements: []
219
+ rubygems_version: 3.0.6
220
+ signing_key:
221
+ specification_version: 4
222
+ summary: 'RelatonOmg: retrieve OMG Standards for bibliographic use using the IsoBibliographicItem
223
+ model'
224
+ test_files: []