relaton-ieee 1.1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,154 @@
1
+ module RelatonIeee
2
+ module Scrapper
3
+ class << self
4
+ # rubocop:disable Metrics/MethodLength, Metrics/AbcSize
5
+
6
+ # @param hit [Hash]
7
+ # @return [RelatonIeee::IeeeBibliographicItem]
8
+ def parse_page(hit)
9
+ doc = Nokogiri::HTML Faraday.get(hit["recordURL"]).body
10
+ IeeeBibliographicItem.new(
11
+ fetched: Date.today.to_s,
12
+ title: fetch_title(hit["recordTitle"]),
13
+ docid: fetch_docid(hit["recordTitle"]),
14
+ link: fetch_link(hit["recordURL"]),
15
+ docstatus: fetch_status(doc),
16
+ abstract: fetch_abstract(doc),
17
+ contributor: fetch_contributor(doc),
18
+ language: ["en"],
19
+ script: ["Latn"],
20
+ date: fetch_date(doc),
21
+ committee: fetch_committee(doc),
22
+ )
23
+ end
24
+ # rubocop:enable Metrics/MethodLength, Metrics/AbcSize
25
+
26
+ private
27
+
28
+ # @param title [String]
29
+ # @return [Array<RelatonBib::TypedTitleString>]
30
+ def fetch_title(title)
31
+ [
32
+ RelatonBib::TypedTitleString.new(
33
+ type: "main", content: title, language: "en", script: "Latn",
34
+ ),
35
+ ]
36
+ end
37
+
38
# Build the document identifier from the first whitespace-delimited token
# of the record title.
#
# @param title [String, nil] record title
# @return [Array<RelatonBib::DocumentIdentifier>] a single identifier of
#   type "IEEE", or an empty array when the title is nil or blank
def fetch_docid(title)
  # String#[] with a regexp returns nil when nothing matches; #to_s keeps a
  # nil title from raising and leading whitespace no longer hides the token.
  identifier = title.to_s[/\S+/]
  return [] unless identifier

  [RelatonBib::DocumentIdentifier.new(id: identifier, type: "IEEE")]
end
44
+
45
+ # @param url [String]
46
+ # @return [Array<RelatonBib::TypedUri>]
47
+ def fetch_link(url)
48
+ [RelatonBib::TypedUri.new(type: "src", content: url)]
49
+ end
50
+
51
+ # @param doc [Nokogiri::HTML::Document]
52
+ # @return [RelatonBib::DocumentStatus, NilClass]
53
+ def fetch_status(doc)
54
+ stage = doc.at("//td[.='Status']/following-sibling::td/div")
55
+ return unless stage
56
+
57
+ RelatonBib::DocumentStatus.new(stage: stage.text)
58
+ end
59
+
60
+ # @param identifier [String]
61
+ # @return [String]
62
+ # def fetch_edition(identifier)
63
+ # %r{(?<=r)(?<edition>\d+)$} =~ identifier
64
+ # edition
65
+ # end
66
+
67
+ # @param doc [Nokogiri::HTML::Document]
68
+ # @return [Array<RelatonBib::FormattedString>]
69
+ def fetch_abstract(doc)
70
+ content = doc.at("//div[@class='description']")
71
+ return [] unless content
72
+
73
+ [RelatonBib::FormattedString.new(content: content.text, language: "en",
74
+ script: "Latn")]
75
+ end
76
+
77
+ # @param doc [Nokogiri::HTML::Document]
78
+ # @return [Array<RelatonBib::ContributionInfo>]
79
+ def fetch_contributor(doc)
80
+ name = doc.at(
81
+ "//td[.='IEEE Program Manager']/following-sibling::td/div/a",
82
+ )
83
+ return [] unless name
84
+
85
+ [personn_contrib(name.text)]
86
+ end
87
+
88
+ # @param name [String]
89
+ # @return [RelatonBib::ContributionInfo]
90
+ def personn_contrib(name)
91
+ fname = RelatonBib::FullName.new(
92
+ completename: RelatonBib::LocalizedString.new(name),
93
+ )
94
+ entity = RelatonBib::Person.new(name: fname)
95
+ RelatonBib::ContributionInfo.new(
96
+ entity: entity, role: [type: "author"],
97
+ )
98
+ end
99
+
100
+ # @param name [String]
101
+ # @return [RelatonBib::ContributionInfo]
102
+ # def org_contrib(name)
103
+ # entity = RelatonBib::Organization.new(name: name)
104
+ # RelatonBib::ContributionInfo.new(
105
+ # entity: entity, role: [type: "publisher"],
106
+ # )
107
+ # end
108
+
109
+ # rubocop:disable Metrics/MethodLength
110
+
111
+ # @param doc [Nokogiri::HTML::Document]
112
+ # @return [Array<RelatonBib::BibliographicDate>]
113
+ def fetch_date(doc)
114
+ dates = []
115
+ issued = doc.at "//td[.='Board Approval']/following-sibling::td/div"
116
+ if issued
117
+ dates << RelatonBib::BibliographicDate.new(type: "issued",
118
+ on: issued.text)
119
+ end
120
+ published = doc.at("//td[.='History']/following-sibling::td/div")&.
121
+ text&.match(/(?<=Published Date:)[\d-]+/)&.to_s
122
+ if published
123
+ dates << RelatonBib::BibliographicDate.new(type: "published",
124
+ on: published)
125
+ end
126
+ dates
127
+ end
128
+
129
+ # rubocop:disable Metrics/AbcSize
130
+
131
# Collect the committees named in the detail table of a standard's page.
#
# Looks up the "Sponsor Committee", "Working Group" and "Society" rows;
# each row that is present yields one Committee, in that order.
#
# @param doc [Nokogiri::HTML::Document]
# @return [Array<RelatonIeee::Committee>]
def fetch_committee(doc)
  committees = []
  sponsor = doc.at "//td[.='Sponsor Committee']/following-sibling::td/div"
  if sponsor
    committees << Committee.new(type: "sponsor", name: sponsor.text)
  end
  working = doc.at "//td[.='Working Group']/following-sibling::td/div"
  chair = doc.at "//td[.='Working Group Chair']/following-sibling::td/div"
  if working
    # The chair row may be absent even when the working group is listed,
    # so use safe navigation instead of calling #text on nil.
    committees << Committee.new(type: "working", name: working.text,
                                chair: chair&.text)
  end
  society = doc.at "//td[.='Society']/following-sibling::td/div"
  if society
    committees << Committee.new(type: "society", name: society.text)
  end
  committees
end
151
+ # rubocop:enable Metrics/MethodLength, Metrics/AbcSize
152
+ end
153
+ end
154
+ end
@@ -0,0 +1,3 @@
1
+ module RelatonIeee
2
+ VERSION = "1.1.0".freeze
3
+ end
@@ -0,0 +1,29 @@
1
+ module RelatonIeee
2
+ class XMLParser < RelatonBib::XMLParser
3
+ class << self
4
+ private
5
+
6
+ # Override RelatonBib::XMLParser.item_data method.
7
+ # @param item [Nokogiri::XML::Element]
8
+ # @return [Hash]
9
+ def item_data(item)
10
+ data = super
11
+ ext = item.at "./ext"
12
+ return data unless ext
13
+
14
+ data[:committee] = ext.xpath("./committee").map do |c|
15
+ Committee.new(
16
+ type: c[:type], name: c.at("name").text, chair: c.at("chair")&.text,
17
+ )
18
+ end
19
+ data
20
+ end
21
+
22
+ # @param item_hash [Hash]
23
+ # @return [RelatonBib::BibliographicItem]
24
+ def bib_item(item_hash)
25
+ IeeeBibliographicItem.new item_hash
26
+ end
27
+ end
28
+ end
29
+ end
@@ -0,0 +1,47 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "lib/relaton_ieee/version"
4
+
5
+ Gem::Specification.new do |spec|
6
+ spec.name = "relaton-ieee"
7
+ spec.version = RelatonIeee::VERSION
8
+ spec.authors = ["Ribose Inc."]
9
+ spec.email = ["open.source@ribose.com"]
10
+
11
+ spec.summary = "RelatonIeee: retrieve IEEE Standards for bibliographic "\
12
+ "use using the IeeeBibliographicItem model"
13
+ spec.description = "RelatonIeee: retrieve IEEE Standards for bibliographic "\
14
+ "use using the IeeeBibliographicItem model"
15
+ spec.homepage = "https://github.com/relaton/relaton-ieee"
16
+ spec.license = "BSD-2-Clause"
17
+ spec.required_ruby_version = Gem::Requirement.new(">= 2.3.0")
18
+
19
+ # spec.metadata["allowed_push_host"] = "TODO: Set to 'http://mygemserver.com'"
20
+
21
+ spec.metadata["homepage_uri"] = spec.homepage
22
+ spec.metadata["source_code_uri"] = spec.homepage
23
+ # spec.metadata["changelog_uri"] = "TODO: Put your gem's CHANGELOG.md URL here."
24
+
25
+ # Specify which files should be added to the gem when it is released.
26
+ # The `git ls-files -z` loads the files in the RubyGem that have been added into git.
27
+ spec.files = Dir.chdir(File.expand_path(__dir__)) do
28
+ `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
29
+ end
30
+ spec.bindir = "exe"
31
+ spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
32
+ spec.require_paths = ["lib"]
33
+
34
+ spec.add_development_dependency "debase"
35
+ spec.add_development_dependency "equivalent-xml", "~> 0.6"
36
+ spec.add_development_dependency "pry-byebug"
37
+ spec.add_development_dependency "rake", "~> 10.0"
38
+ # spec.add_development_dependency "rspec", "~> 3.0"
39
+ spec.add_development_dependency "ruby-debug-ide"
40
+ # spec.add_development_dependency "ruby-jing"
41
+ spec.add_development_dependency "simplecov"
42
+ spec.add_development_dependency "vcr"
43
+ spec.add_development_dependency "webmock"
44
+
45
+ spec.add_dependency "faraday", "~> 1.0.0"
46
+ spec.add_dependency "relaton-bib", "~> 1.1.0"
47
+ end
metadata ADDED
@@ -0,0 +1,216 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: relaton-ieee
3
+ version: !ruby/object:Gem::Version
4
+ version: 1.1.0
5
+ platform: ruby
6
+ authors:
7
+ - Ribose Inc.
8
+ autorequire:
9
+ bindir: exe
10
+ cert_chain: []
11
+ date: 2020-06-03 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: debase
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ">="
18
+ - !ruby/object:Gem::Version
19
+ version: '0'
20
+ type: :development
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ">="
25
+ - !ruby/object:Gem::Version
26
+ version: '0'
27
+ - !ruby/object:Gem::Dependency
28
+ name: equivalent-xml
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '0.6'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '0.6'
41
+ - !ruby/object:Gem::Dependency
42
+ name: pry-byebug
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - ">="
46
+ - !ruby/object:Gem::Version
47
+ version: '0'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ">="
53
+ - !ruby/object:Gem::Version
54
+ version: '0'
55
+ - !ruby/object:Gem::Dependency
56
+ name: rake
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - "~>"
60
+ - !ruby/object:Gem::Version
61
+ version: '10.0'
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - "~>"
67
+ - !ruby/object:Gem::Version
68
+ version: '10.0'
69
+ - !ruby/object:Gem::Dependency
70
+ name: ruby-debug-ide
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - ">="
74
+ - !ruby/object:Gem::Version
75
+ version: '0'
76
+ type: :development
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - ">="
81
+ - !ruby/object:Gem::Version
82
+ version: '0'
83
+ - !ruby/object:Gem::Dependency
84
+ name: simplecov
85
+ requirement: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - ">="
88
+ - !ruby/object:Gem::Version
89
+ version: '0'
90
+ type: :development
91
+ prerelease: false
92
+ version_requirements: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - ">="
95
+ - !ruby/object:Gem::Version
96
+ version: '0'
97
+ - !ruby/object:Gem::Dependency
98
+ name: vcr
99
+ requirement: !ruby/object:Gem::Requirement
100
+ requirements:
101
+ - - ">="
102
+ - !ruby/object:Gem::Version
103
+ version: '0'
104
+ type: :development
105
+ prerelease: false
106
+ version_requirements: !ruby/object:Gem::Requirement
107
+ requirements:
108
+ - - ">="
109
+ - !ruby/object:Gem::Version
110
+ version: '0'
111
+ - !ruby/object:Gem::Dependency
112
+ name: webmock
113
+ requirement: !ruby/object:Gem::Requirement
114
+ requirements:
115
+ - - ">="
116
+ - !ruby/object:Gem::Version
117
+ version: '0'
118
+ type: :development
119
+ prerelease: false
120
+ version_requirements: !ruby/object:Gem::Requirement
121
+ requirements:
122
+ - - ">="
123
+ - !ruby/object:Gem::Version
124
+ version: '0'
125
+ - !ruby/object:Gem::Dependency
126
+ name: faraday
127
+ requirement: !ruby/object:Gem::Requirement
128
+ requirements:
129
+ - - "~>"
130
+ - !ruby/object:Gem::Version
131
+ version: 1.0.0
132
+ type: :runtime
133
+ prerelease: false
134
+ version_requirements: !ruby/object:Gem::Requirement
135
+ requirements:
136
+ - - "~>"
137
+ - !ruby/object:Gem::Version
138
+ version: 1.0.0
139
+ - !ruby/object:Gem::Dependency
140
+ name: relaton-bib
141
+ requirement: !ruby/object:Gem::Requirement
142
+ requirements:
143
+ - - "~>"
144
+ - !ruby/object:Gem::Version
145
+ version: 1.1.0
146
+ type: :runtime
147
+ prerelease: false
148
+ version_requirements: !ruby/object:Gem::Requirement
149
+ requirements:
150
+ - - "~>"
151
+ - !ruby/object:Gem::Version
152
+ version: 1.1.0
153
+ description: 'RelatonIeee: retrieve IEEE Standards for bibliographic use using the
154
+ IeeeBibliographicItem model'
155
+ email:
156
+ - open.source@ribose.com
157
+ executables: []
158
+ extensions: []
159
+ extra_rdoc_files: []
160
+ files:
161
+ - ".github/workflows/macos.yml"
162
+ - ".github/workflows/ubuntu.yml"
163
+ - ".github/workflows/windows.yml"
164
+ - ".gitignore"
165
+ - ".rspec"
166
+ - ".rubocop.yml"
167
+ - Gemfile
168
+ - LICENSE.txt
169
+ - README.adoc
170
+ - Rakefile
171
+ - bin/console
172
+ - bin/rspec
173
+ - bin/setup
174
+ - grammars/basicdoc.rng
175
+ - grammars/biblio.rng
176
+ - grammars/isodoc.rng
177
+ - grammars/reqt.rng
178
+ - lib/relaton_ieee.rb
179
+ - lib/relaton_ieee/committee.rb
180
+ - lib/relaton_ieee/hash_converter.rb
181
+ - lib/relaton_ieee/hit.rb
182
+ - lib/relaton_ieee/hit_collection.rb
183
+ - lib/relaton_ieee/ieee_bibliographic_item.rb
184
+ - lib/relaton_ieee/ieee_bibliography.rb
185
+ - lib/relaton_ieee/processor.rb
186
+ - lib/relaton_ieee/scrapper.rb
187
+ - lib/relaton_ieee/version.rb
188
+ - lib/relaton_ieee/xml_parser.rb
189
+ - relaton_ieee.gemspec
190
+ homepage: https://github.com/relaton/relaton-ieee
191
+ licenses:
192
+ - BSD-2-Clause
193
+ metadata:
194
+ homepage_uri: https://github.com/relaton/relaton-ieee
195
+ source_code_uri: https://github.com/relaton/relaton-ieee
196
+ post_install_message:
197
+ rdoc_options: []
198
+ require_paths:
199
+ - lib
200
+ required_ruby_version: !ruby/object:Gem::Requirement
201
+ requirements:
202
+ - - ">="
203
+ - !ruby/object:Gem::Version
204
+ version: 2.3.0
205
+ required_rubygems_version: !ruby/object:Gem::Requirement
206
+ requirements:
207
+ - - ">="
208
+ - !ruby/object:Gem::Version
209
+ version: '0'
210
+ requirements: []
211
+ rubygems_version: 3.0.6
212
+ signing_key:
213
+ specification_version: 4
214
+ summary: 'RelatonIeee: retrieve IEEE Standards for bibliographic use using the IeeeBibliographicItem
215
+ model'
216
+ test_files: []