relaton-ieee 1.9.1 → 1.10.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop.yml +2 -0
- data/README.adoc +10 -0
- data/grammars/basicdoc.rng +26 -7
- data/grammars/biblio.rng +2 -2
- data/grammars/isodoc.rng +617 -89
- data/grammars/reqt.rng +34 -5
- data/lib/relaton_ieee/bibxml_parser.rb +23 -0
- data/lib/relaton_ieee/data_fetcher.rb +30 -7
- data/lib/relaton_ieee/data_parser.rb +18 -6
- data/lib/relaton_ieee/hit_collection.rb +18 -14
- data/lib/relaton_ieee/ieee_bibliography.rb +3 -3
- data/lib/relaton_ieee/pub_id.rb +149 -0
- data/lib/relaton_ieee/rawbib_id_parser.rb +515 -0
- data/lib/relaton_ieee/scrapper.rb +40 -47
- data/lib/relaton_ieee/version.rb +1 -1
- data/lib/relaton_ieee.rb +1 -0
- data/relaton_ieee.gemspec +1 -1
- metadata +7 -4
@@ -6,40 +6,39 @@ module RelatonIeee
|
|
6
6
|
# @param hit [Hash]
|
7
7
|
# @return [RelatonIeee::IeeeBibliographicItem]
|
8
8
|
def parse_page(hit)
|
9
|
-
doc = Nokogiri::HTML Faraday.get(hit[
|
9
|
+
doc = Nokogiri::HTML Faraday.get(hit[:url]).body
|
10
10
|
IeeeBibliographicItem.new(
|
11
11
|
fetched: Date.today.to_s,
|
12
|
-
title: fetch_title(
|
13
|
-
docid: fetch_docid(hit[
|
14
|
-
link: fetch_link(hit[
|
12
|
+
title: fetch_title(doc),
|
13
|
+
docid: fetch_docid(hit[:ref]),
|
14
|
+
link: fetch_link(hit[:url]),
|
15
15
|
docstatus: fetch_status(doc),
|
16
16
|
abstract: fetch_abstract(doc),
|
17
17
|
contributor: fetch_contributor(doc),
|
18
18
|
language: ["en"],
|
19
19
|
script: ["Latn"],
|
20
20
|
date: fetch_date(doc),
|
21
|
-
committee: fetch_committee(doc)
|
21
|
+
committee: fetch_committee(doc),
|
22
22
|
)
|
23
23
|
end
|
24
24
|
# rubocop:enable Metrics/MethodLength, Metrics/AbcSize
|
25
25
|
|
26
26
|
private
|
27
27
|
|
28
|
-
# @param
|
28
|
+
# @param doc [Nokogiri::HTML::Document]
|
29
29
|
# @return [Array<RelatonBib::TypedTitleString>]
|
30
|
-
def fetch_title(
|
31
|
-
[
|
30
|
+
def fetch_title(doc)
|
31
|
+
doc.xpath("//h2[@id='stnd-title']").map do |t|
|
32
32
|
RelatonBib::TypedTitleString.new(
|
33
|
-
type: "main", content:
|
34
|
-
)
|
35
|
-
|
33
|
+
type: "main", content: t.text, language: "en", script: "Latn",
|
34
|
+
)
|
35
|
+
end
|
36
36
|
end
|
37
37
|
|
38
|
-
# @param
|
38
|
+
# @param ref [String]
|
39
39
|
# @return [Array<RelatonBib::DocumentIdentifier>]
|
40
|
-
def fetch_docid(
|
41
|
-
|
42
|
-
[RelatonBib::DocumentIdentifier.new(id: identifier, type: "IEEE")]
|
40
|
+
def fetch_docid(ref)
|
41
|
+
[RelatonBib::DocumentIdentifier.new(id: ref, type: "IEEE")]
|
43
42
|
end
|
44
43
|
|
45
44
|
# @param url [String]
|
@@ -51,10 +50,10 @@ module RelatonIeee
|
|
51
50
|
# @param doc [Nokogiri::HTML::Document]
|
52
51
|
# @return [RelatonBib::DocumentStatus, NilClass]
|
53
52
|
def fetch_status(doc)
|
54
|
-
stage = doc.at("//
|
53
|
+
stage = doc.at("//dd[@id='stnd-status']")
|
55
54
|
return unless stage
|
56
55
|
|
57
|
-
RelatonBib::DocumentStatus.new(stage: stage.text)
|
56
|
+
RelatonBib::DocumentStatus.new(stage: stage.text.split.first)
|
58
57
|
end
|
59
58
|
|
60
59
|
# @param identifier [String]
|
@@ -67,33 +66,30 @@ module RelatonIeee
|
|
67
66
|
# @param doc [Nokogiri::HTML::Document]
|
68
67
|
# @return [Array<RelatonBib::FormattedString>]
|
69
68
|
def fetch_abstract(doc)
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
69
|
+
doc.xpath("//div[@id='stnd-description']").map do |a|
|
70
|
+
RelatonBib::FormattedString.new(
|
71
|
+
content: a.text.strip, language: "en", script: "Latn",
|
72
|
+
)
|
73
|
+
end
|
75
74
|
end
|
76
75
|
|
77
76
|
# @param doc [Nokogiri::HTML::Document]
|
78
77
|
# @return [Array<RelatonBib::ContributionInfo>]
|
79
78
|
def fetch_contributor(doc)
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
return [] unless name
|
84
|
-
|
85
|
-
[personn_contrib(name.text)]
|
79
|
+
doc.xpath("//dd[@id='stnd-staff-liaison']/text()").map do |name|
|
80
|
+
person_contrib(name.text.strip)
|
81
|
+
end
|
86
82
|
end
|
87
83
|
|
88
84
|
# @param name [String]
|
89
85
|
# @return [RelatonBib::ContributionInfo]
|
90
|
-
def
|
86
|
+
def person_contrib(name)
|
91
87
|
fname = RelatonBib::FullName.new(
|
92
|
-
completename: RelatonBib::LocalizedString.new(name)
|
88
|
+
completename: RelatonBib::LocalizedString.new(name),
|
93
89
|
)
|
94
90
|
entity = RelatonBib::Person.new(name: fname)
|
95
91
|
RelatonBib::ContributionInfo.new(
|
96
|
-
entity: entity, role: [type: "author"]
|
92
|
+
entity: entity, role: [type: "author"],
|
97
93
|
)
|
98
94
|
end
|
99
95
|
|
@@ -112,16 +108,13 @@ module RelatonIeee
|
|
112
108
|
# @return [Array<RelatonBib::BibliographicDate>]
|
113
109
|
def fetch_date(doc)
|
114
110
|
dates = []
|
115
|
-
|
116
|
-
if
|
117
|
-
dates << RelatonBib::BibliographicDate.new(type: "issued",
|
118
|
-
on: issued.text)
|
111
|
+
id = doc.at "//dd[@id='stnd-approval-date']"
|
112
|
+
if id
|
113
|
+
dates << RelatonBib::BibliographicDate.new(type: "issued", on: id.text)
|
119
114
|
end
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
dates << RelatonBib::BibliographicDate.new(type: "published",
|
124
|
-
on: published)
|
115
|
+
pd = doc.at("//dd[@id='stnd-published-date']")
|
116
|
+
if pd
|
117
|
+
dates << RelatonBib::BibliographicDate.new(type: "published", on: pd.text)
|
125
118
|
end
|
126
119
|
dates
|
127
120
|
end
|
@@ -132,23 +125,23 @@ module RelatonIeee
|
|
132
125
|
# @return [Array<RelatonIeee::Committee>]
|
133
126
|
def fetch_committee(doc)
|
134
127
|
committees = []
|
135
|
-
sponsor = doc.at "//
|
128
|
+
sponsor = doc.at "//dd[@id='stnd-committee']/text()"
|
136
129
|
if sponsor
|
137
|
-
committees << Committee.new(type: "sponsor", name: sponsor.text)
|
130
|
+
committees << Committee.new(type: "sponsor", name: sponsor.text.strip)
|
138
131
|
end
|
139
132
|
sponsor = doc.at "//td[.='Standards Committee']/following-sibling::td/div/a"
|
140
133
|
if sponsor
|
141
134
|
committees << Committee.new(type: "standard", name: sponsor.text)
|
142
135
|
end
|
143
|
-
working = doc.at "//
|
144
|
-
chair = doc.at "//td[.='Working Group Chair']/following-sibling::td/div"
|
136
|
+
working = doc.at "//dd[@id='stnd-working-group']/text()"
|
145
137
|
if working
|
146
|
-
|
138
|
+
chair = doc.at "//dd[@id='stnd-working-group-chair']"
|
139
|
+
committees << Committee.new(type: "working", name: working.text.strip,
|
147
140
|
chair: chair.text)
|
148
141
|
end
|
149
|
-
society = doc.at "//
|
142
|
+
society = doc.at "//dd[@id='stnd-society']/text()"
|
150
143
|
if society
|
151
|
-
committees << Committee.new(type: "society", name: society.text)
|
144
|
+
committees << Committee.new(type: "society", name: society.text.strip)
|
152
145
|
end
|
153
146
|
committees
|
154
147
|
end
|
data/lib/relaton_ieee/version.rb
CHANGED
data/lib/relaton_ieee.rb
CHANGED
@@ -9,6 +9,7 @@ require "relaton_ieee/scrapper"
|
|
9
9
|
require "relaton_ieee/ieee_bibliographic_item"
|
10
10
|
require "relaton_ieee/committee"
|
11
11
|
require "relaton_ieee/xml_parser"
|
12
|
+
require "relaton_ieee/bibxml_parser"
|
12
13
|
require "relaton_ieee/hash_converter"
|
13
14
|
require "relaton_ieee/data_fetcher"
|
14
15
|
|
data/relaton_ieee.gemspec
CHANGED
@@ -38,6 +38,6 @@ Gem::Specification.new do |spec|
|
|
38
38
|
spec.add_development_dependency "webmock"
|
39
39
|
|
40
40
|
spec.add_dependency "faraday", "~> 1.1"
|
41
|
-
spec.add_dependency "relaton-bib", "~> 1.
|
41
|
+
spec.add_dependency "relaton-bib", "~> 1.10.0"
|
42
42
|
spec.add_dependency "rubyzip", "~> 2.3.0"
|
43
43
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: relaton-ieee
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.
|
4
|
+
version: 1.10.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ribose Inc.
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2022-01-30 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: equivalent-xml
|
@@ -100,14 +100,14 @@ dependencies:
|
|
100
100
|
requirements:
|
101
101
|
- - "~>"
|
102
102
|
- !ruby/object:Gem::Version
|
103
|
-
version: 1.
|
103
|
+
version: 1.10.0
|
104
104
|
type: :runtime
|
105
105
|
prerelease: false
|
106
106
|
version_requirements: !ruby/object:Gem::Requirement
|
107
107
|
requirements:
|
108
108
|
- - "~>"
|
109
109
|
- !ruby/object:Gem::Version
|
110
|
-
version: 1.
|
110
|
+
version: 1.10.0
|
111
111
|
- !ruby/object:Gem::Dependency
|
112
112
|
name: rubyzip
|
113
113
|
requirement: !ruby/object:Gem::Requirement
|
@@ -146,6 +146,7 @@ files:
|
|
146
146
|
- grammars/isodoc.rng
|
147
147
|
- grammars/reqt.rng
|
148
148
|
- lib/relaton_ieee.rb
|
149
|
+
- lib/relaton_ieee/bibxml_parser.rb
|
149
150
|
- lib/relaton_ieee/committee.rb
|
150
151
|
- lib/relaton_ieee/data_fetcher.rb
|
151
152
|
- lib/relaton_ieee/data_parser.rb
|
@@ -155,6 +156,8 @@ files:
|
|
155
156
|
- lib/relaton_ieee/ieee_bibliographic_item.rb
|
156
157
|
- lib/relaton_ieee/ieee_bibliography.rb
|
157
158
|
- lib/relaton_ieee/processor.rb
|
159
|
+
- lib/relaton_ieee/pub_id.rb
|
160
|
+
- lib/relaton_ieee/rawbib_id_parser.rb
|
158
161
|
- lib/relaton_ieee/scrapper.rb
|
159
162
|
- lib/relaton_ieee/version.rb
|
160
163
|
- lib/relaton_ieee/xml_parser.rb
|