relaton-ieee 1.9.1 → 1.10.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.rubocop.yml +2 -0
- data/README.adoc +10 -0
- data/grammars/basicdoc.rng +26 -7
- data/grammars/biblio.rng +2 -2
- data/grammars/isodoc.rng +617 -89
- data/grammars/reqt.rng +34 -5
- data/lib/relaton_ieee/bibxml_parser.rb +23 -0
- data/lib/relaton_ieee/data_fetcher.rb +30 -7
- data/lib/relaton_ieee/data_parser.rb +18 -6
- data/lib/relaton_ieee/hit_collection.rb +18 -14
- data/lib/relaton_ieee/ieee_bibliography.rb +3 -3
- data/lib/relaton_ieee/pub_id.rb +149 -0
- data/lib/relaton_ieee/rawbib_id_parser.rb +515 -0
- data/lib/relaton_ieee/scrapper.rb +40 -47
- data/lib/relaton_ieee/version.rb +1 -1
- data/lib/relaton_ieee.rb +1 -0
- data/relaton_ieee.gemspec +1 -1
- metadata +7 -4
@@ -6,40 +6,39 @@ module RelatonIeee
|
|
6
6
|
# @param hit [Hash]
|
7
7
|
# @return [RelatonIeee::IeeeBibliographicItem]
|
8
8
|
def parse_page(hit)
|
9
|
-
doc = Nokogiri::HTML Faraday.get(hit[
|
9
|
+
doc = Nokogiri::HTML Faraday.get(hit[:url]).body
|
10
10
|
IeeeBibliographicItem.new(
|
11
11
|
fetched: Date.today.to_s,
|
12
|
-
title: fetch_title(
|
13
|
-
docid: fetch_docid(hit[
|
14
|
-
link: fetch_link(hit[
|
12
|
+
title: fetch_title(doc),
|
13
|
+
docid: fetch_docid(hit[:ref]),
|
14
|
+
link: fetch_link(hit[:url]),
|
15
15
|
docstatus: fetch_status(doc),
|
16
16
|
abstract: fetch_abstract(doc),
|
17
17
|
contributor: fetch_contributor(doc),
|
18
18
|
language: ["en"],
|
19
19
|
script: ["Latn"],
|
20
20
|
date: fetch_date(doc),
|
21
|
-
committee: fetch_committee(doc)
|
21
|
+
committee: fetch_committee(doc),
|
22
22
|
)
|
23
23
|
end
|
24
24
|
# rubocop:enable Metrics/MethodLength, Metrics/AbcSize
|
25
25
|
|
26
26
|
private
|
27
27
|
|
28
|
-
# @param
|
28
|
+
# @param doc [Nokogiri::HTML::Document]
|
29
29
|
# @return [Array<RelatonBib::TypedTitleString>]
|
30
|
-
def fetch_title(
|
31
|
-
[
|
30
|
+
def fetch_title(doc)
|
31
|
+
doc.xpath("//h2[@id='stnd-title']").map do |t|
|
32
32
|
RelatonBib::TypedTitleString.new(
|
33
|
-
type: "main", content:
|
34
|
-
)
|
35
|
-
|
33
|
+
type: "main", content: t.text, language: "en", script: "Latn",
|
34
|
+
)
|
35
|
+
end
|
36
36
|
end
|
37
37
|
|
38
|
-
# @param
|
38
|
+
# @param ref [String]
|
39
39
|
# @return [Array<RelatonBib::DocumentIdentifier>]
|
40
|
-
def fetch_docid(
|
41
|
-
|
42
|
-
[RelatonBib::DocumentIdentifier.new(id: identifier, type: "IEEE")]
|
40
|
+
def fetch_docid(ref)
|
41
|
+
[RelatonBib::DocumentIdentifier.new(id: ref, type: "IEEE")]
|
43
42
|
end
|
44
43
|
|
45
44
|
# @param url [String]
|
@@ -51,10 +50,10 @@ module RelatonIeee
|
|
51
50
|
# @param doc [Nokogiri::HTML::Document]
|
52
51
|
# @return [RelatonBib::DocumentStatus, NilClass]
|
53
52
|
def fetch_status(doc)
|
54
|
-
stage = doc.at("//
|
53
|
+
stage = doc.at("//dd[@id='stnd-status']")
|
55
54
|
return unless stage
|
56
55
|
|
57
|
-
RelatonBib::DocumentStatus.new(stage: stage.text)
|
56
|
+
RelatonBib::DocumentStatus.new(stage: stage.text.split.first)
|
58
57
|
end
|
59
58
|
|
60
59
|
# @param identifier [String]
|
@@ -67,33 +66,30 @@ module RelatonIeee
|
|
67
66
|
# @param doc [Nokogiri::HTML::Document]
|
68
67
|
# @return [Array<RelatonBib::FormattedString>]
|
69
68
|
def fetch_abstract(doc)
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
69
|
+
doc.xpath("//div[@id='stnd-description']").map do |a|
|
70
|
+
RelatonBib::FormattedString.new(
|
71
|
+
content: a.text.strip, language: "en", script: "Latn",
|
72
|
+
)
|
73
|
+
end
|
75
74
|
end
|
76
75
|
|
77
76
|
# @param doc [Nokogiri::HTML::Document]
|
78
77
|
# @return [Array<RelatonBib::ContributionInfo>]
|
79
78
|
def fetch_contributor(doc)
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
return [] unless name
|
84
|
-
|
85
|
-
[personn_contrib(name.text)]
|
79
|
+
doc.xpath("//dd[@id='stnd-staff-liaison']/text()").map do |name|
|
80
|
+
person_contrib(name.text.strip)
|
81
|
+
end
|
86
82
|
end
|
87
83
|
|
88
84
|
# @param name [String]
|
89
85
|
# @return [RelatonBib::ContributionInfo]
|
90
|
-
def
|
86
|
+
def person_contrib(name)
|
91
87
|
fname = RelatonBib::FullName.new(
|
92
|
-
completename: RelatonBib::LocalizedString.new(name)
|
88
|
+
completename: RelatonBib::LocalizedString.new(name),
|
93
89
|
)
|
94
90
|
entity = RelatonBib::Person.new(name: fname)
|
95
91
|
RelatonBib::ContributionInfo.new(
|
96
|
-
entity: entity, role: [type: "author"]
|
92
|
+
entity: entity, role: [type: "author"],
|
97
93
|
)
|
98
94
|
end
|
99
95
|
|
@@ -112,16 +108,13 @@ module RelatonIeee
|
|
112
108
|
# @return [Array<RelatonBib::BibliographicDate>]
|
113
109
|
def fetch_date(doc)
|
114
110
|
dates = []
|
115
|
-
|
116
|
-
if
|
117
|
-
dates << RelatonBib::BibliographicDate.new(type: "issued",
|
118
|
-
on: issued.text)
|
111
|
+
id = doc.at "//dd[@id='stnd-approval-date']"
|
112
|
+
if id
|
113
|
+
dates << RelatonBib::BibliographicDate.new(type: "issued", on: id.text)
|
119
114
|
end
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
dates << RelatonBib::BibliographicDate.new(type: "published",
|
124
|
-
on: published)
|
115
|
+
pd = doc.at("//dd[@id='stnd-published-date']")
|
116
|
+
if pd
|
117
|
+
dates << RelatonBib::BibliographicDate.new(type: "published", on: pd.text)
|
125
118
|
end
|
126
119
|
dates
|
127
120
|
end
|
@@ -132,23 +125,23 @@ module RelatonIeee
|
|
132
125
|
# @return [Array<RelatonIeee::Committee>]
|
133
126
|
def fetch_committee(doc)
|
134
127
|
committees = []
|
135
|
-
sponsor = doc.at "//
|
128
|
+
sponsor = doc.at "//dd[@id='stnd-committee']/text()"
|
136
129
|
if sponsor
|
137
|
-
committees << Committee.new(type: "sponsor", name: sponsor.text)
|
130
|
+
committees << Committee.new(type: "sponsor", name: sponsor.text.strip)
|
138
131
|
end
|
139
132
|
sponsor = doc.at "//td[.='Standards Committee']/following-sibling::td/div/a"
|
140
133
|
if sponsor
|
141
134
|
committees << Committee.new(type: "standard", name: sponsor.text)
|
142
135
|
end
|
143
|
-
working = doc.at "//
|
144
|
-
chair = doc.at "//td[.='Working Group Chair']/following-sibling::td/div"
|
136
|
+
working = doc.at "//dd[@id='stnd-working-group']/text()"
|
145
137
|
if working
|
146
|
-
|
138
|
+
chair = doc.at "//dd[@id='stnd-working-group-chair']"
|
139
|
+
committees << Committee.new(type: "working", name: working.text.strip,
|
147
140
|
chair: chair.text)
|
148
141
|
end
|
149
|
-
society = doc.at "//
|
142
|
+
society = doc.at "//dd[@id='stnd-society']/text()"
|
150
143
|
if society
|
151
|
-
committees << Committee.new(type: "society", name: society.text)
|
144
|
+
committees << Committee.new(type: "society", name: society.text.strip)
|
152
145
|
end
|
153
146
|
committees
|
154
147
|
end
|
data/lib/relaton_ieee/version.rb
CHANGED
data/lib/relaton_ieee.rb
CHANGED
@@ -9,6 +9,7 @@ require "relaton_ieee/scrapper"
|
|
9
9
|
require "relaton_ieee/ieee_bibliographic_item"
|
10
10
|
require "relaton_ieee/committee"
|
11
11
|
require "relaton_ieee/xml_parser"
|
12
|
+
require "relaton_ieee/bibxml_parser"
|
12
13
|
require "relaton_ieee/hash_converter"
|
13
14
|
require "relaton_ieee/data_fetcher"
|
14
15
|
|
data/relaton_ieee.gemspec
CHANGED
@@ -38,6 +38,6 @@ Gem::Specification.new do |spec|
|
|
38
38
|
spec.add_development_dependency "webmock"
|
39
39
|
|
40
40
|
spec.add_dependency "faraday", "~> 1.1"
|
41
|
-
spec.add_dependency "relaton-bib", "~> 1.
|
41
|
+
spec.add_dependency "relaton-bib", "~> 1.10.0"
|
42
42
|
spec.add_dependency "rubyzip", "~> 2.3.0"
|
43
43
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: relaton-ieee
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.
|
4
|
+
version: 1.10.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ribose Inc.
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2022-01-30 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: equivalent-xml
|
@@ -100,14 +100,14 @@ dependencies:
|
|
100
100
|
requirements:
|
101
101
|
- - "~>"
|
102
102
|
- !ruby/object:Gem::Version
|
103
|
-
version: 1.
|
103
|
+
version: 1.10.0
|
104
104
|
type: :runtime
|
105
105
|
prerelease: false
|
106
106
|
version_requirements: !ruby/object:Gem::Requirement
|
107
107
|
requirements:
|
108
108
|
- - "~>"
|
109
109
|
- !ruby/object:Gem::Version
|
110
|
-
version: 1.
|
110
|
+
version: 1.10.0
|
111
111
|
- !ruby/object:Gem::Dependency
|
112
112
|
name: rubyzip
|
113
113
|
requirement: !ruby/object:Gem::Requirement
|
@@ -146,6 +146,7 @@ files:
|
|
146
146
|
- grammars/isodoc.rng
|
147
147
|
- grammars/reqt.rng
|
148
148
|
- lib/relaton_ieee.rb
|
149
|
+
- lib/relaton_ieee/bibxml_parser.rb
|
149
150
|
- lib/relaton_ieee/committee.rb
|
150
151
|
- lib/relaton_ieee/data_fetcher.rb
|
151
152
|
- lib/relaton_ieee/data_parser.rb
|
@@ -155,6 +156,8 @@ files:
|
|
155
156
|
- lib/relaton_ieee/ieee_bibliographic_item.rb
|
156
157
|
- lib/relaton_ieee/ieee_bibliography.rb
|
157
158
|
- lib/relaton_ieee/processor.rb
|
159
|
+
- lib/relaton_ieee/pub_id.rb
|
160
|
+
- lib/relaton_ieee/rawbib_id_parser.rb
|
158
161
|
- lib/relaton_ieee/scrapper.rb
|
159
162
|
- lib/relaton_ieee/version.rb
|
160
163
|
- lib/relaton_ieee/xml_parser.rb
|