relaton-ieee 1.9.1 → 1.10.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -6,40 +6,39 @@ module RelatonIeee
6
6
  # @param hit [Hash]
7
7
  # @return [RelatonIeee::IeeeBibliographicItem]
8
8
  def parse_page(hit)
9
- doc = Nokogiri::HTML Faraday.get(hit["recordURL"]).body
9
+ doc = Nokogiri::HTML Faraday.get(hit[:url]).body
10
10
  IeeeBibliographicItem.new(
11
11
  fetched: Date.today.to_s,
12
- title: fetch_title(hit["recordTitle"]),
13
- docid: fetch_docid(hit["recordTitle"]),
14
- link: fetch_link(hit["recordURL"]),
12
+ title: fetch_title(doc),
13
+ docid: fetch_docid(hit[:ref]),
14
+ link: fetch_link(hit[:url]),
15
15
  docstatus: fetch_status(doc),
16
16
  abstract: fetch_abstract(doc),
17
17
  contributor: fetch_contributor(doc),
18
18
  language: ["en"],
19
19
  script: ["Latn"],
20
20
  date: fetch_date(doc),
21
- committee: fetch_committee(doc)
21
+ committee: fetch_committee(doc),
22
22
  )
23
23
  end
24
24
  # rubocop:enable Metrics/MethodLength, Metrics/AbcSize
25
25
 
26
26
  private
27
27
 
28
- # @param title [String]
28
+ # @param doc [Nokogiri::HTML4::Document]
29
29
  # @return [Array<RelatonBib::TypedTitleString>]
30
- def fetch_title(title)
31
- [
30
+ def fetch_title(doc)
31
+ doc.xpath("//h2[@id='stnd-title']").map do |t|
32
32
  RelatonBib::TypedTitleString.new(
33
- type: "main", content: title, language: "en", script: "Latn"
34
- ),
35
- ]
33
+ type: "main", content: t.text, language: "en", script: "Latn",
34
+ )
35
+ end
36
36
  end
37
37
 
38
- # @param title [String]
38
+ # @param ref [String]
39
39
  # @return [Array<RelatonBib::DocumentIdentifier>]
40
- def fetch_docid(title)
41
- /^(?<identifier>(?:\w+\s)?\S+)/ =~ title
42
- [RelatonBib::DocumentIdentifier.new(id: identifier, type: "IEEE")]
40
+ def fetch_docid(ref)
41
+ [RelatonBib::DocumentIdentifier.new(id: ref, type: "IEEE")]
43
42
  end
44
43
 
45
44
  # @param url [String]
@@ -51,10 +50,10 @@ module RelatonIeee
51
50
  # @param doc [Nokogiri::HTML::Document]
52
51
  # @return [RelatonBib::DocumentStatus, NilClass]
53
52
  def fetch_status(doc)
54
- stage = doc.at("//td[.='Status']/following-sibling::td/div")
53
+ stage = doc.at("//dd[@id='stnd-status']")
55
54
  return unless stage
56
55
 
57
- RelatonBib::DocumentStatus.new(stage: stage.text)
56
+ RelatonBib::DocumentStatus.new(stage: stage.text.split.first)
58
57
  end
59
58
 
60
59
  # @param identifier [String]
@@ -67,33 +66,30 @@ module RelatonIeee
67
66
  # @param doc [Nokogiri::HTML::Document]
68
67
  # @return [Array<RelatonBib::FormattedString>]
69
68
  def fetch_abstract(doc)
70
- content = doc.at("//div[@class='description']")
71
- return [] unless content
72
-
73
- [RelatonBib::FormattedString.new(content: content.text, language: "en",
74
- script: "Latn")]
69
+ doc.xpath("//div[@id='stnd-description']").map do |a|
70
+ RelatonBib::FormattedString.new(
71
+ content: a.text.strip, language: "en", script: "Latn",
72
+ )
73
+ end
75
74
  end
76
75
 
77
76
  # @param doc [Nokogiri::HTML::Document]
78
77
  # @return [Array<RelatonBib::ContributionInfo>]
79
78
  def fetch_contributor(doc)
80
- name = doc.at(
81
- "//td[.='IEEE Program Manager']/following-sibling::td/div/a"
82
- )
83
- return [] unless name
84
-
85
- [personn_contrib(name.text)]
79
+ doc.xpath("//dd[@id='stnd-staff-liaison']/text()").map do |name|
80
+ person_contrib(name.text.strip)
81
+ end
86
82
  end
87
83
 
88
84
  # @param name [String]
89
85
  # @return [RelatonBib::ContributionInfo]
90
- def personn_contrib(name)
86
+ def person_contrib(name)
91
87
  fname = RelatonBib::FullName.new(
92
- completename: RelatonBib::LocalizedString.new(name)
88
+ completename: RelatonBib::LocalizedString.new(name),
93
89
  )
94
90
  entity = RelatonBib::Person.new(name: fname)
95
91
  RelatonBib::ContributionInfo.new(
96
- entity: entity, role: [type: "author"]
92
+ entity: entity, role: [type: "author"],
97
93
  )
98
94
  end
99
95
 
@@ -112,16 +108,13 @@ module RelatonIeee
112
108
  # @return [Array<RelatonBib::BibliographicDate>]
113
109
  def fetch_date(doc)
114
110
  dates = []
115
- issued = doc.at "//td[.='Board Approval']/following-sibling::td/div"
116
- if issued
117
- dates << RelatonBib::BibliographicDate.new(type: "issued",
118
- on: issued.text)
111
+ id = doc.at "//dd[@id='stnd-approval-date']"
112
+ if id
113
+ dates << RelatonBib::BibliographicDate.new(type: "issued", on: id.text)
119
114
  end
120
- published = doc.at("//td[.='History']/following-sibling::td/div")
121
- &.text&.match(/(?<=Published Date:)[\d-]+/)&.to_s
122
- if published
123
- dates << RelatonBib::BibliographicDate.new(type: "published",
124
- on: published)
115
+ pd = doc.at("//dd[@id='stnd-published-date']")
116
+ if pd
117
+ dates << RelatonBib::BibliographicDate.new(type: "published", on: pd.text)
125
118
  end
126
119
  dates
127
120
  end
@@ -132,23 +125,23 @@ module RelatonIeee
132
125
  # @return [Array<RelatonIeee::Committee>]
133
126
  def fetch_committee(doc)
134
127
  committees = []
135
- sponsor = doc.at "//td[.='Sponsor Committee']/following-sibling::td/div"
128
+ sponsor = doc.at "//dd[@id='stnd-committee']/text()"
136
129
  if sponsor
137
- committees << Committee.new(type: "sponsor", name: sponsor.text)
130
+ committees << Committee.new(type: "sponsor", name: sponsor.text.strip)
138
131
  end
139
132
  sponsor = doc.at "//td[.='Standards Committee']/following-sibling::td/div/a"
140
133
  if sponsor
141
134
  committees << Committee.new(type: "standard", name: sponsor.text)
142
135
  end
143
- working = doc.at "//td[.='Working Group']/following-sibling::td/div"
144
- chair = doc.at "//td[.='Working Group Chair']/following-sibling::td/div"
136
+ working = doc.at "//dd[@id='stnd-working-group']/text()"
145
137
  if working
146
- committees << Committee.new(type: "working", name: working.text,
138
+ chair = doc.at "//dd[@id='stnd-working-group-chair']"
139
+ committees << Committee.new(type: "working", name: working.text.strip,
147
140
  chair: chair.text)
148
141
  end
149
- society = doc.at "//td[.='Society']/following-sibling::td/div"
142
+ society = doc.at "//dd[@id='stnd-society']/text()"
150
143
  if society
151
- committees << Committee.new(type: "society", name: society.text)
144
+ committees << Committee.new(type: "society", name: society.text.strip)
152
145
  end
153
146
  committees
154
147
  end
@@ -1,3 +1,3 @@
1
1
  module RelatonIeee
2
- VERSION = "1.9.1".freeze
2
+ VERSION = "1.10.0".freeze
3
3
  end
data/lib/relaton_ieee.rb CHANGED
@@ -9,6 +9,7 @@ require "relaton_ieee/scrapper"
9
9
  require "relaton_ieee/ieee_bibliographic_item"
10
10
  require "relaton_ieee/committee"
11
11
  require "relaton_ieee/xml_parser"
12
+ require "relaton_ieee/bibxml_parser"
12
13
  require "relaton_ieee/hash_converter"
13
14
  require "relaton_ieee/data_fetcher"
14
15
 
data/relaton_ieee.gemspec CHANGED
@@ -38,6 +38,6 @@ Gem::Specification.new do |spec|
38
38
  spec.add_development_dependency "webmock"
39
39
 
40
40
  spec.add_dependency "faraday", "~> 1.1"
41
- spec.add_dependency "relaton-bib", "~> 1.9.0"
41
+ spec.add_dependency "relaton-bib", "~> 1.10.0"
42
42
  spec.add_dependency "rubyzip", "~> 2.3.0"
43
43
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: relaton-ieee
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.9.1
4
+ version: 1.10.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ribose Inc.
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2021-09-24 00:00:00.000000000 Z
11
+ date: 2022-01-30 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: equivalent-xml
@@ -100,14 +100,14 @@ dependencies:
100
100
  requirements:
101
101
  - - "~>"
102
102
  - !ruby/object:Gem::Version
103
- version: 1.9.0
103
+ version: 1.10.0
104
104
  type: :runtime
105
105
  prerelease: false
106
106
  version_requirements: !ruby/object:Gem::Requirement
107
107
  requirements:
108
108
  - - "~>"
109
109
  - !ruby/object:Gem::Version
110
- version: 1.9.0
110
+ version: 1.10.0
111
111
  - !ruby/object:Gem::Dependency
112
112
  name: rubyzip
113
113
  requirement: !ruby/object:Gem::Requirement
@@ -146,6 +146,7 @@ files:
146
146
  - grammars/isodoc.rng
147
147
  - grammars/reqt.rng
148
148
  - lib/relaton_ieee.rb
149
+ - lib/relaton_ieee/bibxml_parser.rb
149
150
  - lib/relaton_ieee/committee.rb
150
151
  - lib/relaton_ieee/data_fetcher.rb
151
152
  - lib/relaton_ieee/data_parser.rb
@@ -155,6 +156,8 @@ files:
155
156
  - lib/relaton_ieee/ieee_bibliographic_item.rb
156
157
  - lib/relaton_ieee/ieee_bibliography.rb
157
158
  - lib/relaton_ieee/processor.rb
159
+ - lib/relaton_ieee/pub_id.rb
160
+ - lib/relaton_ieee/rawbib_id_parser.rb
158
161
  - lib/relaton_ieee/scrapper.rb
159
162
  - lib/relaton_ieee/version.rb
160
163
  - lib/relaton_ieee/xml_parser.rb