relaton-ieee 1.7.5 → 1.9.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.github/workflows/rake.yml +1 -11
- data/.gitignore +1 -0
- data/.rubocop.yml +1 -1
- data/README.adoc +29 -0
- data/grammars/basicdoc.rng +165 -20
- data/grammars/biblio.rng +5 -6
- data/grammars/isodoc.rng +532 -16
- data/grammars/reqt.rng +31 -2
- data/lib/relaton_ieee/bibxml_parser.rb +14 -0
- data/lib/relaton_ieee/data_fetcher.rb +213 -0
- data/lib/relaton_ieee/data_parser.rb +266 -0
- data/lib/relaton_ieee/hit_collection.rb +2 -2
- data/lib/relaton_ieee/ieee_bibliographic_item.rb +4 -4
- data/lib/relaton_ieee/ieee_bibliography.rb +15 -2
- data/lib/relaton_ieee/processor.rb +14 -1
- data/lib/relaton_ieee/version.rb +1 -1
- data/lib/relaton_ieee.rb +2 -0
- data/relaton_ieee.gemspec +3 -7
- metadata +23 -6
data/grammars/reqt.rng
CHANGED
@@ -30,15 +30,34 @@
|
|
30
30
|
<data type="boolean"/>
|
31
31
|
</attribute>
|
32
32
|
</optional>
|
33
|
+
<optional>
|
34
|
+
<attribute name="number"/>
|
35
|
+
</optional>
|
33
36
|
<optional>
|
34
37
|
<attribute name="subsequence"/>
|
35
38
|
</optional>
|
39
|
+
<optional>
|
40
|
+
<attribute name="keep-with-next">
|
41
|
+
<data type="boolean"/>
|
42
|
+
</attribute>
|
43
|
+
</optional>
|
44
|
+
<optional>
|
45
|
+
<attribute name="keep-lines-together">
|
46
|
+
<data type="boolean"/>
|
47
|
+
</attribute>
|
48
|
+
</optional>
|
36
49
|
<attribute name="id">
|
37
50
|
<data type="ID"/>
|
38
51
|
</attribute>
|
39
52
|
<optional>
|
40
53
|
<attribute name="filename"/>
|
41
54
|
</optional>
|
55
|
+
<optional>
|
56
|
+
<attribute name="model"/>
|
57
|
+
</optional>
|
58
|
+
<optional>
|
59
|
+
<attribute name="type"/>
|
60
|
+
</optional>
|
42
61
|
<optional>
|
43
62
|
<ref name="reqtitle"/>
|
44
63
|
</optional>
|
@@ -48,9 +67,9 @@
|
|
48
67
|
<optional>
|
49
68
|
<ref name="subject"/>
|
50
69
|
</optional>
|
51
|
-
<
|
70
|
+
<zeroOrMore>
|
52
71
|
<ref name="reqinherit"/>
|
53
|
-
</
|
72
|
+
</zeroOrMore>
|
54
73
|
<zeroOrMore>
|
55
74
|
<ref name="classification"/>
|
56
75
|
</zeroOrMore>
|
@@ -135,6 +154,16 @@
|
|
135
154
|
<data type="boolean"/>
|
136
155
|
</attribute>
|
137
156
|
</optional>
|
157
|
+
<optional>
|
158
|
+
<attribute name="keep-with-next">
|
159
|
+
<data type="boolean"/>
|
160
|
+
</attribute>
|
161
|
+
</optional>
|
162
|
+
<optional>
|
163
|
+
<attribute name="keep-lines-together">
|
164
|
+
<data type="boolean"/>
|
165
|
+
</attribute>
|
166
|
+
</optional>
|
138
167
|
<oneOrMore>
|
139
168
|
<ref name="BasicBlock"/>
|
140
169
|
</oneOrMore>
|
@@ -0,0 +1,14 @@
|
|
1
|
+
module RelatonIeee
|
2
|
+
module BibXMLParser
|
3
|
+
extend RelatonBib::BibXMLParser
|
4
|
+
extend BibXMLParser
|
5
|
+
|
6
|
+
FLAVOR = "IEEE"
|
7
|
+
|
8
|
+
# @param attrs [Hash]
|
9
|
+
# @return [RelatonIeee::IeeeBibliographicItem]
|
10
|
+
def bib_item(**attrs)
|
11
|
+
IeeeBibliographicItem.new(**attrs)
|
12
|
+
end
|
13
|
+
end
|
14
|
+
end
|
@@ -0,0 +1,213 @@
|
|
1
|
+
require "zip"
|
2
|
+
require "relaton_ieee/data_parser"
|
3
|
+
|
4
|
+
module RelatonIeee
|
5
|
+
class DataFetcher
|
6
|
+
RELATION_TYPES = {
|
7
|
+
"S" => { type: "obsoletedBy" },
|
8
|
+
"V" => { type: "updates", description: "revises" },
|
9
|
+
"T" => { type: "updates", description: "amends" },
|
10
|
+
"C" => { type: "updates", description: "corrects" },
|
11
|
+
"O" => { type: "adoptedFrom" },
|
12
|
+
"P" => { type: "complementOf", description: "supplement" },
|
13
|
+
"N" => false, "G" => false,
|
14
|
+
"F" => false, "I" => false,
|
15
|
+
"E" => false, "B" => false, "W" => false
|
16
|
+
}.freeze
|
17
|
+
|
18
|
+
# @return [Hash] list of AMSID => PubID
|
19
|
+
attr_reader :backrefs
|
20
|
+
|
21
|
+
#
|
22
|
+
# Create RelatonIeee::DataFetcher instance
|
23
|
+
#
|
24
|
+
# @param [String] output output dir
|
25
|
+
# @param [String] format output format. Allowed values: "yaml" or "xml"
|
26
|
+
#
|
27
|
+
def initialize(output, format)
|
28
|
+
@output = output
|
29
|
+
@format = format
|
30
|
+
@ext = format.sub(/^bib/, "")
|
31
|
+
@crossrefs = {}
|
32
|
+
@backrefs = {}
|
33
|
+
end
|
34
|
+
|
35
|
+
#
|
36
|
+
# Convert documents from `ieee-rawbib` dir (IEEE dataset) to BibYAML/BibXML
|
37
|
+
#
|
38
|
+
# @param [String] output ('data') output dir
|
39
|
+
# @param [String] format ('yaml') output format.
|
40
|
+
# Allowed values: "yaml" or "xml"
|
41
|
+
#
|
42
|
+
def self.fetch(output: "data", format: "yaml")
|
43
|
+
t1 = Time.now
|
44
|
+
puts "Started at: #{t1}"
|
45
|
+
FileUtils.mkdir_p output unless Dir.exist? output
|
46
|
+
new(output, format).fetch
|
47
|
+
t2 = Time.now
|
48
|
+
puts "Stopped at: #{t2}"
|
49
|
+
puts "Done in: #{(t2 - t1).round} sec."
|
50
|
+
end
|
51
|
+
|
52
|
+
#
|
53
|
+
# Convert documents from `ieee-rawbib` dir (IEEE dataset) to BibYAML/BibXML
|
54
|
+
#
|
55
|
+
def fetch # rubocop:disable Metrics/AbcSize,Metrics/MethodLength
|
56
|
+
Dir["ieee-rawbib/**/*.{xml,zip}"].reject { |f| f["Deleted_"] }.each do |f|
|
57
|
+
xml = case File.extname(f)
|
58
|
+
when ".zip" then read_zip f
|
59
|
+
when ".xml" then File.read f, encoding: "UTF-8"
|
60
|
+
end
|
61
|
+
fetch_doc xml, f
|
62
|
+
rescue StandardError => e
|
63
|
+
warn "File: #{f}"
|
64
|
+
warn e.message
|
65
|
+
warn e.backtrace
|
66
|
+
end
|
67
|
+
update_relations
|
68
|
+
end
|
69
|
+
|
70
|
+
#
|
71
|
+
# Extract XML file from zip archive
|
72
|
+
#
|
73
|
+
# @param [String] file path to archive
|
74
|
+
#
|
75
|
+
# @return [String] file content
|
76
|
+
#
|
77
|
+
def read_zip(file)
|
78
|
+
Zip::File.open(file) do |zf|
|
79
|
+
entry = zf.glob("**/*.xml").first
|
80
|
+
entry.get_input_stream.read
|
81
|
+
end
|
82
|
+
end
|
83
|
+
|
84
|
+
#
|
85
|
+
# Parse document and save it
|
86
|
+
#
|
87
|
+
# @param [String] xml content
|
88
|
+
# @param [String] filename source file
|
89
|
+
#
|
90
|
+
def fetch_doc(xml, filename) # rubocop:disable Metrics/AbcSize,Metrics/MethodLength
|
91
|
+
doc = Nokogiri::XML(xml).at("/publication")
|
92
|
+
unless doc
|
93
|
+
warn "Empty file: #{filename}"
|
94
|
+
return
|
95
|
+
end
|
96
|
+
bib = DataParser.parse doc, self
|
97
|
+
amsid = doc.at("./publicationinfo/amsid").text
|
98
|
+
if backrefs.value?(bib.docidentifier[0].id) && /updates\.\d+/ !~ filename
|
99
|
+
oamsid = backrefs.key bib.docidentifier[0].id
|
100
|
+
warn "Document exists ID: \"#{bib.docidentifier[0].id}\" AMSID: "\
|
101
|
+
"\"#{amsid}\" source: \"#{filename}\". Other AMSID: \"#{oamsid}\""
|
102
|
+
if bib.docidentifier[0].id.include?(bib.docnumber)
|
103
|
+
save_doc bib # rewrite file if the PubID matches the docnumber
|
104
|
+
backrefs[amsid] = bib.docidentifier[0].id
|
105
|
+
end
|
106
|
+
else
|
107
|
+
save_doc bib
|
108
|
+
backrefs[amsid] = bib.docidentifier[0].id
|
109
|
+
end
|
110
|
+
end
|
111
|
+
|
112
|
+
#
|
113
|
+
# Save unresolved relation reference
|
114
|
+
#
|
115
|
+
# @param [String] docnumber of main document
|
116
|
+
# @param [Nokogiri::XML::Element] amsid relation data
|
117
|
+
#
|
118
|
+
def add_crossref(docnumber, amsid)
|
119
|
+
return if RELATION_TYPES[amsid[:type]] == false
|
120
|
+
|
121
|
+
ref = { amsid: amsid.text, type: amsid[:type] }
|
122
|
+
if @crossrefs[docnumber]
|
123
|
+
@crossrefs[docnumber] << ref
|
124
|
+
else @crossrefs[docnumber] = [ref]
|
125
|
+
end
|
126
|
+
end
|
127
|
+
|
128
|
+
#
|
129
|
+
# Save document to file
|
130
|
+
#
|
131
|
+
# @param [RelatonIeee::IeeeBibliographicItem] bib
|
132
|
+
#
|
133
|
+
def save_doc(bib)
|
134
|
+
c = case @format
|
135
|
+
when "xml" then bib.to_xml(bibdata: true)
|
136
|
+
when "yaml" then bib.to_hash.to_yaml
|
137
|
+
else bib.send("to_#{@format}")
|
138
|
+
end
|
139
|
+
File.write file_name(bib.docnumber), c, encoding: "UTF-8"
|
140
|
+
end
|
141
|
+
|
142
|
+
#
|
143
|
+
# Make filename from PubID
|
144
|
+
#
|
145
|
+
# @param [String] docnumber
|
146
|
+
#
|
147
|
+
# @return [String] filename
|
148
|
+
#
|
149
|
+
def file_name(docnumber)
|
150
|
+
name = docnumber.gsub(/\s-/, "-").gsub(/[.\s,:\/]/, "_").squeeze("_").upcase
|
151
|
+
File.join @output, "#{name}.#{@ext}"
|
152
|
+
end
|
153
|
+
|
154
|
+
#
|
155
|
+
# Update unresolved relations
|
156
|
+
#
|
157
|
+
def update_relations # rubocop:disable Metrics/AbcSize,Metrics/MethodLength
|
158
|
+
@crossrefs.each do |dnum, rfs|
|
159
|
+
bib = nil
|
160
|
+
rfs.each do |rf|
|
161
|
+
if backrefs[rf[:amsid]]
|
162
|
+
rel = create_relation(rf[:type], backrefs[rf[:amsid]])
|
163
|
+
if rel
|
164
|
+
bib ||= read_bib(dnum)
|
165
|
+
bib.relation << rel
|
166
|
+
save_doc bib
|
167
|
+
end
|
168
|
+
else
|
169
|
+
warn "Unresolved relation: '#{rf[:amsid]}' type: '#{rf[:type]}' for '#{dnum}'"
|
170
|
+
end
|
171
|
+
end
|
172
|
+
end
|
173
|
+
end
|
174
|
+
|
175
|
+
#
|
176
|
+
# Create relation instance
|
177
|
+
#
|
178
|
+
# @param [String] type IEEE relation type
|
179
|
+
# @param [String] fref reference
|
180
|
+
#
|
181
|
+
# @return [RelatonBib::DocumentRelation]
|
182
|
+
#
|
183
|
+
def create_relation(type, fref)
|
184
|
+
return if RELATION_TYPES[type] == false
|
185
|
+
|
186
|
+
fr = RelatonBib::FormattedRef.new(content: fref)
|
187
|
+
bib = IeeeBibliographicItem.new formattedref: fr
|
188
|
+
desc = RELATION_TYPES[type][:description]
|
189
|
+
description = desc && RelatonBib::FormattedString.new(content: desc, language: "en", script: "Latn")
|
190
|
+
RelatonBib::DocumentRelation.new(
|
191
|
+
type: RELATION_TYPES[type][:type],
|
192
|
+
description: description,
|
193
|
+
bibitem: bib,
|
194
|
+
)
|
195
|
+
end
|
196
|
+
|
197
|
+
#
|
198
|
+
# Read document from BibXML/BibYAML file
|
199
|
+
#
|
200
|
+
# @param [String] docnumber
|
201
|
+
#
|
202
|
+
# @return [RelatonIeee::IeeeBibliographicItem]
|
203
|
+
#
|
204
|
+
def read_bib(docnumber)
|
205
|
+
c = File.read file_name(docnumber), encoding: "UTF-8"
|
206
|
+
case @format
|
207
|
+
when "xml" then XMLParser.from_xml c
|
208
|
+
when "bibxml" then BibXMLParser.parse c
|
209
|
+
else IeeeBibliographicItem.from_hash YAML.safe_load(c)
|
210
|
+
end
|
211
|
+
end
|
212
|
+
end
|
213
|
+
end
|
@@ -0,0 +1,266 @@
|
|
1
|
+
module RelatonIeee
|
2
|
+
class DataParser
|
3
|
+
DATETYPES = { "OriginalPub" => "created", "ePub" => "published",
|
4
|
+
"LastInspecUpd" => "updated" }.freeze
|
5
|
+
|
6
|
+
attr_reader :doc, :fetcher
|
7
|
+
|
8
|
+
#
|
9
|
+
# Create RelatonIeee::DataParser instance
|
10
|
+
#
|
11
|
+
# @param [Nokogiri::XML::Element] doc document
|
12
|
+
# @param [RelatonIeee::DataFetcher] fetcher
|
13
|
+
#
|
14
|
+
def initialize(doc, fetcher)
|
15
|
+
@doc = doc
|
16
|
+
@fetcher = fetcher
|
17
|
+
end
|
18
|
+
|
19
|
+
#
|
20
|
+
# Parse IEEE document
|
21
|
+
#
|
22
|
+
# @param [Nokogiri::XML::Element] doc document
|
23
|
+
# @param [RelatonIeee::DataFetcher] fetcher fetcher used to resolve and record document relations
|
24
|
+
#
|
25
|
+
# @return [RelatonIeee::IeeeBibliographicItem]
|
26
|
+
#
|
27
|
+
def self.parse(doc, fetcher)
|
28
|
+
new(doc, fetcher).parse
|
29
|
+
end
|
30
|
+
|
31
|
+
#
|
32
|
+
# Parse IEEE document
|
33
|
+
#
|
34
|
+
# @return [RelatonIeee::IeeeBibliographicItem]
|
35
|
+
#
|
36
|
+
def parse # rubocop:disable Metrics/MethodLength,Metrics/AbcSize
|
37
|
+
args = {
|
38
|
+
type: "standard",
|
39
|
+
docnumber: docnumber,
|
40
|
+
title: parse_title,
|
41
|
+
date: parse_date,
|
42
|
+
docid: parse_docid,
|
43
|
+
contributor: parse_contributor,
|
44
|
+
abstract: parse_abstract,
|
45
|
+
copyright: parse_copyright,
|
46
|
+
language: ["en"],
|
47
|
+
script: ["Latn"],
|
48
|
+
status: parse_status,
|
49
|
+
relation: parse_relation,
|
50
|
+
link: parse_link,
|
51
|
+
keyword: parse_keyword,
|
52
|
+
ics: parse_ics,
|
53
|
+
}
|
54
|
+
IeeeBibliographicItem.new(**args)
|
55
|
+
end
|
56
|
+
|
57
|
+
#
|
58
|
+
# Parse title
|
59
|
+
#
|
60
|
+
# @return [RelatonBib::TypedTitleStringCollection]
|
61
|
+
#
|
62
|
+
def parse_title
|
63
|
+
t = doc.at("./volume/article/title").text
|
64
|
+
RelatonBib::TypedTitleString.from_string t
|
65
|
+
end
|
66
|
+
|
67
|
+
#
|
68
|
+
# Parse date
|
69
|
+
#
|
70
|
+
# @return [Array<RelatonBib::BibliographicDate>]
|
71
|
+
#
|
72
|
+
def parse_date # rubocop:disable Metrics/AbcSize,Metrics/CyclomaticComplexity,Metrics/MethodLength
|
73
|
+
dates = doc.xpath("./volume/article/articleinfo/date").map do |d|
|
74
|
+
da = [d.at("./year").text]
|
75
|
+
m = d.at("./month")&.text
|
76
|
+
if m
|
77
|
+
month = Date::ABBR_MONTHNAMES.index(m.sub(/\./, "")) || m
|
78
|
+
da << month.to_s.rjust(2, "0")
|
79
|
+
end
|
80
|
+
day = d.at("./day")
|
81
|
+
da << day.text.rjust(2, "0") if day
|
82
|
+
on = da.compact.join "-"
|
83
|
+
RelatonBib::BibliographicDate.new type: DATETYPES[d[:datetype]], on: on
|
84
|
+
end
|
85
|
+
pad = doc.at("./publicationinfo/PubApprovalDate")
|
86
|
+
if pad
|
87
|
+
issued = parse_date_string pad.text
|
88
|
+
dates << RelatonBib::BibliographicDate.new(type: "issued", on: issued)
|
89
|
+
end
|
90
|
+
dates
|
91
|
+
end
|
92
|
+
|
93
|
+
#
|
94
|
+
# Convert date string with month name to numeric date
|
95
|
+
#
|
96
|
+
# @param [String] date source date
|
97
|
+
#
|
98
|
+
# @return [String] numeric date
|
99
|
+
#
|
100
|
+
def parse_date_string(date)
|
101
|
+
case date
|
102
|
+
when /^\d{4}$/ then date
|
103
|
+
when /^\d{1,2}\s\w+\.?\s\d{4}/ then Date.parse(date).to_s
|
104
|
+
end
|
105
|
+
end
|
106
|
+
|
107
|
+
#
|
108
|
+
# Parse identifiers
|
109
|
+
#
|
110
|
+
# @return [Array<RelatonBib::DocumentIdentifier>]
|
111
|
+
#
|
112
|
+
def parse_docid
|
113
|
+
ids = [{ id: doc.at("./title").text, type: "IEEE" }]
|
114
|
+
isbn = doc.at("./publicationinfo/isbn")
|
115
|
+
ids << { id: isbn.text, type: "ISBN" } if isbn
|
116
|
+
doi = doc.at("./volume/article/articleinfo/articledoi")
|
117
|
+
ids << { id: doi.text, type: "DOI" } if doi
|
118
|
+
ids.map do |dcid|
|
119
|
+
RelatonBib::DocumentIdentifier.new(**dcid)
|
120
|
+
end
|
121
|
+
end
|
122
|
+
|
123
|
+
#
|
124
|
+
# Parse docnumber
|
125
|
+
#
|
126
|
+
# @return [String] PubID
|
127
|
+
#
|
128
|
+
def docnumber
|
129
|
+
@docnumber ||= doc.at("./publicationinfo/stdnumber").text
|
130
|
+
end
|
131
|
+
|
132
|
+
#
|
133
|
+
# Parse contributors
|
134
|
+
#
|
135
|
+
# @return [Array<RelatonBib::ContributionInfo>]
|
136
|
+
#
|
137
|
+
def parse_contributor # rubocop:disable Metrics/AbcSize,Metrics/MethodLength
|
138
|
+
doc.xpath("./publicationinfo/publisher").map do |contrib|
|
139
|
+
n = contrib.at("./publishername").text
|
140
|
+
addr = contrib.xpath("./address").map do |a|
|
141
|
+
RelatonBib::Address.new(
|
142
|
+
street: [],
|
143
|
+
city: a.at("./city")&.text,
|
144
|
+
country: a.at("./country").text,
|
145
|
+
)
|
146
|
+
end
|
147
|
+
e = create_org n, addr
|
148
|
+
RelatonBib::ContributionInfo.new entity: e, role: [type: "publisher"]
|
149
|
+
end
|
150
|
+
end
|
151
|
+
|
152
|
+
#
|
153
|
+
# Create organization
|
154
|
+
#
|
155
|
+
# @param [String] name organization's name
|
156
|
+
# @param [Array<RelatonBib::Address>] addr addresses
|
157
|
+
#
|
158
|
+
# @return [RelatonBib::Organization]
|
159
|
+
def create_org(name, addr = []) # rubocop:disable Metrics/MethodLength
|
160
|
+
case name
|
161
|
+
when "IEEE"
|
162
|
+
abbr = name
|
163
|
+
n = "Institute of Electrical and Electronics Engineers"
|
164
|
+
url = "http://www.ieee.org"
|
165
|
+
when "ANSI"
|
166
|
+
abbr = name
|
167
|
+
n = "American National Standards Institute"
|
168
|
+
url = "https://www.ansi.org"
|
169
|
+
else n = name
|
170
|
+
end
|
171
|
+
RelatonBib::Organization.new(
|
172
|
+
name: n, abbreviation: abbr, url: url, contact: addr,
|
173
|
+
)
|
174
|
+
end
|
175
|
+
|
176
|
+
#
|
177
|
+
# Parse abstract
|
178
|
+
#
|
179
|
+
# @return [Array<RelatonBib::FormattedString>]
|
180
|
+
#
|
181
|
+
def parse_abstract
|
182
|
+
doc.xpath("./volume/article/articleinfo/abstract").map do |a|
|
183
|
+
RelatonBib::FormattedString.new(
|
184
|
+
content: a.text, language: "en", script: "Latn",
|
185
|
+
)
|
186
|
+
end
|
187
|
+
end
|
188
|
+
|
189
|
+
#
|
190
|
+
# Parse copyright
|
191
|
+
#
|
192
|
+
# @return [Array<RelatonBib::CopyrightAssociation>]
|
193
|
+
#
|
194
|
+
def parse_copyright
|
195
|
+
doc.xpath("./publicationinfo/copyrightgroup/copyright").map do |c|
|
196
|
+
owner = c.at("./holder").text.split("/").map do |own|
|
197
|
+
RelatonBib::ContributionInfo.new entity: create_org(own)
|
198
|
+
end
|
199
|
+
RelatonBib::CopyrightAssociation.new(
|
200
|
+
owner: owner, from: c.at("./year").text,
|
201
|
+
)
|
202
|
+
end
|
203
|
+
end
|
204
|
+
|
205
|
+
#
|
206
|
+
# Parse status
|
207
|
+
#
|
208
|
+
# @return [RelatonBib::DocumentStatus]
|
209
|
+
#
|
210
|
+
def parse_status
|
211
|
+
stage = doc.at("./publicationinfo/standard_status").text
|
212
|
+
RelatonBib::DocumentStatus.new stage: stage
|
213
|
+
end
|
214
|
+
|
215
|
+
#
|
216
|
+
# Parse relation
|
217
|
+
#
|
218
|
+
# @return [RelatonBib::DocRelationCollection]
|
219
|
+
#
|
220
|
+
def parse_relation # rubocop:disable Metrics/AbcSize
|
221
|
+
rels = []
|
222
|
+
doc.xpath("./publicationinfo/standard_relationship").each do |r|
|
223
|
+
if (ref = fetcher.backrefs[r.text])
|
224
|
+
rel = fetcher.create_relation(r[:type], ref)
|
225
|
+
rels << rel if rel
|
226
|
+
elsif !/Inactive Date/.match?(r) then fetcher.add_crossref(docnumber, r)
|
227
|
+
end
|
228
|
+
end
|
229
|
+
RelatonBib::DocRelationCollection.new rels
|
230
|
+
end
|
231
|
+
|
232
|
+
#
|
233
|
+
# Parse link
|
234
|
+
#
|
235
|
+
# @return [Array<RelatonBib::TypedUri>]
|
236
|
+
#
|
237
|
+
def parse_link
|
238
|
+
doc.xpath("./volume/article/articleinfo/amsid").map do |id|
|
239
|
+
l = "https://ieeexplore.ieee.org/document/#{id.text}"
|
240
|
+
RelatonBib::TypedUri.new content: l, type: "src"
|
241
|
+
end
|
242
|
+
end
|
243
|
+
|
244
|
+
#
|
245
|
+
# Parse keyword
|
246
|
+
#
|
247
|
+
# @return [Array<String>]
|
248
|
+
#
|
249
|
+
def parse_keyword
|
250
|
+
doc.xpath(
|
251
|
+
"./volume/article/articleinfo/keywordset/keyword/keywordterm",
|
252
|
+
).map &:text
|
253
|
+
end
|
254
|
+
|
255
|
+
#
|
256
|
+
# Parse ICS
|
257
|
+
#
|
258
|
+
# @return [Array<RelatonBib::ICS>]
|
259
|
+
#
|
260
|
+
def parse_ics
|
261
|
+
doc.xpath("./publicationinfo/icscodes/code_term").map do |ics|
|
262
|
+
RelatonBib::ICS.new code: ics[:codenum], text: ics.text
|
263
|
+
end
|
264
|
+
end
|
265
|
+
end
|
266
|
+
end
|
@@ -15,7 +15,7 @@ module RelatonIeee
|
|
15
15
|
# @param opts [Hash]
|
16
16
|
def initialize(ref) # rubocop:disable Metrics/MethodLength
|
17
17
|
super
|
18
|
-
code = ref.sub
|
18
|
+
code = ref.sub(/^IEEE\s(Std\s)?/, "")
|
19
19
|
search = CGI.escape({ data: { searchTerm: code } }.to_json)
|
20
20
|
url = "#{DOMAIN}/bin/standards/search?data=#{search}"
|
21
21
|
resp = Faraday.get url
|
@@ -23,7 +23,7 @@ module RelatonIeee
|
|
23
23
|
json = JSON.parse resp_json["message"]
|
24
24
|
@array = json["response"]["searchResults"]["resultsMapList"]
|
25
25
|
.reduce([]) do |s, hit|
|
26
|
-
/^(?:\w+\s)?(?<id>[A-Z\d
|
26
|
+
/^(?:\w+\s)?(?<id>[A-Z\d.]+)(?:-(?<year>\d{4}))?/ =~ hit["record"]["recordTitle"]
|
27
27
|
next s unless id && code =~ %r{^#{id}}
|
28
28
|
|
29
29
|
s << Hit.new(hit["record"].merge(code: id, year: year.to_i), self)
|
@@ -5,7 +5,7 @@ module RelatonIeee
|
|
5
5
|
|
6
6
|
# @param committee [Array<RelatonIeee::Committee>]
|
7
7
|
def initialize(**args)
|
8
|
-
@committee = args.delete
|
8
|
+
@committee = args.delete(:committee) || []
|
9
9
|
super
|
10
10
|
end
|
11
11
|
|
@@ -13,7 +13,7 @@ module RelatonIeee
|
|
13
13
|
# @return [RelatonIeee::IeeeBibliographicItem]
|
14
14
|
def self.from_hash(hash)
|
15
15
|
item_hash = ::RelatonIeee::HashConverter.hash_to_bib(hash)
|
16
|
-
new
|
16
|
+
new(**item_hash)
|
17
17
|
end
|
18
18
|
|
19
19
|
# @param opts [Hash]
|
@@ -22,7 +22,7 @@ module RelatonIeee
|
|
22
22
|
# @option opts [String] :lang language
|
23
23
|
# @return [String] XML
|
24
24
|
def to_xml(**opts)
|
25
|
-
super
|
25
|
+
super(**opts) do |bldr|
|
26
26
|
if opts[:bibdata] && committee.any?
|
27
27
|
bldr.ext do |b|
|
28
28
|
committee.each { |c| c.to_xml b }
|
@@ -34,7 +34,7 @@ module RelatonIeee
|
|
34
34
|
# @return [Hash]
|
35
35
|
def to_hash
|
36
36
|
hash = super
|
37
|
-
hash["committee"] = committee.map &:to_hash
|
37
|
+
hash["committee"] = committee.map &:to_hash if committee.any?
|
38
38
|
hash
|
39
39
|
end
|
40
40
|
|
@@ -19,7 +19,7 @@ module RelatonIeee
|
|
19
19
|
warn "[relaton-ieee] (\"#{code}\") fetching..."
|
20
20
|
result = search(code) || (return nil)
|
21
21
|
year ||= code.match(/(?<=-)\d{4}/)&.to_s
|
22
|
-
ret = bib_results_filter(result, year)
|
22
|
+
ret = bib_results_filter(result, code, year)
|
23
23
|
if ret[:ret]
|
24
24
|
item = ret[:ret].fetch
|
25
25
|
warn "[relaton-ieee] (\"#{code}\") found #{item.docidentifier.first.id}"
|
@@ -42,9 +42,13 @@ module RelatonIeee
|
|
42
42
|
# @param opts [Hash] options
|
43
43
|
#
|
44
44
|
# @return [Hash]
|
45
|
-
def bib_results_filter(result, year)
|
45
|
+
def bib_results_filter(result, ref, year)
|
46
|
+
rp1 = ref_parts ref
|
46
47
|
missed_years = []
|
47
48
|
result.each do |hit|
|
49
|
+
rp2 = ref_parts hit.hit["recordTitle"]
|
50
|
+
next if rp1[:code] != rp2[:code] || rp1[:corr] != rp2[:corr]
|
51
|
+
|
48
52
|
return { ret: hit } if !year
|
49
53
|
|
50
54
|
return { ret: hit } if year.to_i == hit.hit[:year]
|
@@ -54,6 +58,15 @@ module RelatonIeee
|
|
54
58
|
{ years: missed_years.uniq }
|
55
59
|
end
|
56
60
|
|
61
|
+
def ref_parts(ref)
|
62
|
+
%r{
|
63
|
+
^(?:IEEE\s(?:Std\s)?)?
|
64
|
+
(?<code>[^-/]+)
|
65
|
+
(?:-(?<year>\d{4}))?
|
66
|
+
(?:/(?<corr>\w+\s\d+-\d{4}))?
|
67
|
+
}x.match ref
|
68
|
+
end
|
69
|
+
|
57
70
|
# @param code [String]
|
58
71
|
# @param year [String]
|
59
72
|
# @param missed_years [Array<String>]
|
@@ -4,11 +4,12 @@ module RelatonIeee
|
|
4
4
|
class Processor < Relaton::Processor
|
5
5
|
attr_reader :idtype
|
6
6
|
|
7
|
-
def initialize
|
7
|
+
def initialize # rubocop:disable Lint/MissingSuper
|
8
8
|
@short = :relaton_ieee
|
9
9
|
@prefix = "IEEE"
|
10
10
|
@defaultprefix = %r{^IEEE\s}
|
11
11
|
@idtype = "IEEE"
|
12
|
+
@datasets = %w[ieee-rawbib]
|
12
13
|
end
|
13
14
|
|
14
15
|
# @param code [String]
|
@@ -19,6 +20,18 @@ module RelatonIeee
|
|
19
20
|
::RelatonIeee::IeeeBibliography.get(code, date, opts)
|
20
21
|
end
|
21
22
|
|
23
|
+
#
|
24
|
+
# Fetch all the documents from ./ieee-rawbib directory
|
25
|
+
#
|
26
|
+
# @param [String] _source source name
|
27
|
+
# @param [Hash] opts
|
28
|
+
# @option opts [String] :output directory to output documents
|
29
|
+
# @option opts [String] :format
|
30
|
+
#
|
31
|
+
def fetch_data(_source, opts)
|
32
|
+
DataFetcher.fetch(**opts)
|
33
|
+
end
|
34
|
+
|
22
35
|
# @param xml [String]
|
23
36
|
# @return [RelatonIeee::IeeeBibliographicItem]
|
24
37
|
def from_xml(xml)
|
data/lib/relaton_ieee/version.rb
CHANGED
data/lib/relaton_ieee.rb
CHANGED
@@ -9,7 +9,9 @@ require "relaton_ieee/scrapper"
|
|
9
9
|
require "relaton_ieee/ieee_bibliographic_item"
|
10
10
|
require "relaton_ieee/committee"
|
11
11
|
require "relaton_ieee/xml_parser"
|
12
|
+
require "relaton_ieee/bibxml_parser"
|
12
13
|
require "relaton_ieee/hash_converter"
|
14
|
+
require "relaton_ieee/data_fetcher"
|
13
15
|
|
14
16
|
module RelatonIeee
|
15
17
|
class Error < StandardError; end
|