relaton-ieee 1.9.1 → 1.10.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.rubocop.yml +2 -0
- data/README.adoc +10 -0
- data/grammars/basicdoc.rng +26 -7
- data/grammars/biblio.rng +2 -2
- data/grammars/isodoc.rng +617 -89
- data/grammars/reqt.rng +34 -5
- data/lib/relaton_ieee/bibxml_parser.rb +23 -0
- data/lib/relaton_ieee/data_fetcher.rb +30 -7
- data/lib/relaton_ieee/data_parser.rb +18 -6
- data/lib/relaton_ieee/hit_collection.rb +18 -14
- data/lib/relaton_ieee/ieee_bibliography.rb +3 -3
- data/lib/relaton_ieee/pub_id.rb +149 -0
- data/lib/relaton_ieee/rawbib_id_parser.rb +515 -0
- data/lib/relaton_ieee/scrapper.rb +40 -47
- data/lib/relaton_ieee/version.rb +1 -1
- data/lib/relaton_ieee.rb +1 -0
- data/relaton_ieee.gemspec +1 -1
- metadata +7 -4
data/grammars/reqt.rng
CHANGED
@@ -58,15 +58,23 @@
|
|
58
58
|
<optional>
|
59
59
|
<attribute name="type"/>
|
60
60
|
</optional>
|
61
|
+
<optional>
|
62
|
+
<attribute name="tag"/>
|
63
|
+
</optional>
|
64
|
+
<optional>
|
65
|
+
<attribute name="multilingual-rendering">
|
66
|
+
<ref name="MultilingualRenderingType"/>
|
67
|
+
</attribute>
|
68
|
+
</optional>
|
61
69
|
<optional>
|
62
70
|
<ref name="reqtitle"/>
|
63
71
|
</optional>
|
64
72
|
<optional>
|
65
73
|
<ref name="label"/>
|
66
74
|
</optional>
|
67
|
-
<
|
75
|
+
<zeroOrMore>
|
68
76
|
<ref name="subject"/>
|
69
|
-
</
|
77
|
+
</zeroOrMore>
|
70
78
|
<zeroOrMore>
|
71
79
|
<ref name="reqinherit"/>
|
72
80
|
</zeroOrMore>
|
@@ -80,6 +88,7 @@
|
|
80
88
|
<ref name="verification"/>
|
81
89
|
<ref name="import"/>
|
82
90
|
<ref name="description"/>
|
91
|
+
<ref name="component"/>
|
83
92
|
</choice>
|
84
93
|
</zeroOrMore>
|
85
94
|
<optional>
|
@@ -100,17 +109,23 @@
|
|
100
109
|
</define>
|
101
110
|
<define name="label">
|
102
111
|
<element name="label">
|
103
|
-
<
|
112
|
+
<oneOrMore>
|
113
|
+
<ref name="TextElement"/>
|
114
|
+
</oneOrMore>
|
104
115
|
</element>
|
105
116
|
</define>
|
106
117
|
<define name="subject">
|
107
118
|
<element name="subject">
|
108
|
-
<
|
119
|
+
<oneOrMore>
|
120
|
+
<ref name="TextElement"/>
|
121
|
+
</oneOrMore>
|
109
122
|
</element>
|
110
123
|
</define>
|
111
124
|
<define name="reqinherit">
|
112
125
|
<element name="inherit">
|
113
|
-
<
|
126
|
+
<oneOrMore>
|
127
|
+
<ref name="TextElement"/>
|
128
|
+
</oneOrMore>
|
114
129
|
</element>
|
115
130
|
</define>
|
116
131
|
<define name="measurementtarget">
|
@@ -138,6 +153,12 @@
|
|
138
153
|
<ref name="RequirementSubpart"/>
|
139
154
|
</element>
|
140
155
|
</define>
|
156
|
+
<define name="component">
|
157
|
+
<element name="component">
|
158
|
+
<attribute name="class"/>
|
159
|
+
<ref name="RequirementSubpart"/>
|
160
|
+
</element>
|
161
|
+
</define>
|
141
162
|
<define name="reqt_references">
|
142
163
|
<element name="references">
|
143
164
|
<oneOrMore>
|
@@ -164,6 +185,14 @@
|
|
164
185
|
<data type="boolean"/>
|
165
186
|
</attribute>
|
166
187
|
</optional>
|
188
|
+
<optional>
|
189
|
+
<attribute name="tag"/>
|
190
|
+
</optional>
|
191
|
+
<optional>
|
192
|
+
<attribute name="multilingual-rendering">
|
193
|
+
<ref name="MultilingualRenderingType"/>
|
194
|
+
</attribute>
|
195
|
+
</optional>
|
167
196
|
<oneOrMore>
|
168
197
|
<ref name="BasicBlock"/>
|
169
198
|
</oneOrMore>
|
@@ -0,0 +1,23 @@
|
|
1
|
+
module RelatonIeee
|
2
|
+
module BibXMLParser
|
3
|
+
extend RelatonBib::BibXMLParser
|
4
|
+
extend BibXMLParser
|
5
|
+
|
6
|
+
# @param attrs [Hash]
|
7
|
+
# @return [RelatonIeee::IeeeBibliographicItem]
|
8
|
+
def bib_item(**attrs)
|
9
|
+
IeeeBibliographicItem.new(**attrs)
|
10
|
+
end
|
11
|
+
|
12
|
+
#
|
13
|
+
# Return PubID type
|
14
|
+
#
|
15
|
+
# @param [String] _ docidentifier
|
16
|
+
#
|
17
|
+
# @return [String] type
|
18
|
+
#
|
19
|
+
def pubid_type(_)
|
20
|
+
"IEEE"
|
21
|
+
end
|
22
|
+
end
|
23
|
+
end
|
@@ -1,5 +1,6 @@
|
|
1
1
|
require "zip"
|
2
2
|
require "relaton_ieee/data_parser"
|
3
|
+
require "relaton_ieee/rawbib_id_parser"
|
3
4
|
|
4
5
|
module RelatonIeee
|
5
6
|
class DataFetcher
|
@@ -27,8 +28,10 @@ module RelatonIeee
|
|
27
28
|
def initialize(output, format)
|
28
29
|
@output = output
|
29
30
|
@format = format
|
31
|
+
@ext = format.sub(/^bib/, "")
|
30
32
|
@crossrefs = {}
|
31
33
|
@backrefs = {}
|
34
|
+
# @normtitles = []
|
32
35
|
end
|
33
36
|
|
34
37
|
#
|
@@ -63,6 +66,7 @@ module RelatonIeee
|
|
63
66
|
warn e.message
|
64
67
|
warn e.backtrace
|
65
68
|
end
|
69
|
+
# File.write "normtitles.txt", @normtitles.join("\n")
|
66
70
|
update_relations
|
67
71
|
end
|
68
72
|
|
@@ -86,20 +90,33 @@ module RelatonIeee
|
|
86
90
|
# @param [String] xml content
|
87
91
|
# @param [String] filename source file
|
88
92
|
#
|
89
|
-
def fetch_doc(xml, filename) # rubocop:disable Metrics/AbcSize,Metrics/MethodLength
|
93
|
+
def fetch_doc(xml, filename) # rubocop:disable Metrics/AbcSize,Metrics/MethodLength,Metrics/CyclomaticComplexity,Metrics/PerceivedComplexity
|
90
94
|
doc = Nokogiri::XML(xml).at("/publication")
|
91
95
|
unless doc
|
92
96
|
warn "Empty file: #{filename}"
|
93
97
|
return
|
94
98
|
end
|
99
|
+
stdid = doc.at("./publicationinfo/standard_id").text
|
100
|
+
if stdid == "0"
|
101
|
+
# nt = doc&.at("./normtitle")&.text
|
102
|
+
# ntid = @normtitles.index nt
|
103
|
+
# @normtitles << nt if nt && !ntid
|
104
|
+
warn "Zero standard_id in #{filename}"
|
105
|
+
return
|
106
|
+
end
|
95
107
|
bib = DataParser.parse doc, self
|
108
|
+
if bib.docnumber.nil?
|
109
|
+
nt = doc&.at("./normtitle")&.text
|
110
|
+
warn "PubID parse error. Normtitle: #{nt}, file: #{filename}"
|
111
|
+
return
|
112
|
+
end
|
96
113
|
amsid = doc.at("./publicationinfo/amsid").text
|
97
114
|
if backrefs.value?(bib.docidentifier[0].id) && /updates\.\d+/ !~ filename
|
98
115
|
oamsid = backrefs.key bib.docidentifier[0].id
|
99
116
|
warn "Document exists ID: \"#{bib.docidentifier[0].id}\" AMSID: "\
|
100
117
|
"\"#{amsid}\" source: \"#{filename}\". Other AMSID: \"#{oamsid}\""
|
101
|
-
if bib.docidentifier[0].id.include?(
|
102
|
-
save_doc bib # rewrite file if the PubID
|
118
|
+
if bib.docidentifier[0].id.include?(doc.at("./publicationinfo/stdnumber").text)
|
119
|
+
save_doc bib # rewrite file if the PubID matches to the stdnumber
|
103
120
|
backrefs[amsid] = bib.docidentifier[0].id
|
104
121
|
end
|
105
122
|
else
|
@@ -130,7 +147,11 @@ module RelatonIeee
|
|
130
147
|
# @param [RelatonIeee::IeeeBibliographicItem] bib
|
131
148
|
#
|
132
149
|
def save_doc(bib)
|
133
|
-
c = @format
|
150
|
+
c = case @format
|
151
|
+
when "xml" then bib.to_xml(bibdata: true)
|
152
|
+
when "yaml" then bib.to_hash.to_yaml
|
153
|
+
else bib.send("to_#{@format}")
|
154
|
+
end
|
134
155
|
File.write file_name(bib.docnumber), c, encoding: "UTF-8"
|
135
156
|
end
|
136
157
|
|
@@ -142,8 +163,8 @@ module RelatonIeee
|
|
142
163
|
# @return [String] filename
|
143
164
|
#
|
144
165
|
def file_name(docnumber)
|
145
|
-
name = docnumber.gsub(/[
|
146
|
-
File.join @output, "#{name}.#{@
|
166
|
+
name = docnumber.gsub(/\s-/, "-").gsub(/[\s,:\/]/, "_").squeeze("_").upcase
|
167
|
+
File.join @output, "#{name}.#{@ext}"
|
147
168
|
end
|
148
169
|
|
149
170
|
#
|
@@ -198,7 +219,9 @@ module RelatonIeee
|
|
198
219
|
#
|
199
220
|
def read_bib(docnumber)
|
200
221
|
c = File.read file_name(docnumber), encoding: "UTF-8"
|
201
|
-
|
222
|
+
case @format
|
223
|
+
when "xml" then XMLParser.from_xml c
|
224
|
+
when "bibxml" then BibXMLParser.parse c
|
202
225
|
else IeeeBibliographicItem.from_hash YAML.safe_load(c)
|
203
226
|
end
|
204
227
|
end
|
@@ -57,11 +57,15 @@ module RelatonIeee
|
|
57
57
|
#
|
58
58
|
# Parse title
|
59
59
|
#
|
60
|
-
# @return [RelatonBib::
|
60
|
+
# @return [Array<RelatonBib::TypedTitleString>]
|
61
61
|
#
|
62
62
|
def parse_title
|
63
|
-
t =
|
64
|
-
|
63
|
+
t = []
|
64
|
+
content = doc.at("./volume/article/title").text
|
65
|
+
if content =~ /\A(.+)\s-\sredline\z/i
|
66
|
+
t << RelatonBib::TypedTitleString.new(content: $1, type: "title-main")
|
67
|
+
end
|
68
|
+
t << RelatonBib::TypedTitleString.new(content: content, type: "main")
|
65
69
|
end
|
66
70
|
|
67
71
|
#
|
@@ -110,7 +114,7 @@ module RelatonIeee
|
|
110
114
|
# @return [Array<RelatonBib::DocumentIdentifier>]
|
111
115
|
#
|
112
116
|
def parse_docid
|
113
|
-
ids = [{ id:
|
117
|
+
ids = [{ id: pubid.to_s, type: "IEEE" }]
|
114
118
|
isbn = doc.at("./publicationinfo/isbn")
|
115
119
|
ids << { id: isbn.text, type: "ISBN" } if isbn
|
116
120
|
doi = doc.at("./volume/article/articleinfo/articledoi")
|
@@ -120,13 +124,20 @@ module RelatonIeee
|
|
120
124
|
end
|
121
125
|
end
|
122
126
|
|
127
|
+
#
# Parse the PubID from the document's normtitle element.
#
# The result is memoized in @pubid. When the document has no normtitle
# element the method returns nil instead of raising, so callers that
# already guard with `pubid&.to_id` can skip the document.
#
# @return [Object, nil] whatever RawbibIdParser.parse returns, or nil when
#   the normtitle element is missing
#
def pubid
  @pubid ||= begin
    normtitle = doc.at("./normtitle")&.text
    RawbibIdParser.parse(normtitle) if normtitle
  end
end
|
133
|
+
|
123
134
|
#
|
124
135
|
# Parse docnumber
|
125
136
|
#
|
126
137
|
# @return [String] PubID
|
127
138
|
#
|
128
139
|
def docnumber
|
129
|
-
@docnumber ||= doc.at("./publicationinfo/stdnumber").text
|
140
|
+
@docnumber ||= pubid&.to_id # doc.at("./publicationinfo/stdnumber").text
|
130
141
|
end
|
131
142
|
|
132
143
|
#
|
@@ -223,7 +234,8 @@ module RelatonIeee
|
|
223
234
|
if (ref = fetcher.backrefs[r.text])
|
224
235
|
rel = fetcher.create_relation(r[:type], ref)
|
225
236
|
rels << rel if rel
|
226
|
-
elsif !/Inactive Date/.match?(r)
|
237
|
+
elsif !/Inactive Date/.match?(r) && docnumber
|
238
|
+
fetcher.add_crossref(docnumber, r)
|
227
239
|
end
|
228
240
|
end
|
229
241
|
RelatonBib::DocRelationCollection.new rels
|
@@ -11,23 +11,27 @@ module RelatonIeee
|
|
11
11
|
|
12
12
|
# rubocop:disable Metrics/AbcSize
|
13
13
|
|
14
|
-
# @param
|
14
|
+
# @param reference [String]
|
15
15
|
# @param opts [Hash]
|
16
|
-
def initialize(
|
16
|
+
# Query the IEEE site search for the reference and collect matching hits.
#
# The reference is posted to the admin-ajax endpoint; the "html" field of
# the JSON response is parsed and every `h4 > a` link is inspected. A link
# is kept when the code extracted from its text is a prefix of the
# requested code (the reference with a leading "IEEE " / "IEEE Std "
# removed). Hits are ordered by the string "year + url", descending.
def initialize(reference) # rubocop:disable Metrics/MethodLength
  super
  code1 = reference.sub(/^IEEE\s(Std\s)?/, "")
  url = "#{DOMAIN}/wp-admin/admin-ajax.php"
  query = reference.gsub("/", " ")
  resp = Faraday.post url, { action: "ieee_cloudsearch", q: query }
  json = JSON.parse resp.body
  html = Nokogiri::HTML json["html"]
  hits = html.xpath("//h4/a").each_with_object([]) do |link, acc|
    ref = link.text.strip
    /^(?:\w+\s)?(?<code2>[A-Z\d.]+)(?:-(?<year>\d{4}))?/ =~ ref
    # Plain prefix comparison: code2 may contain ".", which must not be
    # interpreted as a regexp wildcard.
    next unless code2 && code1.start_with?(code2)

    acc << Hit.new({ ref: ref, year: year.to_i, url: link[:href] }, self)
  end
  # `to_s` on the url keeps the sort key valid for links without an href.
  @array = hits.sort_by { |h| h.hit[:year].to_s + h.hit[:url].to_s }.reverse
end
|
32
36
|
# rubocop:enable Metrics/AbcSize
|
33
37
|
end
|
@@ -15,7 +15,7 @@ module RelatonIeee
|
|
15
15
|
#
|
16
16
|
# @return [Hash, NilClass] returns { ret: RelatonBib::BibliographicItem }
|
17
17
|
# if document is found else returns NilClass
|
18
|
-
def get(code, year = nil, _opts = {})
|
18
|
+
def get(code, year = nil, _opts = {}) # rubocop:disable Metrics/AbcSize, Metrics/MethodLength
|
19
19
|
warn "[relaton-ieee] (\"#{code}\") fetching..."
|
20
20
|
result = search(code) || (return nil)
|
21
21
|
year ||= code.match(/(?<=-)\d{4}/)&.to_s
|
@@ -42,11 +42,11 @@ module RelatonIeee
|
|
42
42
|
# @param opts [Hash] options
|
43
43
|
#
|
44
44
|
# @return [Hash]
|
45
|
-
def bib_results_filter(result, ref, year)
|
45
|
+
def bib_results_filter(result, ref, year) # rubocop:disable Metrics/AbcSize
|
46
46
|
rp1 = ref_parts ref
|
47
47
|
missed_years = []
|
48
48
|
result.each do |hit|
|
49
|
-
rp2 = ref_parts hit.hit[
|
49
|
+
rp2 = ref_parts hit.hit[:ref]
|
50
50
|
next if rp1[:code] != rp2[:code] || rp1[:corr] != rp2[:corr]
|
51
51
|
|
52
52
|
return { ret: hit } if !year
|
@@ -0,0 +1,149 @@
|
|
1
|
+
module RelatonIeee
  #
  # IEEE publication identifier composed of one or more Id parts.
  #
  class PubId
    #
    # A single identifier part: a number plus optional qualifiers.
    #
    class Id
      # Names of the optional attributes read from the constructor arguments.
      OPTIONAL_ATTRIBUTES = %i[
        publisher stage part status approval edition draft rev corr amd
        redline year month
      ].freeze

      # @return [String]
      attr_reader :number

      # @return [String, nil] (+redline+ is a Boolean or nil)
      attr_reader(*OPTIONAL_ATTRIBUTES)

      #
      # PubId part constructor
      #
      # @param [String] number
      # @param [Hash] args optional attributes; unknown keys are ignored
      # @option args [String] :publisher
      # @option args [String] :stage
      # @option args [String] :part
      # @option args [String] :status
      # @option args [String] :approval
      # @option args [String] :edition
      # @option args [String] :draft
      # @option args [String] :rev
      # @option args [String] :corr
      # @option args [String] :amd
      # @option args [Boolean] :redline
      # @option args [String] :year
      # @option args [String] :month
      #
      def initialize(number:, **args)
        @number = number
        OPTIONAL_ATTRIBUTES.each do |name|
          instance_variable_set(:"@#{name}", args[name])
        end
      end

      #
      # String representation of the part, laid out as
      # "publisher status approval stage number-part" followed by the
      # edition, draft, rev, corr, amd, year, month and redline suffixes.
      # Attributes that are not set are omitted.
      #
      # @return [String]
      #
      def to_s # rubocop:disable Metrics/AbcSize
        qualifiers = [publisher, status, approval, stage].select(&:itself)
        out = [*qualifiers, number].join(" ") # join returns a fresh string
        out << "-#{part}" if part
        out << edition_to_s << draft_to_s << rev_to_s << corr_to_s << amd_to_s
        out << year_to_s << month_to_s << redline_to_s
      end

      # @return [String] "/E-<edition>" or "" when edition is not set
      def edition_to_s
        edition ? "/E-#{edition}" : ""
      end

      # @return [String] "/D-<draft>" or "" when draft is not set
      def draft_to_s
        draft ? "/D-#{draft}" : ""
      end

      # @return [String] "/R-<rev>" or "" when rev is not set
      def rev_to_s
        rev ? "/R-#{rev}" : ""
      end

      # @return [String] "/Cor<corr>" or "" when corr is not set
      def corr_to_s
        corr ? "/Cor#{corr}" : ""
      end

      # @return [String] "/Amd<amd>" or "" when amd is not set
      def amd_to_s
        amd ? "/Amd#{amd}" : ""
      end

      # @return [String] ".<year>" or "" when year is not set
      def year_to_s
        year ? ".#{year}" : ""
      end

      # @return [String] "-<month>" or "" when month is not set
      def month_to_s
        month ? "-#{month}" : ""
      end

      # @return [String] " Redline" or "" when redline is not set
      def redline_to_s
        redline ? " Redline" : ""
      end
    end

    # Attributes that #to_id copies from the second part when the first
    # part leaves them nil, in output order.
    FALLBACK_ATTRIBUTES = %i[edition draft rev corr amd year month].freeze

    # @return [Array<RelatonIeee::PubId::Id>]
    attr_reader :pubid

    #
    # IEEE publication id
    #
    # @param [Array<Hash>, Hash] pubid attributes of one part, or of several
    #
    def initialize(pubid)
      @pubid = array(pubid).map { |id| Id.new(**id) }
    end

    #
    # Wrap a single Hash into an array; arrays are returned unchanged.
    # Kernel#Array is not used because it would turn a Hash into an array
    # of key/value pairs.
    #
    # @param [Array<Hash>, Hash] pid
    #
    # @return [Array<Hash>]
    #
    def array(pid)
      pid.is_a?(Array) ? pid : [pid]
    end

    #
    # PubId string representation: all parts joined with "/".
    #
    # @return [String]
    #
    def to_s
      pubid.map(&:to_s).join("/")
    end

    #
    # Generate an ID from the first part only (publisher included, as
    # produced by Id#to_s). When a second part exists, its edition, draft,
    # rev, corr, amd, year and month suffixes are appended for every
    # attribute the first part leaves nil, and its redline suffix is
    # appended unless the first part is itself a redline. The second
    # part's publisher and number are never used.
    #
    # @return [String]
    #
    def to_id
      primary, secondary = pubid
      return primary.to_s unless secondary

      suffixes = FALLBACK_ATTRIBUTES
        .select { |name| primary.public_send(name).nil? }
        .map { |name| secondary.public_send(:"#{name}_to_s") }
      suffixes << secondary.redline_to_s unless primary.redline
      [primary.to_s, *suffixes].join
    end
  end
end
|