relaton-ieee 1.8.0 → 1.9.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.github/workflows/rake.yml +1 -11
- data/.gitignore +1 -0
- data/.rubocop.yml +1 -1
- data/README.adoc +29 -0
- data/grammars/biblio.rng +1 -0
- data/grammars/isodoc.rng +72 -10
- data/lib/relaton_ieee/bibxml_parser.rb +14 -0
- data/lib/relaton_ieee/data_fetcher.rb +229 -0
- data/lib/relaton_ieee/data_parser.rb +274 -0
- data/lib/relaton_ieee/hit_collection.rb +2 -2
- data/lib/relaton_ieee/ieee_bibliographic_item.rb +4 -4
- data/lib/relaton_ieee/ieee_bibliography.rb +15 -2
- data/lib/relaton_ieee/processor.rb +14 -1
- data/lib/relaton_ieee/pub_id.rb +149 -0
- data/lib/relaton_ieee/rawbib_id_parser.rb +515 -0
- data/lib/relaton_ieee/version.rb +1 -1
- data/lib/relaton_ieee.rb +2 -0
- data/relaton_ieee.gemspec +3 -7
- metadata +24 -5
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: f5ea9caf8eaf59f616cad6b137ff46fbfd0f28d38ad92c457467a3d19032d2a7
|
4
|
+
data.tar.gz: 88b376e90082b8a6bb730e5a07b96930b292c092ece0ce82850a50a9d9cdeb0e
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: c5eeb5339a3d598da5509de3ffc2bb9231694f9a9edc6603b40cb2634ded16f6ebee01c0ffb222b624426075c7dfd28382bdfac428077097821215a017680f7f
|
7
|
+
data.tar.gz: 9d8ee0f3c616d91d53ce9bfd54ceaf4a5cd458ac78471e69f7179daf6f5cf225681dadfe409011012f8c4e987e11f87e27a7e82b777aff2138873be5761aa617
|
data/.github/workflows/rake.yml
CHANGED
@@ -16,19 +16,9 @@ jobs:
|
|
16
16
|
strategy:
|
17
17
|
fail-fast: false
|
18
18
|
matrix:
|
19
|
-
ruby: [ '
|
19
|
+
ruby: [ '3.0', '2.7', '2.6', '2.5' ]
|
20
20
|
os: [ ubuntu-latest, windows-latest, macos-latest ]
|
21
21
|
experimental: [ false ]
|
22
|
-
include:
|
23
|
-
- ruby: '3.0'
|
24
|
-
os: 'ubuntu-latest'
|
25
|
-
experimental: true
|
26
|
-
- ruby: '3.0'
|
27
|
-
os: 'windows-latest'
|
28
|
-
experimental: true
|
29
|
-
- ruby: '3.0'
|
30
|
-
os: 'macos-latest'
|
31
|
-
experimental: true
|
32
22
|
steps:
|
33
23
|
- uses: actions/checkout@v2
|
34
24
|
with:
|
data/.gitignore
CHANGED
data/.rubocop.yml
CHANGED
data/README.adoc
CHANGED
@@ -138,6 +138,16 @@ RelatonIeee::IeeeBibliography.get("IEEE 528-2019")
|
|
138
138
|
...
|
139
139
|
----
|
140
140
|
|
141
|
+
=== Typed links
|
142
|
+
|
143
|
+
Each IEEE document has a link of type `src`.
|
144
|
+
|
145
|
+
[source,ruby]
|
146
|
+
----
|
147
|
+
item.link
|
148
|
+
=> [#<RelatonBib::TypedUri:0x00007fe885219ba0 @content=#<Addressable::URI:0x8ac URI:https://standards.ieee.org/standard/528-2019.html>, @type="src">]
|
149
|
+
----
|
150
|
+
|
141
151
|
=== Create bibliographic item from XML
|
142
152
|
[source,ruby]
|
143
153
|
----
|
@@ -158,6 +168,25 @@ hash = YAML.load_file 'spec/fixtures/ieee_528_2019.yaml'
|
|
158
168
|
...
|
159
169
|
----
|
160
170
|
|
171
|
+
=== Fetch data
|
172
|
+
|
173
|
+
There is an IEEE dataset https://github.com/ietf-ribose/ieee-rawbib which can be converted into BibXML/BibYAML formats. The dataset needs to be placed into a local directory.
|
174
|
+
|
175
|
+
The method `RelatonIeee::DataFetcher.fetch(output: "data", format: "yaml")` converts all the documents from the local `ieee-rawbib` directory and saves them to the `./data` folder in YAML format.
|
176
|
+
Arguments:
|
177
|
+
|
178
|
+
- `output` - folder to save documents (default './data').
|
179
|
+
- `format` - format in which the documents are saved. Possible formats are: `yaml`, `xml` (default `yaml`).
|
180
|
+
|
181
|
+
[source,ruby]
|
182
|
+
----
|
183
|
+
RelatonIeee::DataFetcher.fetch
|
184
|
+
Started at: 2021-09-24 17:55:07 +0200
|
185
|
+
Stopped at: 2021-09-24 17:57:30 +0200
|
186
|
+
Done in: 143 sec.
|
187
|
+
=> nil
|
188
|
+
----
|
189
|
+
|
161
190
|
== Development
|
162
191
|
|
163
192
|
After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake spec` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
|
data/grammars/biblio.rng
CHANGED
data/grammars/isodoc.rng
CHANGED
@@ -45,6 +45,11 @@
|
|
45
45
|
<optional>
|
46
46
|
<attribute name="alt"/>
|
47
47
|
</optional>
|
48
|
+
<optional>
|
49
|
+
<attribute name="updatetype">
|
50
|
+
<data type="boolean"/>
|
51
|
+
</attribute>
|
52
|
+
</optional>
|
48
53
|
<text/>
|
49
54
|
</element>
|
50
55
|
</define>
|
@@ -199,6 +204,18 @@
|
|
199
204
|
</zeroOrMore>
|
200
205
|
</element>
|
201
206
|
</define>
|
207
|
+
<define name="dt">
|
208
|
+
<element name="dt">
|
209
|
+
<optional>
|
210
|
+
<attribute name="id">
|
211
|
+
<data type="ID"/>
|
212
|
+
</attribute>
|
213
|
+
</optional>
|
214
|
+
<zeroOrMore>
|
215
|
+
<ref name="TextElement"/>
|
216
|
+
</zeroOrMore>
|
217
|
+
</element>
|
218
|
+
</define>
|
202
219
|
<define name="example">
|
203
220
|
<element name="example">
|
204
221
|
<attribute name="id">
|
@@ -543,6 +560,9 @@
|
|
543
560
|
</define>
|
544
561
|
<define name="BibDataExtensionType">
|
545
562
|
<ref name="doctype"/>
|
563
|
+
<optional>
|
564
|
+
<ref name="docsubtype"/>
|
565
|
+
</optional>
|
546
566
|
<optional>
|
547
567
|
<ref name="editorialgroup"/>
|
548
568
|
</optional>
|
@@ -890,6 +910,14 @@
|
|
890
910
|
</define>
|
891
911
|
</include>
|
892
912
|
<!-- end overrides -->
|
913
|
+
<define name="docsubtype">
|
914
|
+
<element name="subdoctype">
|
915
|
+
<ref name="DocumentSubtype"/>
|
916
|
+
</element>
|
917
|
+
</define>
|
918
|
+
<define name="DocumentSubtype">
|
919
|
+
<text/>
|
920
|
+
</define>
|
893
921
|
<define name="colgroup">
|
894
922
|
<element name="colgroup">
|
895
923
|
<oneOrMore>
|
@@ -939,7 +967,34 @@
|
|
939
967
|
<define name="concept">
|
940
968
|
<element name="concept">
|
941
969
|
<optional>
|
942
|
-
<attribute name="
|
970
|
+
<attribute name="ital">
|
971
|
+
<data type="boolean"/>
|
972
|
+
</attribute>
|
973
|
+
</optional>
|
974
|
+
<optional>
|
975
|
+
<attribute name="ref">
|
976
|
+
<data type="boolean"/>
|
977
|
+
</attribute>
|
978
|
+
</optional>
|
979
|
+
<optional>
|
980
|
+
<element name="refterm">
|
981
|
+
<zeroOrMore>
|
982
|
+
<choice>
|
983
|
+
<ref name="PureTextElement"/>
|
984
|
+
<ref name="stem"/>
|
985
|
+
</choice>
|
986
|
+
</zeroOrMore>
|
987
|
+
</element>
|
988
|
+
</optional>
|
989
|
+
<optional>
|
990
|
+
<element name="renderterm">
|
991
|
+
<zeroOrMore>
|
992
|
+
<choice>
|
993
|
+
<ref name="PureTextElement"/>
|
994
|
+
<ref name="stem"/>
|
995
|
+
</choice>
|
996
|
+
</zeroOrMore>
|
997
|
+
</element>
|
943
998
|
</optional>
|
944
999
|
<choice>
|
945
1000
|
<ref name="eref"/>
|
@@ -965,6 +1020,9 @@
|
|
965
1020
|
</attribute>
|
966
1021
|
<attribute name="name"/>
|
967
1022
|
<attribute name="action"/>
|
1023
|
+
<optional>
|
1024
|
+
<attribute name="class"/>
|
1025
|
+
</optional>
|
968
1026
|
<zeroOrMore>
|
969
1027
|
<choice>
|
970
1028
|
<ref name="TextElement"/>
|
@@ -1191,13 +1249,17 @@
|
|
1191
1249
|
</define>
|
1192
1250
|
<define name="IsoWorkgroup">
|
1193
1251
|
<optional>
|
1194
|
-
<attribute name="number"
|
1195
|
-
<data type="int"/>
|
1196
|
-
</attribute>
|
1252
|
+
<attribute name="number"/>
|
1197
1253
|
</optional>
|
1198
1254
|
<optional>
|
1199
1255
|
<attribute name="type"/>
|
1200
1256
|
</optional>
|
1257
|
+
<optional>
|
1258
|
+
<attribute name="identifier"/>
|
1259
|
+
</optional>
|
1260
|
+
<optional>
|
1261
|
+
<attribute name="prefix"/>
|
1262
|
+
</optional>
|
1201
1263
|
<text/>
|
1202
1264
|
</define>
|
1203
1265
|
<define name="ics">
|
@@ -1459,26 +1521,26 @@
|
|
1459
1521
|
<optional>
|
1460
1522
|
<ref name="section-title"/>
|
1461
1523
|
</optional>
|
1462
|
-
<
|
1524
|
+
<choice>
|
1463
1525
|
<choice>
|
1464
1526
|
<group>
|
1465
|
-
<
|
1527
|
+
<oneOrMore>
|
1466
1528
|
<ref name="BasicBlock"/>
|
1467
|
-
</
|
1529
|
+
</oneOrMore>
|
1468
1530
|
<zeroOrMore>
|
1469
1531
|
<ref name="note"/>
|
1470
1532
|
</zeroOrMore>
|
1471
1533
|
</group>
|
1472
1534
|
<ref name="amend"/>
|
1473
1535
|
</choice>
|
1474
|
-
<
|
1536
|
+
<oneOrMore>
|
1475
1537
|
<choice>
|
1476
1538
|
<ref name="clause-subsection"/>
|
1477
1539
|
<ref name="terms"/>
|
1478
1540
|
<ref name="definitions"/>
|
1479
1541
|
</choice>
|
1480
|
-
</
|
1481
|
-
</
|
1542
|
+
</oneOrMore>
|
1543
|
+
</choice>
|
1482
1544
|
</define>
|
1483
1545
|
<define name="Annex-Section">
|
1484
1546
|
<optional>
|
@@ -0,0 +1,14 @@
|
|
1
|
+
module RelatonIeee
|
2
|
+
module BibXMLParser
|
3
|
+
extend RelatonBib::BibXMLParser
|
4
|
+
extend BibXMLParser
|
5
|
+
|
6
|
+
FLAVOR = "IEEE".freeze
|
7
|
+
|
8
|
+
# @param attrs [Hash]
|
9
|
+
# @return [RelatonIeee::IeeeBibliographicItem]
|
10
|
+
def bib_item(**attrs)
|
11
|
+
IeeeBibliographicItem.new(**attrs)
|
12
|
+
end
|
13
|
+
end
|
14
|
+
end
|
@@ -0,0 +1,229 @@
|
|
1
|
+
require "zip"
|
2
|
+
require "relaton_ieee/data_parser"
|
3
|
+
require "relaton_ieee/rawbib_id_parser"
|
4
|
+
|
5
|
+
module RelatonIeee
|
6
|
+
class DataFetcher
|
7
|
+
RELATION_TYPES = {
|
8
|
+
"S" => { type: "obsoletedBy" },
|
9
|
+
"V" => { type: "updates", description: "revises" },
|
10
|
+
"T" => { type: "updates", description: "amends" },
|
11
|
+
"C" => { type: "updates", description: "corrects" },
|
12
|
+
"O" => { type: "adoptedFrom" },
|
13
|
+
"P" => { type: "complementOf", description: "supplement" },
|
14
|
+
"N" => false, "G" => false,
|
15
|
+
"F" => false, "I" => false,
|
16
|
+
"E" => false, "B" => false, "W" => false
|
17
|
+
}.freeze
|
18
|
+
|
19
|
+
# @return [Hash] list of AMSID => PubID
|
20
|
+
attr_reader :backrefs
|
21
|
+
|
22
|
+
#
|
23
|
+
# Create RelatonIeee::DataFetcher instance
|
24
|
+
#
|
25
|
+
# @param [String] output output dir
|
26
|
+
# @param [String] format output format. Allowed values: "yaml" or "xml"
|
27
|
+
#
|
28
|
+
def initialize(output, format)
|
29
|
+
@output = output
|
30
|
+
@format = format
|
31
|
+
@ext = format.sub(/^bib/, "")
|
32
|
+
@crossrefs = {}
|
33
|
+
@backrefs = {}
|
34
|
+
# @normtitles = []
|
35
|
+
end
|
36
|
+
|
37
|
+
#
|
38
|
+
# Convert documents from `ieee-rawbib` dir (IEEE dataset) to BibYAML/BibXML
|
39
|
+
#
|
40
|
+
# @param [String] output ('data') output dir
|
41
|
+
# @param [String] format ('yaml') output format.
|
42
|
+
# Allowed values: "yaml" or "xml"
|
43
|
+
#
|
44
|
+
def self.fetch(output: "data", format: "yaml")
|
45
|
+
t1 = Time.now
|
46
|
+
puts "Started at: #{t1}"
|
47
|
+
FileUtils.mkdir_p output unless Dir.exist? output
|
48
|
+
new(output, format).fetch
|
49
|
+
t2 = Time.now
|
50
|
+
puts "Stopped at: #{t2}"
|
51
|
+
puts "Done in: #{(t2 - t1).round} sec."
|
52
|
+
end
|
53
|
+
|
54
|
+
#
|
55
|
+
# Convert documents from `ieee-rawbib` dir (IEEE dataset) to BibYAML/BibXML
|
56
|
+
#
|
57
|
+
def fetch # rubocop:disable Metrics/AbcSize,Metrics/MethodLength
|
58
|
+
Dir["ieee-rawbib/**/*.{xml,zip}"].reject { |f| f["Deleted_"] }.each do |f|
|
59
|
+
xml = case File.extname(f)
|
60
|
+
when ".zip" then read_zip f
|
61
|
+
when ".xml" then File.read f, encoding: "UTF-8"
|
62
|
+
end
|
63
|
+
fetch_doc xml, f
|
64
|
+
rescue StandardError => e
|
65
|
+
warn "File: #{f}"
|
66
|
+
warn e.message
|
67
|
+
warn e.backtrace
|
68
|
+
end
|
69
|
+
# File.write "normtitles.txt", @normtitles.join("\n")
|
70
|
+
update_relations
|
71
|
+
end
|
72
|
+
|
73
|
+
#
|
74
|
+
# Extract XML file from zip archive
|
75
|
+
#
|
76
|
+
# @param [String] file path to archive
|
77
|
+
#
|
78
|
+
# @return [String] file content
|
79
|
+
#
|
80
|
+
def read_zip(file)
|
81
|
+
Zip::File.open(file) do |zf|
|
82
|
+
entry = zf.glob("**/*.xml").first
|
83
|
+
entry.get_input_stream.read
|
84
|
+
end
|
85
|
+
end
|
86
|
+
|
87
|
+
#
|
88
|
+
# Parse document and save it
|
89
|
+
#
|
90
|
+
# @param [String] xml content
|
91
|
+
# @param [String] filename source file
|
92
|
+
#
|
93
|
+
def fetch_doc(xml, filename) # rubocop:disable Metrics/AbcSize,Metrics/MethodLength,Metrics/CyclomaticComplexity,Metrics/PerceivedComplexity
|
94
|
+
doc = Nokogiri::XML(xml).at("/publication")
|
95
|
+
unless doc
|
96
|
+
warn "Empty file: #{filename}"
|
97
|
+
return
|
98
|
+
end
|
99
|
+
stdid = doc.at("./publicationinfo/standard_id").text
|
100
|
+
if stdid == "0"
|
101
|
+
# nt = doc&.at("./normtitle")&.text
|
102
|
+
# ntid = @normtitles.index nt
|
103
|
+
# @normtitles << nt if nt && !ntid
|
104
|
+
warn "Zero standard_id in #{filename}"
|
105
|
+
return
|
106
|
+
end
|
107
|
+
bib = DataParser.parse doc, self
|
108
|
+
if bib.docnumber.nil?
|
109
|
+
nt = doc&.at("./normtitle")&.text
|
110
|
+
warn "PubID parse error. Normtitle: #{nt}, file: #{filename}"
|
111
|
+
return
|
112
|
+
end
|
113
|
+
amsid = doc.at("./publicationinfo/amsid").text
|
114
|
+
if backrefs.value?(bib.docidentifier[0].id) && /updates\.\d+/ !~ filename
|
115
|
+
oamsid = backrefs.key bib.docidentifier[0].id
|
116
|
+
warn "Document exists ID: \"#{bib.docidentifier[0].id}\" AMSID: "\
|
117
|
+
"\"#{amsid}\" source: \"#{filename}\". Other AMSID: \"#{oamsid}\""
|
118
|
+
if bib.docidentifier[0].id.include?(doc.at("./publicationinfo/stdnumber").text)
|
119
|
+
save_doc bib # rewrite file if the PubID matches to the stdnumber
|
120
|
+
backrefs[amsid] = bib.docidentifier[0].id
|
121
|
+
end
|
122
|
+
else
|
123
|
+
save_doc bib
|
124
|
+
backrefs[amsid] = bib.docidentifier[0].id
|
125
|
+
end
|
126
|
+
end
|
127
|
+
|
128
|
+
#
|
129
|
+
# Save unresolved relation reference
|
130
|
+
#
|
131
|
+
# @param [String] docnumber of main document
|
132
|
+
# @param [Nokogiri::XML::Element] amsid relation data
|
133
|
+
#
|
134
|
+
def add_crossref(docnumber, amsid)
|
135
|
+
return if RELATION_TYPES[amsid[:type]] == false
|
136
|
+
|
137
|
+
ref = { amsid: amsid.text, type: amsid[:type] }
|
138
|
+
if @crossrefs[docnumber]
|
139
|
+
@crossrefs[docnumber] << ref
|
140
|
+
else @crossrefs[docnumber] = [ref]
|
141
|
+
end
|
142
|
+
end
|
143
|
+
|
144
|
+
#
|
145
|
+
# Save document to file
|
146
|
+
#
|
147
|
+
# @param [RelatonIeee::IeeeBibliographicItem] bib
|
148
|
+
#
|
149
|
+
def save_doc(bib)
|
150
|
+
c = case @format
|
151
|
+
when "xml" then bib.to_xml(bibdata: true)
|
152
|
+
when "yaml" then bib.to_hash.to_yaml
|
153
|
+
else bib.send("to_#{@format}")
|
154
|
+
end
|
155
|
+
File.write file_name(bib.docnumber), c, encoding: "UTF-8"
|
156
|
+
end
|
157
|
+
|
158
|
+
#
|
159
|
+
# Make filename from PubID
|
160
|
+
#
|
161
|
+
# @param [String] docnumber
|
162
|
+
#
|
163
|
+
# @return [String] filename
|
164
|
+
#
|
165
|
+
def file_name(docnumber)
|
166
|
+
name = docnumber.gsub(/\s-/, "-").gsub(/[\s,:\/]/, "_").squeeze("_").upcase
|
167
|
+
File.join @output, "#{name}.#{@ext}"
|
168
|
+
end
|
169
|
+
|
170
|
+
#
|
171
|
+
# Update unresolved relations
|
172
|
+
#
|
173
|
+
def update_relations # rubocop:disable Metrics/AbcSize,Metrics/MethodLength
|
174
|
+
@crossrefs.each do |dnum, rfs|
|
175
|
+
bib = nil
|
176
|
+
rfs.each do |rf|
|
177
|
+
if backrefs[rf[:amsid]]
|
178
|
+
rel = create_relation(rf[:type], backrefs[rf[:amsid]])
|
179
|
+
if rel
|
180
|
+
bib ||= read_bib(dnum)
|
181
|
+
bib.relation << rel
|
182
|
+
save_doc bib
|
183
|
+
end
|
184
|
+
else
|
185
|
+
warn "Unresolved relation: '#{rf[:amsid]}' type: '#{rf[:type]}' for '#{dnum}'"
|
186
|
+
end
|
187
|
+
end
|
188
|
+
end
|
189
|
+
end
|
190
|
+
|
191
|
+
#
|
192
|
+
# Create relation instance
|
193
|
+
#
|
194
|
+
# @param [String] type IEEE relation type
|
195
|
+
# @param [String] fref reference
|
196
|
+
#
|
197
|
+
# @return [RelatonBib::DocumentRelation]
|
198
|
+
#
|
199
|
+
def create_relation(type, fref)
|
200
|
+
return if RELATION_TYPES[type] == false
|
201
|
+
|
202
|
+
fr = RelatonBib::FormattedRef.new(content: fref)
|
203
|
+
bib = IeeeBibliographicItem.new formattedref: fr
|
204
|
+
desc = RELATION_TYPES[type][:description]
|
205
|
+
description = desc && RelatonBib::FormattedString.new(content: desc, language: "en", script: "Latn")
|
206
|
+
RelatonBib::DocumentRelation.new(
|
207
|
+
type: RELATION_TYPES[type][:type],
|
208
|
+
description: description,
|
209
|
+
bibitem: bib,
|
210
|
+
)
|
211
|
+
end
|
212
|
+
|
213
|
+
#
|
214
|
+
# Read document from BibXML/BibYAML file
|
215
|
+
#
|
216
|
+
# @param [String] docnumber
|
217
|
+
#
|
218
|
+
# @return [RelatonIeee::IeeeBibliographicItem]
|
219
|
+
#
|
220
|
+
def read_bib(docnumber)
|
221
|
+
c = File.read file_name(docnumber), encoding: "UTF-8"
|
222
|
+
case @format
|
223
|
+
when "xml" then XMLParser.from_xml c
|
224
|
+
when "bibxml" then BibXMLParser.parse c
|
225
|
+
else IeeeBibliographicItem.from_hash YAML.safe_load(c)
|
226
|
+
end
|
227
|
+
end
|
228
|
+
end
|
229
|
+
end
|