relaton-ieee 1.8.0 → 1.9.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/workflows/rake.yml +1 -11
- data/.gitignore +1 -0
- data/.rubocop.yml +1 -1
- data/README.adoc +29 -0
- data/grammars/biblio.rng +1 -0
- data/grammars/isodoc.rng +72 -10
- data/lib/relaton_ieee/bibxml_parser.rb +14 -0
- data/lib/relaton_ieee/data_fetcher.rb +229 -0
- data/lib/relaton_ieee/data_parser.rb +274 -0
- data/lib/relaton_ieee/hit_collection.rb +2 -2
- data/lib/relaton_ieee/ieee_bibliographic_item.rb +4 -4
- data/lib/relaton_ieee/ieee_bibliography.rb +15 -2
- data/lib/relaton_ieee/processor.rb +14 -1
- data/lib/relaton_ieee/pub_id.rb +149 -0
- data/lib/relaton_ieee/rawbib_id_parser.rb +515 -0
- data/lib/relaton_ieee/version.rb +1 -1
- data/lib/relaton_ieee.rb +2 -0
- data/relaton_ieee.gemspec +3 -7
- metadata +24 -5
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: f5ea9caf8eaf59f616cad6b137ff46fbfd0f28d38ad92c457467a3d19032d2a7
|
4
|
+
data.tar.gz: 88b376e90082b8a6bb730e5a07b96930b292c092ece0ce82850a50a9d9cdeb0e
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: c5eeb5339a3d598da5509de3ffc2bb9231694f9a9edc6603b40cb2634ded16f6ebee01c0ffb222b624426075c7dfd28382bdfac428077097821215a017680f7f
|
7
|
+
data.tar.gz: 9d8ee0f3c616d91d53ce9bfd54ceaf4a5cd458ac78471e69f7179daf6f5cf225681dadfe409011012f8c4e987e11f87e27a7e82b777aff2138873be5761aa617
|
data/.github/workflows/rake.yml
CHANGED
@@ -16,19 +16,9 @@ jobs:
|
|
16
16
|
strategy:
|
17
17
|
fail-fast: false
|
18
18
|
matrix:
|
19
|
-
ruby: [ '
|
19
|
+
ruby: [ '3.0', '2.7', '2.6', '2.5' ]
|
20
20
|
os: [ ubuntu-latest, windows-latest, macos-latest ]
|
21
21
|
experimental: [ false ]
|
22
|
-
include:
|
23
|
-
- ruby: '3.0'
|
24
|
-
os: 'ubuntu-latest'
|
25
|
-
experimental: true
|
26
|
-
- ruby: '3.0'
|
27
|
-
os: 'windows-latest'
|
28
|
-
experimental: true
|
29
|
-
- ruby: '3.0'
|
30
|
-
os: 'macos-latest'
|
31
|
-
experimental: true
|
32
22
|
steps:
|
33
23
|
- uses: actions/checkout@v2
|
34
24
|
with:
|
data/.gitignore
CHANGED
data/.rubocop.yml
CHANGED
data/README.adoc
CHANGED
@@ -138,6 +138,16 @@ RelatonIeee::IeeeBibliography.get("IEEE 528-2019")
|
|
138
138
|
...
|
139
139
|
----
|
140
140
|
|
141
|
+
=== Typed links
|
142
|
+
|
143
|
+
Each IEEE document has a link of type `src`.
|
144
|
+
|
145
|
+
[source,ruby]
|
146
|
+
----
|
147
|
+
item.link
|
148
|
+
=> [#<RelatonBib::TypedUri:0x00007fe885219ba0 @content=#<Addressable::URI:0x8ac URI:https://standards.ieee.org/standard/528-2019.html>, @type="src">]
|
149
|
+
----
|
150
|
+
|
141
151
|
=== Create bibliographic item from XML
|
142
152
|
[source,ruby]
|
143
153
|
----
|
@@ -158,6 +168,25 @@ hash = YAML.load_file 'spec/fixtures/ieee_528_2019.yaml'
|
|
158
168
|
...
|
159
169
|
----
|
160
170
|
|
171
|
+
=== Fetch data
|
172
|
+
|
173
|
+
There is an IEEE dataset https://github.com/ietf-ribose/ieee-rawbib which can be converted into BibXML/BibYAML formats. The dataset needs to be placed in a local `ieee-rawbib` directory.
|
174
|
+
|
175
|
+
The method `RelatonIeee::DataFetcher.fetch(output: "data", format: "yaml")` converts all the documents from the local `ieee-rawbib` directory and saves them to the `./data` folder in YAML format.
|
176
|
+
Arguments:
|
177
|
+
|
178
|
+
- `output` - folder to save documents (default './data').
|
179
|
+
- `format` - format in which the documents are saved. Possible formats are: `yaml`, `xml` (default `yaml`).
|
180
|
+
|
181
|
+
[source,ruby]
|
182
|
+
----
|
183
|
+
RelatonIeee::DataFetcher.fetch
|
184
|
+
Started at: 2021-09-24 17:55:07 +0200
|
185
|
+
Stopped at: 2021-09-24 17:57:30 +0200
|
186
|
+
Done in: 143 sec.
|
187
|
+
=> nil
|
188
|
+
----
|
189
|
+
|
161
190
|
== Development
|
162
191
|
|
163
192
|
After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake spec` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
|
data/grammars/biblio.rng
CHANGED
data/grammars/isodoc.rng
CHANGED
@@ -45,6 +45,11 @@
|
|
45
45
|
<optional>
|
46
46
|
<attribute name="alt"/>
|
47
47
|
</optional>
|
48
|
+
<optional>
|
49
|
+
<attribute name="updatetype">
|
50
|
+
<data type="boolean"/>
|
51
|
+
</attribute>
|
52
|
+
</optional>
|
48
53
|
<text/>
|
49
54
|
</element>
|
50
55
|
</define>
|
@@ -199,6 +204,18 @@
|
|
199
204
|
</zeroOrMore>
|
200
205
|
</element>
|
201
206
|
</define>
|
207
|
+
<define name="dt">
|
208
|
+
<element name="dt">
|
209
|
+
<optional>
|
210
|
+
<attribute name="id">
|
211
|
+
<data type="ID"/>
|
212
|
+
</attribute>
|
213
|
+
</optional>
|
214
|
+
<zeroOrMore>
|
215
|
+
<ref name="TextElement"/>
|
216
|
+
</zeroOrMore>
|
217
|
+
</element>
|
218
|
+
</define>
|
202
219
|
<define name="example">
|
203
220
|
<element name="example">
|
204
221
|
<attribute name="id">
|
@@ -543,6 +560,9 @@
|
|
543
560
|
</define>
|
544
561
|
<define name="BibDataExtensionType">
|
545
562
|
<ref name="doctype"/>
|
563
|
+
<optional>
|
564
|
+
<ref name="docsubtype"/>
|
565
|
+
</optional>
|
546
566
|
<optional>
|
547
567
|
<ref name="editorialgroup"/>
|
548
568
|
</optional>
|
@@ -890,6 +910,14 @@
|
|
890
910
|
</define>
|
891
911
|
</include>
|
892
912
|
<!-- end overrides -->
|
913
|
+
<define name="docsubtype">
|
914
|
+
<element name="subdoctype">
|
915
|
+
<ref name="DocumentSubtype"/>
|
916
|
+
</element>
|
917
|
+
</define>
|
918
|
+
<define name="DocumentSubtype">
|
919
|
+
<text/>
|
920
|
+
</define>
|
893
921
|
<define name="colgroup">
|
894
922
|
<element name="colgroup">
|
895
923
|
<oneOrMore>
|
@@ -939,7 +967,34 @@
|
|
939
967
|
<define name="concept">
|
940
968
|
<element name="concept">
|
941
969
|
<optional>
|
942
|
-
<attribute name="
|
970
|
+
<attribute name="ital">
|
971
|
+
<data type="boolean"/>
|
972
|
+
</attribute>
|
973
|
+
</optional>
|
974
|
+
<optional>
|
975
|
+
<attribute name="ref">
|
976
|
+
<data type="boolean"/>
|
977
|
+
</attribute>
|
978
|
+
</optional>
|
979
|
+
<optional>
|
980
|
+
<element name="refterm">
|
981
|
+
<zeroOrMore>
|
982
|
+
<choice>
|
983
|
+
<ref name="PureTextElement"/>
|
984
|
+
<ref name="stem"/>
|
985
|
+
</choice>
|
986
|
+
</zeroOrMore>
|
987
|
+
</element>
|
988
|
+
</optional>
|
989
|
+
<optional>
|
990
|
+
<element name="renderterm">
|
991
|
+
<zeroOrMore>
|
992
|
+
<choice>
|
993
|
+
<ref name="PureTextElement"/>
|
994
|
+
<ref name="stem"/>
|
995
|
+
</choice>
|
996
|
+
</zeroOrMore>
|
997
|
+
</element>
|
943
998
|
</optional>
|
944
999
|
<choice>
|
945
1000
|
<ref name="eref"/>
|
@@ -965,6 +1020,9 @@
|
|
965
1020
|
</attribute>
|
966
1021
|
<attribute name="name"/>
|
967
1022
|
<attribute name="action"/>
|
1023
|
+
<optional>
|
1024
|
+
<attribute name="class"/>
|
1025
|
+
</optional>
|
968
1026
|
<zeroOrMore>
|
969
1027
|
<choice>
|
970
1028
|
<ref name="TextElement"/>
|
@@ -1191,13 +1249,17 @@
|
|
1191
1249
|
</define>
|
1192
1250
|
<define name="IsoWorkgroup">
|
1193
1251
|
<optional>
|
1194
|
-
<attribute name="number"
|
1195
|
-
<data type="int"/>
|
1196
|
-
</attribute>
|
1252
|
+
<attribute name="number"/>
|
1197
1253
|
</optional>
|
1198
1254
|
<optional>
|
1199
1255
|
<attribute name="type"/>
|
1200
1256
|
</optional>
|
1257
|
+
<optional>
|
1258
|
+
<attribute name="identifier"/>
|
1259
|
+
</optional>
|
1260
|
+
<optional>
|
1261
|
+
<attribute name="prefix"/>
|
1262
|
+
</optional>
|
1201
1263
|
<text/>
|
1202
1264
|
</define>
|
1203
1265
|
<define name="ics">
|
@@ -1459,26 +1521,26 @@
|
|
1459
1521
|
<optional>
|
1460
1522
|
<ref name="section-title"/>
|
1461
1523
|
</optional>
|
1462
|
-
<
|
1524
|
+
<choice>
|
1463
1525
|
<choice>
|
1464
1526
|
<group>
|
1465
|
-
<
|
1527
|
+
<oneOrMore>
|
1466
1528
|
<ref name="BasicBlock"/>
|
1467
|
-
</
|
1529
|
+
</oneOrMore>
|
1468
1530
|
<zeroOrMore>
|
1469
1531
|
<ref name="note"/>
|
1470
1532
|
</zeroOrMore>
|
1471
1533
|
</group>
|
1472
1534
|
<ref name="amend"/>
|
1473
1535
|
</choice>
|
1474
|
-
<
|
1536
|
+
<oneOrMore>
|
1475
1537
|
<choice>
|
1476
1538
|
<ref name="clause-subsection"/>
|
1477
1539
|
<ref name="terms"/>
|
1478
1540
|
<ref name="definitions"/>
|
1479
1541
|
</choice>
|
1480
|
-
</
|
1481
|
-
</
|
1542
|
+
</oneOrMore>
|
1543
|
+
</choice>
|
1482
1544
|
</define>
|
1483
1545
|
<define name="Annex-Section">
|
1484
1546
|
<optional>
|
@@ -0,0 +1,14 @@
|
|
1
|
+
module RelatonIeee
  #
  # BibXML parser that produces IEEE bibliographic items.
  #
  # The module extends itself with RelatonBib::BibXMLParser and then with
  # itself, so every method is callable directly on the module and the
  # `bib_item` defined here takes precedence over the one mixed in from
  # RelatonBib::BibXMLParser.
  #
  module BibXMLParser
    extend RelatonBib::BibXMLParser
    # Must come after the RelatonBib extension: the module extended last is
    # looked up first, which makes the `bib_item` below the effective one.
    extend self

    FLAVOR = "IEEE".freeze

    #
    # Build a bibliographic item of the IEEE flavor.
    # NOTE(review): presumably invoked by the parsing code inherited from
    # RelatonBib::BibXMLParser - confirm against relaton-bib.
    #
    # @param attrs [Hash] attributes passed through to the item constructor
    #
    # @return [RelatonIeee::IeeeBibliographicItem]
    #
    def bib_item(**attrs)
      IeeeBibliographicItem.new(**attrs)
    end
  end
end
|
@@ -0,0 +1,229 @@
|
|
1
|
+
require "zip"
|
2
|
+
require "relaton_ieee/data_parser"
|
3
|
+
require "relaton_ieee/rawbib_id_parser"
|
4
|
+
|
5
|
+
module RelatonIeee
  #
  # Converts the documents of the local `ieee-rawbib` directory (IEEE dataset)
  # into BibYAML/BibXML files, one file per document, and then resolves the
  # relations between the converted documents.
  #
  class DataFetcher
    # IEEE relation type codes mapped to the attributes of the relation that is
    # created for them. Codes mapped to `false` are known but deliberately
    # ignored; codes missing from the table are unknown and are skipped with a
    # warning.
    RELATION_TYPES = {
      "S" => { type: "obsoletedBy" },
      "V" => { type: "updates", description: "revises" },
      "T" => { type: "updates", description: "amends" },
      "C" => { type: "updates", description: "corrects" },
      "O" => { type: "adoptedFrom" },
      "P" => { type: "complementOf", description: "supplement" },
      "N" => false, "G" => false,
      "F" => false, "I" => false,
      "E" => false, "B" => false, "W" => false
    }.freeze

    # @return [Hash] list of AMSID => PubID
    attr_reader :backrefs

    #
    # Create RelatonIeee::DataFetcher instance
    #
    # @param [String] output output dir
    # @param [String] format output format. Allowed values: "yaml" or "xml"
    #
    def initialize(output, format)
      @output = output
      @format = format
      # A leading "bib" is stripped, so "bibxml" output gets the "xml" extension.
      @ext = format.sub(/\Abib/, "")
      @crossrefs = {}
      @backrefs = {}
    end

    #
    # Convert documents from `ieee-rawbib` dir (IEEE dataset) to BibYAML/BibXML
    # and print the start time, the stop time and the elapsed seconds.
    #
    # @param [String] output ('data') output dir
    # @param [String] format ('yaml') output format.
    #   Allowed values: "yaml" or "xml"
    #
    def self.fetch(output: "data", format: "yaml")
      t1 = Time.now
      puts "Started at: #{t1}"
      # mkdir_p is a no-op for an existing directory, no prior check is needed.
      FileUtils.mkdir_p output
      new(output, format).fetch
      t2 = Time.now
      puts "Stopped at: #{t2}"
      puts "Done in: #{(t2 - t1).round} sec."
    end

    #
    # Convert documents from `ieee-rawbib` dir (IEEE dataset) to BibYAML/BibXML
    #
    # Files whose path contains "Deleted_" are skipped. An error raised while
    # processing one file is reported on stderr and does not stop the run.
    #
    def fetch # rubocop:disable Metrics/AbcSize,Metrics/MethodLength
      Dir["ieee-rawbib/**/*.{xml,zip}"].reject { |f| f.include?("Deleted_") }.each do |f|
        xml = case File.extname(f)
              when ".zip" then read_zip f
              when ".xml" then File.read f, encoding: "UTF-8"
              end
        fetch_doc xml, f
      rescue StandardError => e
        warn "File: #{f}"
        warn e.message
        warn e.backtrace
      end
      update_relations
    end

    #
    # Extract XML file from zip archive
    #
    # @param [String] file path to archive
    #
    # @return [String] content of the first XML entry of the archive
    #
    # @raise [RuntimeError] if the archive contains no XML entry
    #
    def read_zip(file)
      Zip::File.open(file) do |zf|
        entry = zf.glob("**/*.xml").first
        raise "No XML file found in archive: #{file}" unless entry

        entry.get_input_stream.read
      end
    end

    #
    # Parse document and save it
    #
    # Nothing is saved when the file has no `publication` root element, when
    # its `standard_id` is "0", or when the parsed item has no docnumber.
    #
    # @param [String] xml content
    # @param [String] filename source file
    #
    def fetch_doc(xml, filename) # rubocop:disable Metrics/AbcSize,Metrics/MethodLength
      doc = Nokogiri::XML(xml).at("/publication")
      unless doc
        warn "Empty file: #{filename}"
        return
      end
      if doc.at("./publicationinfo/standard_id").text == "0"
        warn "Zero standard_id in #{filename}"
        return
      end
      bib = DataParser.parse doc, self
      if bib.docnumber.nil?
        nt = doc.at("./normtitle")&.text
        warn "PubID parse error. Normtitle: #{nt}, file: #{filename}"
        return
      end
      amsid = doc.at("./publicationinfo/amsid").text
      pubid = bib.docidentifier[0].id
      # A PubID that has already been saved is a duplicate, unless the source
      # file name matches "updates.<digits>".
      if backrefs.value?(pubid) && !filename.match?(/updates\.\d+/)
        oamsid = backrefs.key pubid
        warn "Document exists ID: \"#{pubid}\" AMSID: "\
             "\"#{amsid}\" source: \"#{filename}\". Other AMSID: \"#{oamsid}\""
        # rewrite file only if the PubID matches to the stdnumber
        return unless pubid.include?(doc.at("./publicationinfo/stdnumber").text)
      end
      save_doc bib
      backrefs[amsid] = pubid
    end

    #
    # Save unresolved relation reference
    #
    # Relations of ignored types (mapped to `false` in RELATION_TYPES) are
    # dropped silently; relations of unknown types are dropped with a warning.
    #
    # @param [String] docnumber of main document
    # @param [Nokogiri::XML::Element] amsid relation data
    #
    def add_crossref(docnumber, amsid)
      type = amsid[:type]
      unless RELATION_TYPES.key?(type)
        warn "Unknown relation type: '#{type}' for '#{docnumber}'"
        return
      end
      return unless RELATION_TYPES[type]

      (@crossrefs[docnumber] ||= []) << { amsid: amsid.text, type: type }
    end

    #
    # Save document to file
    #
    # @param [RelatonIeee::IeeeBibliographicItem] bib
    #
    def save_doc(bib)
      c = case @format
          when "xml" then bib.to_xml(bibdata: true)
          when "yaml" then bib.to_hash.to_yaml
          else bib.send("to_#{@format}")
          end
      File.write file_name(bib.docnumber), c, encoding: "UTF-8"
    end

    #
    # Make filename from PubID
    #
    # Whitespace before a hyphen is removed; whitespace, commas, colons and
    # slashes become underscores (runs of them are squeezed to one) and the
    # name is upcased.
    #
    # @param [String] docnumber
    #
    # @return [String] filename
    #
    def file_name(docnumber)
      name = docnumber.gsub(/\s-/, "-").gsub(/[\s,:\/]/, "_").squeeze("_").upcase
      File.join @output, "#{name}.#{@ext}"
    end

    #
    # Update unresolved relations
    #
    # For every document with stored cross-references the relations whose
    # AMSID is known are created, appended to the saved document, and the
    # document is written back once. Unknown AMSIDs are reported on stderr.
    #
    def update_relations # rubocop:disable Metrics/AbcSize,Metrics/MethodLength
      @crossrefs.each do |dnum, rfs|
        rels = rfs.each_with_object([]) do |rf, acc|
          pubid = backrefs[rf[:amsid]]
          unless pubid
            warn "Unresolved relation: '#{rf[:amsid]}' type: '#{rf[:type]}' for '#{dnum}'"
            next
          end
          rel = create_relation(rf[:type], pubid)
          acc << rel if rel
        end
        next if rels.empty?

        # Read and write the document once, not once per relation.
        bib = read_bib(dnum)
        rels.each { |rel| bib.relation << rel }
        save_doc bib
      end
    end

    #
    # Create relation instance
    #
    # @param [String] type IEEE relation type
    # @param [String] fref reference
    #
    # @return [RelatonBib::DocumentRelation, nil] nil if the type is ignored
    #   or unknown
    #
    def create_relation(type, fref)
      rel_type = RELATION_TYPES[type]
      # Covers ignored types (false) as well as unknown ones (nil).
      return unless rel_type

      fr = RelatonBib::FormattedRef.new(content: fref)
      bib = IeeeBibliographicItem.new formattedref: fr
      desc = rel_type[:description]
      description = desc && RelatonBib::FormattedString.new(content: desc, language: "en", script: "Latn")
      RelatonBib::DocumentRelation.new(
        type: rel_type[:type],
        description: description,
        bibitem: bib,
      )
    end

    #
    # Read document from BibXML/BibYAML file
    #
    # @param [String] docnumber
    #
    # @return [RelatonIeee::IeeeBibliographicItem]
    #
    def read_bib(docnumber)
      c = File.read file_name(docnumber), encoding: "UTF-8"
      case @format
      when "xml" then XMLParser.from_xml c
      when "bibxml" then BibXMLParser.parse c
      else IeeeBibliographicItem.from_hash YAML.safe_load(c)
      end
    end
  end
end
|