relaton-ieee 1.8.0 → 1.9.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: f09ad9bd390643d4c3b77249151eda8d29c50a6a2af928cf075bccd91ac19b11
4
- data.tar.gz: 12ea887aefc3f6e9c486391277deb04bb978171838be04a7ad3605c1774ed290
3
+ metadata.gz: f5ea9caf8eaf59f616cad6b137ff46fbfd0f28d38ad92c457467a3d19032d2a7
4
+ data.tar.gz: 88b376e90082b8a6bb730e5a07b96930b292c092ece0ce82850a50a9d9cdeb0e
5
5
  SHA512:
6
- metadata.gz: 348bd013d0b3c7ca93cc1bf03d1b39c4814980832803fb97049d356dd7989e23407bb270e7d32e93d936e41930149ac55ad7e7d6d27cc4a7510c0ed563f2dbc0
7
- data.tar.gz: d30bbe7e3c1ebb21414b7a148b3851892e62f64bcadbd3af0d0267ab898cb75dfa9af28def3295cb13740ddbd49e1e13435d4a455128c423c4d1b074ce828c1b
6
+ metadata.gz: c5eeb5339a3d598da5509de3ffc2bb9231694f9a9edc6603b40cb2634ded16f6ebee01c0ffb222b624426075c7dfd28382bdfac428077097821215a017680f7f
7
+ data.tar.gz: 9d8ee0f3c616d91d53ce9bfd54ceaf4a5cd458ac78471e69f7179daf6f5cf225681dadfe409011012f8c4e987e11f87e27a7e82b777aff2138873be5761aa617
@@ -16,19 +16,9 @@ jobs:
16
16
  strategy:
17
17
  fail-fast: false
18
18
  matrix:
19
- ruby: [ '2.7', '2.6', '2.5', '2.4' ]
19
+ ruby: [ '3.0', '2.7', '2.6', '2.5' ]
20
20
  os: [ ubuntu-latest, windows-latest, macos-latest ]
21
21
  experimental: [ false ]
22
- include:
23
- - ruby: '3.0'
24
- os: 'ubuntu-latest'
25
- experimental: true
26
- - ruby: '3.0'
27
- os: 'windows-latest'
28
- experimental: true
29
- - ruby: '3.0'
30
- os: 'macos-latest'
31
- experimental: true
32
22
  steps:
33
23
  - uses: actions/checkout@v2
34
24
  with:
data/.gitignore CHANGED
@@ -11,4 +11,5 @@
11
11
  .rspec_status
12
12
  .rubocop-https---raw-githubusercontent-com-riboseinc-oss-guides-master-ci-rubocop-yml
13
13
  .vscode/
14
+ ieee-rawbib/
14
15
  Gemfile.lock
data/.rubocop.yml CHANGED
@@ -5,6 +5,6 @@
5
5
  inherit_from:
6
6
  - https://raw.githubusercontent.com/riboseinc/oss-guides/master/ci/rubocop.yml
7
7
  AllCops:
8
- TargetRubyVersion: 2.4
8
+ TargetRubyVersion: 2.5
9
9
  Rails:
10
10
  Enabled: false
data/README.adoc CHANGED
@@ -138,6 +138,16 @@ RelatonIeee::IeeeBibliography.get("IEEE 528-2019")
138
138
  ...
139
139
  ----
140
140
 
141
+ === Typed links
142
+
143
+ Each IEEE document has a link of type `src`.
144
+
145
+ [source,ruby]
146
+ ----
147
+ item.link
148
+ => [#<RelatonBib::TypedUri:0x00007fe885219ba0 @content=#<Addressable::URI:0x8ac URI:https://standards.ieee.org/standard/528-2019.html>, @type="src">]
149
+ ----
150
+
141
151
  === Create bibliographic item from XML
142
152
  [source,ruby]
143
153
  ----
@@ -158,6 +168,25 @@ hash = YAML.load_file 'spec/fixtures/ieee_528_2019.yaml'
158
168
  ...
159
169
  ----
160
170
 
171
+ === Fetch data
172
+
173
+ There is an IEEE dataset https://github.com/ietf-ribose/ieee-rawbib which can be converted into BibXML/BibYAML formats. The dataset needs to be placed into a local directory.
174
+
175
+ The method `RelatonIeee::DataFetcher.fetch(output: "data", format: "yaml")` converts all the documents from the local `ieee-rawbib` directory and saves them to the `./data` folder in YAML format.
176
+ Arguments:
177
+
178
+ - `output` - folder to save documents (default './data').
179
+ - `format` - format in which the documents are saved. Possible formats are: `yaml`, `xml` (default `yaml`).
180
+
181
+ [source,ruby]
182
+ ----
183
+ RelatonIeee::DataFetcher.fetch
184
+ Started at: 2021-09-24 17:55:07 +0200
185
+ Stopped at: 2021-09-24 17:57:30 +0200
186
+ Done in: 143 sec.
187
+ => nil
188
+ ----
189
+
161
190
  == Development
162
191
 
163
192
  After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake spec` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
data/grammars/biblio.rng CHANGED
@@ -787,6 +787,7 @@
787
787
  <value>adapted</value>
788
788
  <value>vote-started</value>
789
789
  <value>vote-ended</value>
790
+ <value>announced</value>
790
791
  </choice>
791
792
  </define>
792
793
  <define name="bdate">
data/grammars/isodoc.rng CHANGED
@@ -45,6 +45,11 @@
45
45
  <optional>
46
46
  <attribute name="alt"/>
47
47
  </optional>
48
+ <optional>
49
+ <attribute name="updatetype">
50
+ <data type="boolean"/>
51
+ </attribute>
52
+ </optional>
48
53
  <text/>
49
54
  </element>
50
55
  </define>
@@ -199,6 +204,18 @@
199
204
  </zeroOrMore>
200
205
  </element>
201
206
  </define>
207
+ <define name="dt">
208
+ <element name="dt">
209
+ <optional>
210
+ <attribute name="id">
211
+ <data type="ID"/>
212
+ </attribute>
213
+ </optional>
214
+ <zeroOrMore>
215
+ <ref name="TextElement"/>
216
+ </zeroOrMore>
217
+ </element>
218
+ </define>
202
219
  <define name="example">
203
220
  <element name="example">
204
221
  <attribute name="id">
@@ -543,6 +560,9 @@
543
560
  </define>
544
561
  <define name="BibDataExtensionType">
545
562
  <ref name="doctype"/>
563
+ <optional>
564
+ <ref name="docsubtype"/>
565
+ </optional>
546
566
  <optional>
547
567
  <ref name="editorialgroup"/>
548
568
  </optional>
@@ -890,6 +910,14 @@
890
910
  </define>
891
911
  </include>
892
912
  <!-- end overrides -->
913
+ <define name="docsubtype">
914
+ <element name="subdoctype">
915
+ <ref name="DocumentSubtype"/>
916
+ </element>
917
+ </define>
918
+ <define name="DocumentSubtype">
919
+ <text/>
920
+ </define>
893
921
  <define name="colgroup">
894
922
  <element name="colgroup">
895
923
  <oneOrMore>
@@ -939,7 +967,34 @@
939
967
  <define name="concept">
940
968
  <element name="concept">
941
969
  <optional>
942
- <attribute name="term"/>
970
+ <attribute name="ital">
971
+ <data type="boolean"/>
972
+ </attribute>
973
+ </optional>
974
+ <optional>
975
+ <attribute name="ref">
976
+ <data type="boolean"/>
977
+ </attribute>
978
+ </optional>
979
+ <optional>
980
+ <element name="refterm">
981
+ <zeroOrMore>
982
+ <choice>
983
+ <ref name="PureTextElement"/>
984
+ <ref name="stem"/>
985
+ </choice>
986
+ </zeroOrMore>
987
+ </element>
988
+ </optional>
989
+ <optional>
990
+ <element name="renderterm">
991
+ <zeroOrMore>
992
+ <choice>
993
+ <ref name="PureTextElement"/>
994
+ <ref name="stem"/>
995
+ </choice>
996
+ </zeroOrMore>
997
+ </element>
943
998
  </optional>
944
999
  <choice>
945
1000
  <ref name="eref"/>
@@ -965,6 +1020,9 @@
965
1020
  </attribute>
966
1021
  <attribute name="name"/>
967
1022
  <attribute name="action"/>
1023
+ <optional>
1024
+ <attribute name="class"/>
1025
+ </optional>
968
1026
  <zeroOrMore>
969
1027
  <choice>
970
1028
  <ref name="TextElement"/>
@@ -1191,13 +1249,17 @@
1191
1249
  </define>
1192
1250
  <define name="IsoWorkgroup">
1193
1251
  <optional>
1194
- <attribute name="number">
1195
- <data type="int"/>
1196
- </attribute>
1252
+ <attribute name="number"/>
1197
1253
  </optional>
1198
1254
  <optional>
1199
1255
  <attribute name="type"/>
1200
1256
  </optional>
1257
+ <optional>
1258
+ <attribute name="identifier"/>
1259
+ </optional>
1260
+ <optional>
1261
+ <attribute name="prefix"/>
1262
+ </optional>
1201
1263
  <text/>
1202
1264
  </define>
1203
1265
  <define name="ics">
@@ -1459,26 +1521,26 @@
1459
1521
  <optional>
1460
1522
  <ref name="section-title"/>
1461
1523
  </optional>
1462
- <group>
1524
+ <choice>
1463
1525
  <choice>
1464
1526
  <group>
1465
- <zeroOrMore>
1527
+ <oneOrMore>
1466
1528
  <ref name="BasicBlock"/>
1467
- </zeroOrMore>
1529
+ </oneOrMore>
1468
1530
  <zeroOrMore>
1469
1531
  <ref name="note"/>
1470
1532
  </zeroOrMore>
1471
1533
  </group>
1472
1534
  <ref name="amend"/>
1473
1535
  </choice>
1474
- <zeroOrMore>
1536
+ <oneOrMore>
1475
1537
  <choice>
1476
1538
  <ref name="clause-subsection"/>
1477
1539
  <ref name="terms"/>
1478
1540
  <ref name="definitions"/>
1479
1541
  </choice>
1480
- </zeroOrMore>
1481
- </group>
1542
+ </oneOrMore>
1543
+ </choice>
1482
1544
  </define>
1483
1545
  <define name="Annex-Section">
1484
1546
  <optional>
@@ -0,0 +1,14 @@
1
module RelatonIeee
  #
  # BibXML parser for the IEEE flavor. It extends RelatonBib::BibXMLParser
  # and itself, so `bib_item` is callable directly on the module.
  #
  module BibXMLParser
    extend RelatonBib::BibXMLParser
    extend self

    FLAVOR = "IEEE".freeze

    #
    # Build an IEEE bibliographic item from the given attributes.
    #
    # @param attrs [Hash] attributes forwarded to IeeeBibliographicItem.new
    #
    # @return [RelatonIeee::IeeeBibliographicItem]
    #
    def bib_item(**attrs)
      IeeeBibliographicItem.new(**attrs)
    end
  end
end
@@ -0,0 +1,229 @@
1
+ require "zip"
2
+ require "relaton_ieee/data_parser"
3
+ require "relaton_ieee/rawbib_id_parser"
4
+
5
module RelatonIeee
  #
  # Converts the IEEE raw dataset (the local `ieee-rawbib` directory) into
  # Relaton documents and writes them to an output directory.
  #
  # Documents are processed in two passes: every source file is parsed and
  # saved first, then relations between documents are resolved through the
  # collected AMSID => PubID map and appended to the saved files.
  #
  class DataFetcher
    # IEEE relation type codes mapped to Relaton relation attributes.
    # Codes mapped to `false` are deliberately ignored.
    RELATION_TYPES = {
      "S" => { type: "obsoletedBy" },
      "V" => { type: "updates", description: "revises" },
      "T" => { type: "updates", description: "amends" },
      "C" => { type: "updates", description: "corrects" },
      "O" => { type: "adoptedFrom" },
      "P" => { type: "complementOf", description: "supplement" },
      "N" => false, "G" => false,
      "F" => false, "I" => false,
      "E" => false, "B" => false, "W" => false
    }.freeze

    # @return [Hash] list of AMSID => PubID
    attr_reader :backrefs

    #
    # Create RelatonIeee::DataFetcher instance
    #
    # @param [String] output output dir
    # @param [String] format output format. Allowed values: "yaml" or "xml"
    #
    def initialize(output, format)
      @output = output
      @format = format
      # A leading "bib" is stripped to get the file extension ("bibxml" => "xml")
      @ext = format.sub(/^bib/, "")
      @crossrefs = {}
      @backrefs = {}
      # @normtitles = []
    end

    #
    # Convert documents from `ieee-rawbib` dir (IEEE dataset) to BibYAML/BibXML
    # and print the start time, stop time and duration to stdout.
    #
    # @param [String] output ('data') output dir
    # @param [String] format ('yaml') output format.
    #   Allowed values: "yaml" or "xml"
    #
    def self.fetch(output: "data", format: "yaml")
      t1 = Time.now
      puts "Started at: #{t1}"
      # mkdir_p is a no-op for an existing directory, so no guard is needed
      FileUtils.mkdir_p output
      new(output, format).fetch
      t2 = Time.now
      puts "Stopped at: #{t2}"
      puts "Done in: #{(t2 - t1).round} sec."
    end

    #
    # Convert documents from `ieee-rawbib` dir (IEEE dataset) to BibYAML/BibXML
    #
    # Files whose path contains "Deleted_" are skipped. An error raised while
    # handling one file is reported with `warn` and does not stop the run.
    #
    def fetch # rubocop:disable Metrics/AbcSize,Metrics/MethodLength
      Dir["ieee-rawbib/**/*.{xml,zip}"].reject { |f| f.include?("Deleted_") }.each do |f|
        xml = case File.extname(f)
              when ".zip" then read_zip f
              when ".xml" then File.read f, encoding: "UTF-8"
              end
        fetch_doc xml, f
      rescue StandardError => e
        warn "File: #{f}"
        warn e.message
        warn e.backtrace
      end
      # File.write "normtitles.txt", @normtitles.join("\n")
      update_relations
    end

    #
    # Extract XML file from zip archive
    #
    # @param [String] file path to archive
    #
    # @return [String] content of the first `*.xml` entry found in the archive
    #
    def read_zip(file)
      Zip::File.open(file) do |zf|
        entry = zf.glob("**/*.xml").first
        entry.get_input_stream.read
      end
    end

    #
    # Parse document and save it
    #
    # The document is skipped (with a warning) when the file has no
    # `/publication` root, when its standard_id is "0", or when no document
    # number could be parsed.
    #
    # @param [String] xml content
    # @param [String] filename source file
    #
    def fetch_doc(xml, filename) # rubocop:disable Metrics/AbcSize,Metrics/MethodLength,Metrics/CyclomaticComplexity,Metrics/PerceivedComplexity
      doc = Nokogiri::XML(xml).at("/publication")
      unless doc
        warn "Empty file: #{filename}"
        return
      end
      stdid = doc.at("./publicationinfo/standard_id").text
      if stdid == "0"
        # nt = doc&.at("./normtitle")&.text
        # ntid = @normtitles.index nt
        # @normtitles << nt if nt && !ntid
        warn "Zero standard_id in #{filename}"
        return
      end
      bib = DataParser.parse doc, self
      if bib.docnumber.nil?
        nt = doc.at("./normtitle")&.text
        warn "PubID parse error. Normtitle: #{nt}, file: #{filename}"
        return
      end
      amsid = doc.at("./publicationinfo/amsid").text
      pubid = bib.docidentifier[0].id
      if backrefs.value?(pubid) && /updates\.\d+/ !~ filename
        oamsid = backrefs.key pubid
        warn "Document exists ID: \"#{pubid}\" AMSID: "\
             "\"#{amsid}\" source: \"#{filename}\". Other AMSID: \"#{oamsid}\""
        # rewrite file only if the PubID matches to the stdnumber
        return unless pubid.include?(doc.at("./publicationinfo/stdnumber").text)
      end
      save_doc bib
      backrefs[amsid] = pubid
    end

    #
    # Save unresolved relation reference
    #
    # References whose type is mapped to `false` in RELATION_TYPES are dropped.
    #
    # @param [String] docnumber of main document
    # @param [Nokogiri::XML::Element] amsid relation data
    #
    def add_crossref(docnumber, amsid)
      return if RELATION_TYPES[amsid[:type]] == false

      ref = { amsid: amsid.text, type: amsid[:type] }
      (@crossrefs[docnumber] ||= []) << ref
    end

    #
    # Save document to file
    #
    # @param [RelatonIeee::IeeeBibliographicItem] bib
    #
    def save_doc(bib)
      c = case @format
          when "xml" then bib.to_xml(bibdata: true)
          when "yaml" then bib.to_hash.to_yaml
          else bib.send("to_#{@format}")
          end
      File.write file_name(bib.docnumber), c, encoding: "UTF-8"
    end

    #
    # Make filename from PubID
    #
    # Whitespace before a hyphen is dropped; whitespace, commas, colons and
    # slashes become underscores (runs collapsed); the result is upcased.
    #
    # @param [String] docnumber
    #
    # @return [String] filename
    #
    def file_name(docnumber)
      name = docnumber.gsub(/\s-/, "-").gsub(/[\s,:\/]/, "_").squeeze("_").upcase
      File.join @output, "#{name}.#{@ext}"
    end

    #
    # Update unresolved relations
    #
    # Each document that gained at least one relation is read once and
    # written once, after all of its references have been handled.
    #
    def update_relations # rubocop:disable Metrics/MethodLength
      @crossrefs.each do |dnum, rfs|
        bib = nil
        rfs.each do |rf|
          pubid = backrefs[rf[:amsid]]
          unless pubid
            warn "Unresolved relation: '#{rf[:amsid]}' type: '#{rf[:type]}' for '#{dnum}'"
            next
          end
          rel = create_relation(rf[:type], pubid)
          next unless rel

          bib ||= read_bib(dnum)
          bib.relation << rel
        end
        save_doc bib if bib
      end
    end

    #
    # Create relation instance
    #
    # @param [String] type IEEE relation type
    # @param [String] fref reference
    #
    # @return [RelatonBib::DocumentRelation, nil] nil when the type is
    #   ignored (mapped to `false`) or not listed in RELATION_TYPES
    #
    def create_relation(type, fref)
      relation = RELATION_TYPES[type]
      # Covers both `false` (ignored code) and `nil` (unknown code)
      return unless relation

      fr = RelatonBib::FormattedRef.new(content: fref)
      bib = IeeeBibliographicItem.new formattedref: fr
      desc = relation[:description]
      description = desc && RelatonBib::FormattedString.new(content: desc, language: "en", script: "Latn")
      RelatonBib::DocumentRelation.new(
        type: relation[:type],
        description: description,
        bibitem: bib,
      )
    end

    #
    # Read document from BibXML/BibYAML file
    #
    # @param [String] docnumber
    #
    # @return [RelatonIeee::IeeeBibliographicItem]
    #
    def read_bib(docnumber)
      c = File.read file_name(docnumber), encoding: "UTF-8"
      case @format
      when "xml" then XMLParser.from_xml c
      when "bibxml" then BibXMLParser.parse c
      else IeeeBibliographicItem.from_hash YAML.safe_load(c)
      end
    end
  end
end