relaton-ieee 1.8.0 → 1.9.3

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: f09ad9bd390643d4c3b77249151eda8d29c50a6a2af928cf075bccd91ac19b11
4
- data.tar.gz: 12ea887aefc3f6e9c486391277deb04bb978171838be04a7ad3605c1774ed290
3
+ metadata.gz: f5ea9caf8eaf59f616cad6b137ff46fbfd0f28d38ad92c457467a3d19032d2a7
4
+ data.tar.gz: 88b376e90082b8a6bb730e5a07b96930b292c092ece0ce82850a50a9d9cdeb0e
5
5
  SHA512:
6
- metadata.gz: 348bd013d0b3c7ca93cc1bf03d1b39c4814980832803fb97049d356dd7989e23407bb270e7d32e93d936e41930149ac55ad7e7d6d27cc4a7510c0ed563f2dbc0
7
- data.tar.gz: d30bbe7e3c1ebb21414b7a148b3851892e62f64bcadbd3af0d0267ab898cb75dfa9af28def3295cb13740ddbd49e1e13435d4a455128c423c4d1b074ce828c1b
6
+ metadata.gz: c5eeb5339a3d598da5509de3ffc2bb9231694f9a9edc6603b40cb2634ded16f6ebee01c0ffb222b624426075c7dfd28382bdfac428077097821215a017680f7f
7
+ data.tar.gz: 9d8ee0f3c616d91d53ce9bfd54ceaf4a5cd458ac78471e69f7179daf6f5cf225681dadfe409011012f8c4e987e11f87e27a7e82b777aff2138873be5761aa617
@@ -16,19 +16,9 @@ jobs:
16
16
  strategy:
17
17
  fail-fast: false
18
18
  matrix:
19
- ruby: [ '2.7', '2.6', '2.5', '2.4' ]
19
+ ruby: [ '3.0', '2.7', '2.6', '2.5' ]
20
20
  os: [ ubuntu-latest, windows-latest, macos-latest ]
21
21
  experimental: [ false ]
22
- include:
23
- - ruby: '3.0'
24
- os: 'ubuntu-latest'
25
- experimental: true
26
- - ruby: '3.0'
27
- os: 'windows-latest'
28
- experimental: true
29
- - ruby: '3.0'
30
- os: 'macos-latest'
31
- experimental: true
32
22
  steps:
33
23
  - uses: actions/checkout@v2
34
24
  with:
data/.gitignore CHANGED
@@ -11,4 +11,5 @@
11
11
  .rspec_status
12
12
  .rubocop-https---raw-githubusercontent-com-riboseinc-oss-guides-master-ci-rubocop-yml
13
13
  .vscode/
14
+ ieee-rawbib/
14
15
  Gemfile.lock
data/.rubocop.yml CHANGED
@@ -5,6 +5,6 @@
5
5
  inherit_from:
6
6
  - https://raw.githubusercontent.com/riboseinc/oss-guides/master/ci/rubocop.yml
7
7
  AllCops:
8
- TargetRubyVersion: 2.4
8
+ TargetRubyVersion: 2.5
9
9
  Rails:
10
10
  Enabled: false
data/README.adoc CHANGED
@@ -138,6 +138,16 @@ RelatonIeee::IeeeBibliography.get("IEEE 528-2019")
138
138
  ...
139
139
  ----
140
140
 
141
+ === Typed links
142
+
143
+ Each IEEE document has a link of type `src`.
144
+
145
+ [source,ruby]
146
+ ----
147
+ item.link
148
+ => [#<RelatonBib::TypedUri:0x00007fe885219ba0 @content=#<Addressable::URI:0x8ac URI:https://standards.ieee.org/standard/528-2019.html>, @type="src">]
149
+ ----
150
+
141
151
  === Create bibliographic item from XML
142
152
  [source,ruby]
143
153
  ----
@@ -158,6 +168,25 @@ hash = YAML.load_file 'spec/fixtures/ieee_528_2019.yaml'
158
168
  ...
159
169
  ----
160
170
 
171
+ === Fetch data
172
+
173
+ There is an IEEE dataset https://github.com/ietf-ribose/ieee-rawbib which can be converted into BibXML/BibYAML formats. The dataset needs to be placed in a local `ieee-rawbib` directory.
174
+
175
+ The method `RelatonIeee::DataFetcher.fetch(output: "data", format: "yaml")` converts all the documents from the local `ieee-rawbib` directory and saves them to the `./data` folder in YAML format.
176
+ Arguments:
177
+
178
+ - `output` - folder to save documents (default './data').
179
+ - `format` - format in which the documents are saved. Possible formats are: `yaml`, `xml` (default `yaml`).
180
+
181
+ [source,ruby]
182
+ ----
183
+ RelatonIeee::DataFetcher.fetch
184
+ Started at: 2021-09-24 17:55:07 +0200
185
+ Stopped at: 2021-09-24 17:57:30 +0200
186
+ Done in: 143 sec.
187
+ => nil
188
+ ----
189
+
161
190
  == Development
162
191
 
163
192
  After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake spec` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
data/grammars/biblio.rng CHANGED
@@ -787,6 +787,7 @@
787
787
  <value>adapted</value>
788
788
  <value>vote-started</value>
789
789
  <value>vote-ended</value>
790
+ <value>announced</value>
790
791
  </choice>
791
792
  </define>
792
793
  <define name="bdate">
data/grammars/isodoc.rng CHANGED
@@ -45,6 +45,11 @@
45
45
  <optional>
46
46
  <attribute name="alt"/>
47
47
  </optional>
48
+ <optional>
49
+ <attribute name="updatetype">
50
+ <data type="boolean"/>
51
+ </attribute>
52
+ </optional>
48
53
  <text/>
49
54
  </element>
50
55
  </define>
@@ -199,6 +204,18 @@
199
204
  </zeroOrMore>
200
205
  </element>
201
206
  </define>
207
+ <define name="dt">
208
+ <element name="dt">
209
+ <optional>
210
+ <attribute name="id">
211
+ <data type="ID"/>
212
+ </attribute>
213
+ </optional>
214
+ <zeroOrMore>
215
+ <ref name="TextElement"/>
216
+ </zeroOrMore>
217
+ </element>
218
+ </define>
202
219
  <define name="example">
203
220
  <element name="example">
204
221
  <attribute name="id">
@@ -543,6 +560,9 @@
543
560
  </define>
544
561
  <define name="BibDataExtensionType">
545
562
  <ref name="doctype"/>
563
+ <optional>
564
+ <ref name="docsubtype"/>
565
+ </optional>
546
566
  <optional>
547
567
  <ref name="editorialgroup"/>
548
568
  </optional>
@@ -890,6 +910,14 @@
890
910
  </define>
891
911
  </include>
892
912
  <!-- end overrides -->
913
+ <define name="docsubtype">
914
+ <element name="subdoctype">
915
+ <ref name="DocumentSubtype"/>
916
+ </element>
917
+ </define>
918
+ <define name="DocumentSubtype">
919
+ <text/>
920
+ </define>
893
921
  <define name="colgroup">
894
922
  <element name="colgroup">
895
923
  <oneOrMore>
@@ -939,7 +967,34 @@
939
967
  <define name="concept">
940
968
  <element name="concept">
941
969
  <optional>
942
- <attribute name="term"/>
970
+ <attribute name="ital">
971
+ <data type="boolean"/>
972
+ </attribute>
973
+ </optional>
974
+ <optional>
975
+ <attribute name="ref">
976
+ <data type="boolean"/>
977
+ </attribute>
978
+ </optional>
979
+ <optional>
980
+ <element name="refterm">
981
+ <zeroOrMore>
982
+ <choice>
983
+ <ref name="PureTextElement"/>
984
+ <ref name="stem"/>
985
+ </choice>
986
+ </zeroOrMore>
987
+ </element>
988
+ </optional>
989
+ <optional>
990
+ <element name="renderterm">
991
+ <zeroOrMore>
992
+ <choice>
993
+ <ref name="PureTextElement"/>
994
+ <ref name="stem"/>
995
+ </choice>
996
+ </zeroOrMore>
997
+ </element>
943
998
  </optional>
944
999
  <choice>
945
1000
  <ref name="eref"/>
@@ -965,6 +1020,9 @@
965
1020
  </attribute>
966
1021
  <attribute name="name"/>
967
1022
  <attribute name="action"/>
1023
+ <optional>
1024
+ <attribute name="class"/>
1025
+ </optional>
968
1026
  <zeroOrMore>
969
1027
  <choice>
970
1028
  <ref name="TextElement"/>
@@ -1191,13 +1249,17 @@
1191
1249
  </define>
1192
1250
  <define name="IsoWorkgroup">
1193
1251
  <optional>
1194
- <attribute name="number">
1195
- <data type="int"/>
1196
- </attribute>
1252
+ <attribute name="number"/>
1197
1253
  </optional>
1198
1254
  <optional>
1199
1255
  <attribute name="type"/>
1200
1256
  </optional>
1257
+ <optional>
1258
+ <attribute name="identifier"/>
1259
+ </optional>
1260
+ <optional>
1261
+ <attribute name="prefix"/>
1262
+ </optional>
1201
1263
  <text/>
1202
1264
  </define>
1203
1265
  <define name="ics">
@@ -1459,26 +1521,26 @@
1459
1521
  <optional>
1460
1522
  <ref name="section-title"/>
1461
1523
  </optional>
1462
- <group>
1524
+ <choice>
1463
1525
  <choice>
1464
1526
  <group>
1465
- <zeroOrMore>
1527
+ <oneOrMore>
1466
1528
  <ref name="BasicBlock"/>
1467
- </zeroOrMore>
1529
+ </oneOrMore>
1468
1530
  <zeroOrMore>
1469
1531
  <ref name="note"/>
1470
1532
  </zeroOrMore>
1471
1533
  </group>
1472
1534
  <ref name="amend"/>
1473
1535
  </choice>
1474
- <zeroOrMore>
1536
+ <oneOrMore>
1475
1537
  <choice>
1476
1538
  <ref name="clause-subsection"/>
1477
1539
  <ref name="terms"/>
1478
1540
  <ref name="definitions"/>
1479
1541
  </choice>
1480
- </zeroOrMore>
1481
- </group>
1542
+ </oneOrMore>
1543
+ </choice>
1482
1544
  </define>
1483
1545
  <define name="Annex-Section">
1484
1546
  <optional>
@@ -0,0 +1,14 @@
1
module RelatonIeee
  #
  # BibXML parser for the IEEE flavor.
  #
  # The module is extended with RelatonBib::BibXMLParser and then with itself,
  # so every method is callable directly on the module and the `bib_item`
  # defined here takes precedence over a method of the same name coming from
  # RelatonBib::BibXMLParser.
  #
  module BibXMLParser
    extend RelatonBib::BibXMLParser
    extend self

    FLAVOR = "IEEE".freeze

    #
    # Build an IEEE bibliographic item from the given attributes
    #
    # @param attrs [Hash] attributes passed through to the item constructor
    #
    # @return [RelatonIeee::IeeeBibliographicItem]
    #
    def bib_item(**attrs)
      IeeeBibliographicItem.new(**attrs)
    end
  end
end
@@ -0,0 +1,229 @@
1
+ require "zip"
2
+ require "relaton_ieee/data_parser"
3
+ require "relaton_ieee/rawbib_id_parser"
4
+
5
module RelatonIeee
  #
  # Converts documents of the IEEE dataset (local `ieee-rawbib` directory)
  # into BibYAML/BibXML files and then resolves the relations between the
  # converted documents.
  #
  class DataFetcher
    # IEEE relation type code => attributes of the resulting relation.
    # A `false` value marks a code that is deliberately ignored.
    RELATION_TYPES = {
      "S" => { type: "obsoletedBy" },
      "V" => { type: "updates", description: "revises" },
      "T" => { type: "updates", description: "amends" },
      "C" => { type: "updates", description: "corrects" },
      "O" => { type: "adoptedFrom" },
      "P" => { type: "complementOf", description: "supplement" },
      "N" => false, "G" => false,
      "F" => false, "I" => false,
      "E" => false, "B" => false, "W" => false
    }.freeze

    # @return [Hash] list of AMSID => PubID
    attr_reader :backrefs

    #
    # Create RelatonIeee::DataFetcher instance
    #
    # @param [String] output output dir
    # @param [String] format output format. Allowed values: "yaml" or "xml"
    #
    def initialize(output, format)
      @output = output
      @format = format
      # a leading "bib" is dropped, so format "bibxml" is stored as ".xml"
      @ext = format.sub(/^bib/, "")
      @crossrefs = {}
      @backrefs = {}
    end

    #
    # Convert documents from `ieee-rawbib` dir (IEEE dataset) to BibYAML/BibXML
    # and print the start time, stop time and duration
    #
    # @param [String] output ('data') output dir
    # @param [String] format ('yaml') output format.
    #   Allowed values: "yaml" or "xml"
    #
    def self.fetch(output: "data", format: "yaml")
      t1 = Time.now
      puts "Started at: #{t1}"
      FileUtils.mkdir_p output # no-op when the directory already exists
      new(output, format).fetch
      t2 = Time.now
      puts "Stopped at: #{t2}"
      puts "Done in: #{(t2 - t1).round} sec."
    end

    #
    # Convert documents from `ieee-rawbib` dir (IEEE dataset) to BibYAML/BibXML
    #
    # An error raised while processing a file is reported with `warn` and does
    # not stop the conversion of the remaining files.
    #
    def fetch # rubocop:disable Metrics/MethodLength
      Dir["ieee-rawbib/**/*.{xml,zip}"].each do |f|
        next if f.include?("Deleted_")

        xml = case File.extname(f)
              when ".zip" then read_zip f
              when ".xml" then File.read f, encoding: "UTF-8"
              end
        fetch_doc xml, f
      rescue StandardError => e
        warn "File: #{f}"
        warn e.message
        warn e.backtrace
      end
      update_relations
    end

    #
    # Extract XML file from zip archive
    #
    # @param [String] file path to archive
    #
    # @return [String] content of the first XML entry of the archive
    #
    # @raise [RuntimeError] if the archive contains no XML entry
    #
    def read_zip(file)
      Zip::File.open(file) do |zf|
        entry = zf.glob("**/*.xml").first
        raise "No XML entry found in archive: #{file}" unless entry

        entry.get_input_stream.read
      end
    end

    #
    # Parse document and save it
    #
    # The document is skipped (with a warning) when it has no `publication`
    # root, when its `standard_id` is "0" or when no docnumber could be parsed.
    #
    # @param [String] xml content
    # @param [String] filename source file
    #
    def fetch_doc(xml, filename) # rubocop:disable Metrics/AbcSize,Metrics/MethodLength
      doc = Nokogiri::XML(xml).at("/publication")
      unless doc
        warn "Empty file: #{filename}"
        return
      end
      if doc.at("./publicationinfo/standard_id").text == "0"
        warn "Zero standard_id in #{filename}"
        return
      end
      bib = DataParser.parse doc, self
      if bib.docnumber.nil?
        warn "PubID parse error. Normtitle: #{doc.at('./normtitle')&.text}, file: #{filename}"
        return
      end
      amsid = doc.at("./publicationinfo/amsid").text
      pubid = bib.docidentifier[0].id
      if backrefs.value?(pubid) && !filename.match?(/updates\.\d+/)
        warn "Document exists ID: \"#{pubid}\" AMSID: "\
             "\"#{amsid}\" source: \"#{filename}\". Other AMSID: \"#{backrefs.key(pubid)}\""
        # rewrite the existing file only if the PubID matches the stdnumber
        return unless pubid.include?(doc.at("./publicationinfo/stdnumber").text)
      end
      save_doc bib
      backrefs[amsid] = pubid
    end

    #
    # Save unresolved relation reference
    #
    # References whose type is mapped to `false` in RELATION_TYPES are dropped.
    # References with an unmapped type are kept, so that they are reported
    # when the relations are resolved.
    #
    # @param [String] docnumber of main document
    # @param [Nokogiri::XML::Element] amsid relation data
    #
    def add_crossref(docnumber, amsid)
      return if RELATION_TYPES[amsid[:type]] == false

      (@crossrefs[docnumber] ||= []) << { amsid: amsid.text, type: amsid[:type] }
    end

    #
    # Save document to file
    #
    # @param [RelatonIeee::IeeeBibliographicItem] bib
    #
    def save_doc(bib)
      c = case @format
          when "xml" then bib.to_xml(bibdata: true)
          when "yaml" then bib.to_hash.to_yaml
          else bib.send("to_#{@format}")
          end
      File.write file_name(bib.docnumber), c, encoding: "UTF-8"
    end

    #
    # Make filename from PubID
    #
    # @param [String] docnumber
    #
    # @return [String] filename
    #
    def file_name(docnumber)
      name = docnumber.gsub(/\s-/, "-").gsub(%r{[\s,:/]}, "_").squeeze("_").upcase
      File.join @output, "#{name}.#{@ext}"
    end

    #
    # Update unresolved relations
    #
    # Each document that gets at least one relation is read once and written
    # once, after all of its relations have been attached.
    #
    def update_relations # rubocop:disable Metrics/MethodLength
      @crossrefs.each do |dnum, rfs|
        bib = nil
        rfs.each do |rf|
          pubid = backrefs[rf[:amsid]]
          unless pubid
            warn "Unresolved relation: '#{rf[:amsid]}' type: '#{rf[:type]}' for '#{dnum}'"
            next
          end
          rel = create_relation(rf[:type], pubid)
          next unless rel

          bib ||= read_bib(dnum)
          bib.relation << rel
        end
        save_doc bib if bib
      end
    end

    #
    # Create relation instance
    #
    # @param [String] type IEEE relation type
    # @param [String] fref reference
    #
    # @return [RelatonBib::DocumentRelation, nil] nil if the type is ignored
    #   or is not listed in RELATION_TYPES
    #
    def create_relation(type, fref) # rubocop:disable Metrics/MethodLength
      rel_type = RELATION_TYPES[type]
      unless rel_type
        # nil means the code is missing from RELATION_TYPES; false means ignored
        warn "Unknown relation type: '#{type}'" if rel_type.nil?
        return
      end

      fr = RelatonBib::FormattedRef.new(content: fref)
      bib = IeeeBibliographicItem.new formattedref: fr
      desc = rel_type[:description]
      description = desc && RelatonBib::FormattedString.new(content: desc, language: "en", script: "Latn")
      RelatonBib::DocumentRelation.new(
        type: rel_type[:type],
        description: description,
        bibitem: bib,
      )
    end

    #
    # Read document from BibXML/BibYAML file
    #
    # @param [String] docnumber
    #
    # @return [RelatonIeee::IeeeBibliographicItem]
    #
    def read_bib(docnumber)
      c = File.read file_name(docnumber), encoding: "UTF-8"
      case @format
      when "xml" then XMLParser.from_xml c
      when "bibxml" then BibXMLParser.parse c
      else IeeeBibliographicItem.from_hash YAML.safe_load(c)
      end
    end
  end
end