relaton-nist 0.9.0 → 1.1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -5,104 +5,14 @@
5
5
  we cannot have a new default namespace: we will end up with a grammar with two different
6
6
  namespaces, one for isostandard and one for csand additions. And we do not want that.
7
7
  -->
8
- <include href="isostandard.rng">
8
+ <include href="isodoc.rng">
9
9
  <start>
10
10
  <ref name="nist-standard"/>
11
11
  </start>
12
- <define name="figure">
13
- <element name="figure">
14
- <attribute name="id">
15
- <data type="ID"/>
16
- </attribute>
17
- <optional>
18
- <ref name="tname"/>
19
- </optional>
20
- <choice>
21
- <ref name="image"/>
22
- <ref name="pre"/>
23
- <oneOrMore>
24
- <ref name="subfigure"/>
25
- </oneOrMore>
26
- </choice>
27
- <zeroOrMore>
28
- <ref name="fn"/>
29
- </zeroOrMore>
30
- <optional>
31
- <ref name="dl"/>
32
- </optional>
33
- <zeroOrMore>
34
- <ref name="note"/>
35
- </zeroOrMore>
36
- </element>
37
- </define>
38
- <define name="subfigure">
39
- <element name="figure">
40
- <attribute name="id">
41
- <data type="ID"/>
42
- </attribute>
43
- <optional>
44
- <ref name="tname"/>
45
- </optional>
46
- <choice>
47
- <ref name="image"/>
48
- <ref name="pre"/>
49
- </choice>
50
- </element>
51
- </define>
52
12
  <define name="DocumentType">
53
13
  <value>standard</value>
54
14
  </define>
55
- <define name="TitleType">
56
- <choice>
57
- <value>alternative</value>
58
- <value>original</value>
59
- <value>unofficial</value>
60
- <value>subtitle</value>
61
- <value>main</value>
62
- </choice>
63
- </define>
64
- <!-- DocRelationType |= "obsoletedBy" | "supersedes" | "supersededBy" -->
65
- <define name="DocRelationType">
66
- <choice>
67
- <value>obsoletes</value>
68
- <value>updates</value>
69
- <value>updatedBy</value>
70
- <value>complements</value>
71
- <value>derivedFrom</value>
72
- <value>translatedFrom</value>
73
- <value>adoptedFrom</value>
74
- <value>equivalent</value>
75
- <value>identical</value>
76
- <value>nonequivalent</value>
77
- <value>includedIn</value>
78
- <value>includes</value>
79
- <value>instance</value>
80
- <value>partOf</value>
81
- <value>hasDraft</value>
82
- <value>obsoletedBy</value>
83
- <value>supersedes</value>
84
- <value>supersededBy</value>
85
- </choice>
86
- </define>
87
- <!-- BibliographicDateType |= "abandoned" | "superseded" -->
88
- <define name="BibliographicDateType">
89
- <choice>
90
- <value>published</value>
91
- <value>accessed</value>
92
- <value>created</value>
93
- <value>implemented</value>
94
- <value>obsoleted</value>
95
- <value>confirmed</value>
96
- <value>updated</value>
97
- <value>issued</value>
98
- <value>transmitted</value>
99
- <value>copied</value>
100
- <value>unchanged</value>
101
- <value>circulated</value>
102
- <value>abandoned</value>
103
- <value>superseded</value>
104
- </choice>
105
- </define>
15
+ <!-- TitleType = ( "alternative" | "original" | "unofficial" | "subtitle" | "main" ) -->
106
16
  <define name="preface">
107
17
  <element name="preface">
108
18
  <optional>
@@ -112,7 +22,10 @@
112
22
  <ref name="foreword"/>
113
23
  </optional>
114
24
  <zeroOrMore>
115
- <ref name="clause"/>
25
+ <choice>
26
+ <ref name="clause"/>
27
+ <ref name="errata_clause"/>
28
+ </choice>
116
29
  </zeroOrMore>
117
30
  <optional>
118
31
  <ref name="reviewernote"/>
@@ -122,62 +35,41 @@
122
35
  </optional>
123
36
  </element>
124
37
  </define>
125
- <define name="annex">
126
- <element name="annex">
127
- <optional>
128
- <attribute name="id">
129
- <data type="ID"/>
130
- </attribute>
131
- </optional>
132
- <optional>
133
- <attribute name="language"/>
134
- </optional>
135
- <optional>
136
- <attribute name="script"/>
137
- </optional>
138
- <optional>
139
- <attribute name="inline-header">
140
- <data type="boolean"/>
141
- </attribute>
142
- </optional>
143
- <optional>
144
- <attribute name="obligation">
145
- <choice>
146
- <value>normative</value>
147
- <value>informative</value>
148
- </choice>
149
- </attribute>
150
- </optional>
151
- <optional>
152
- <ref name="section-title"/>
153
- </optional>
154
- <zeroOrMore>
155
- <!--
156
- allow hanging paragraphs in annexes: they introduce lists
157
- ( paragraph-with-footnote | table | note | formula | admonition | ol | ul | dl | figure | quote | sourcecode | review | example )*,
158
- -->
159
- <ref name="BasicBlock"/>
160
- </zeroOrMore>
161
- <zeroOrMore>
162
- <ref name="note"/>
163
- </zeroOrMore>
164
- <choice>
165
- <zeroOrMore>
166
- <ref name="clause-hanging-paragraph-with-footnote"/>
167
- </zeroOrMore>
168
- <ref name="terms"/>
169
- </choice>
38
+ <define name="editorialgroup">
39
+ <element name="editorialgroup">
40
+ <oneOrMore>
41
+ <ref name="committee"/>
42
+ </oneOrMore>
170
43
  </element>
171
44
  </define>
172
45
  <define name="BibDataExtensionType">
173
46
  <optional>
174
47
  <ref name="doctype"/>
175
48
  </optional>
49
+ <optional>
50
+ <ref name="editorialgroup"/>
51
+ </optional>
52
+ <zeroOrMore>
53
+ <ref name="ics"/>
54
+ </zeroOrMore>
176
55
  <optional>
177
56
  <ref name="commentperiod"/>
178
57
  </optional>
179
58
  </define>
180
59
  </include>
60
+ <define name="DocRelationType" combine="choice">
61
+ <choice>
62
+ <value>obsoletedBy</value>
63
+ <value>supersedes</value>
64
+ <value>supersededBy</value>
65
+ </choice>
66
+ </define>
67
+ <define name="BibliographicDateType" combine="choice">
68
+ <choice>
69
+ <value>abandoned</value>
70
+ <value>superseded</value>
71
+ </choice>
72
+ </define>
181
73
  <define name="commentperiod">
182
74
  <element name="commentperiod">
183
75
  <element name="from">
@@ -205,6 +97,71 @@
205
97
  <ref name="Basic-Section"/>
206
98
  </element>
207
99
  </define>
100
+ <define name="committee">
101
+ <element name="committee">
102
+ <text/>
103
+ </element>
104
+ </define>
105
+ <define name="errata">
106
+ <element name="errata">
107
+ <oneOrMore>
108
+ <ref name="erratarow"/>
109
+ </oneOrMore>
110
+ </element>
111
+ </define>
112
+ <define name="erratarow">
113
+ <element name="row">
114
+ <element name="date">
115
+ <ref name="ISO8601Date"/>
116
+ </element>
117
+ <element name="type">
118
+ <text/>
119
+ </element>
120
+ <element name="change">
121
+ <oneOrMore>
122
+ <ref name="TextElement"/>
123
+ </oneOrMore>
124
+ </element>
125
+ <element name="pages">
126
+ <text/>
127
+ </element>
128
+ </element>
129
+ </define>
130
+ <define name="errata_clause">
131
+ <element name="clause">
132
+ <optional>
133
+ <attribute name="id">
134
+ <data type="ID"/>
135
+ </attribute>
136
+ </optional>
137
+ <optional>
138
+ <attribute name="language"/>
139
+ </optional>
140
+ <optional>
141
+ <attribute name="script"/>
142
+ </optional>
143
+ <optional>
144
+ <attribute name="obligation">
145
+ <choice>
146
+ <value>normative</value>
147
+ <value>informative</value>
148
+ </choice>
149
+ </attribute>
150
+ </optional>
151
+ <optional>
152
+ <ref name="section-title"/>
153
+ </optional>
154
+ <group>
155
+ <zeroOrMore>
156
+ <ref name="BasicBlock"/>
157
+ </zeroOrMore>
158
+ <zeroOrMore>
159
+ <ref name="note"/>
160
+ </zeroOrMore>
161
+ </group>
162
+ <ref name="errata"/>
163
+ </element>
164
+ </define>
208
165
  <define name="nist-standard">
209
166
  <element name="nist-standard">
210
167
  <ref name="bibdata"/>
@@ -221,7 +178,9 @@
221
178
  <zeroOrMore>
222
179
  <ref name="annex"/>
223
180
  </zeroOrMore>
224
- <ref name="bibliography"/>
181
+ <optional>
182
+ <ref name="bibliography"/>
183
+ </optional>
225
184
  </element>
226
185
  </define>
227
186
  </grammar>
@@ -111,21 +111,22 @@ module RelatonNist
111
111
  { years: missed_years }
112
112
  end
113
113
 
114
- def fetch_pages(s, n)
115
- workers = RelatonBib::WorkersPool.new n
114
+ def fetch_pages(hits, threads)
115
+ workers = RelatonBib::WorkersPool.new threads
116
116
  workers.worker { |w| { i: w[:i], hit: w[:hit].fetch } }
117
- s.each_with_index { |hit, i| workers << { i: i, hit: hit } }
117
+ hits.each_with_index { |hit, i| workers << { i: i, hit: hit } }
118
118
  workers.end
119
- workers.result.sort { |x, y| x[:i] <=> y[:i] }.map { |x| x[:hit] }
119
+ workers.result.sort_by { |a| a[:i] }.map { |x| x[:hit] }
120
120
  end
121
121
 
122
122
  def nistbib_search_filter(code, year, opts)
123
- docid = code.match(%r{[0-9-]{3,}}).to_s
123
+ idregex = %r{[0-9-]{3,}}
124
+ docid = code.match(idregex).to_s
124
125
  serie = code.match(%r{(FISP|SP|NISTIR)(?=\s)})
125
126
  warn "[relaton-nist] (\"#{code}\") fetching..."
126
127
  result = search(code, year, opts)
127
128
  result.select do |i|
128
- i.hit[:code]&.include?(docid) && (!serie || i.hit[:serie] == serie.to_s)
129
+ i.hit[:code]&.match(idregex).to_s == docid && (!serie || i.hit[:serie] == serie.to_s)
129
130
  end
130
131
  end
131
132
 
@@ -136,8 +137,8 @@ module RelatonNist
136
137
  warn "[relaton-nist] (There was no match for #{year}, though there were matches "\
137
138
  "found for #{missed_years.join(', ')}.)" unless missed_years.empty?
138
139
  if /\d-\d/ =~ code
139
- warn "[relaton-nist] The provided document part may not exist, or the document "\
140
- "may no longer be published in parts."
140
+ warn "[relaton-nist] The provided document part may not exist, "\
141
+ "or the document may no longer be published in parts."
141
142
  end
142
143
  nil
143
144
  end
@@ -16,10 +16,10 @@ module RelatonNist
16
16
  else
17
17
  from_csrs hit_data
18
18
  end
19
- doctype = "standard"
19
+ # doctype = "standard"
20
20
  titles = fetch_titles(hit_data)
21
21
  unless /^(SP|NISTIR|FIPS) / =~ item_data[:docid][0].id
22
- doctype = id_cleanup(item_data[:docid][0].id)
22
+ # doctype = id_cleanup(item_data[:docid][0].id)
23
23
  item_data[:docid][0] = RelatonBib::DocumentIdentifier.new(
24
24
  id: titles[0][:content].upcase, type: "NIST",
25
25
  )
@@ -78,9 +78,9 @@ module RelatonNist
78
78
  # Strip status from doc id
79
79
  # @param id String
80
80
  # @return String
81
- def id_cleanup(id)
82
- id.sub(/ \(WITHDRAWN\)/, "").sub(/ \(([^) ]+ )?DRAFT\)/i, "")
83
- end
81
+ # def id_cleanup(id)
82
+ # id.sub(/ \(WITHDRAWN\)/, "").sub(/ \(([^) ]+ )?DRAFT\)/i, "")
83
+ # end
84
84
 
85
85
  # Get page.
86
86
  # @param path [String] page's path
@@ -304,7 +304,7 @@ module RelatonNist
304
304
  def name_parts(part, lang, script)
305
305
  return [] unless part
306
306
 
307
- [RelatonBib::LocalizedString.new(name[part], lang, script)]
307
+ [RelatonBib::LocalizedString.new(part, lang, script)]
308
308
  end
309
309
 
310
310
  # @param doc [String, Hash]
@@ -325,7 +325,9 @@ module RelatonNist
325
325
  # @param doc [Nokogiri::HTML::Document]
326
326
  # @return [Array<Hash>]
327
327
  def fetch_abstract(doc)
328
- abstract_content = doc.xpath('//div[contains(@class, "pub-abstract-callout")]/div[1]/p').text
328
+ abstract_content = doc.xpath(
329
+ '//div[contains(@class, "pub-abstract-callout")]/div[1]/p',
330
+ ).text
329
331
  [{
330
332
  content: abstract_content,
331
333
  language: "en",
@@ -336,7 +338,7 @@ module RelatonNist
336
338
 
337
339
  # Fetch copyright.
338
340
  # @param doc [Nokogiri::HTML::Document, String]
339
- # @return [Hash]
341
+ # @return [Array<Hash>]
340
342
  def fetch_copyright(doc)
341
343
  name = "National Institute of Standards and Technology"
342
344
  url = "www.nist.gov"
@@ -345,9 +347,11 @@ module RelatonNist
345
347
  doc.at("//span[@id='pub-release-date']").text.strip
346
348
  end
347
349
  from = d.match(/\d{4}/).to_s
348
- { owner: { name: name, abbreviation: "NIST", url: url }, from: from }
350
+ [{ owner: [{ name: name, abbreviation: "NIST", url: url }], from: from }]
349
351
  end
350
352
 
353
+ # rubocop:disable Metrics/MethodLength, Metrics/AbcSize
354
+
351
355
  # Fetch links.
352
356
  # @param doc [Nokogiri::HTML::Document, Hash]
353
357
  # @return [Array<Hash>]
@@ -365,6 +369,7 @@ module RelatonNist
365
369
  links << { type: "doi", content: doi } if doi
366
370
  links
367
371
  end
372
+ # rubocop:enable Metrics/MethodLength
368
373
 
369
374
  # Fetch relations.
370
375
  # @param doc [Nokogiri::HTML::Document]
@@ -382,6 +387,7 @@ module RelatonNist
382
387
  doc_relation "updates", r.text, DOMAIN + r[:href]
383
388
  end
384
389
  end
390
+ # rubocop:enable Metrics/AbcSize
385
391
 
386
392
  def fetch_relations_json(doc)
387
393
  relations = doc["supersedes"].map do |r|
@@ -409,6 +415,8 @@ module RelatonNist
409
415
  )
410
416
  end
411
417
 
418
+ # rubocop:disable Metrics/MethodLength, Metrics/AbcSize
419
+
412
420
  # @param doc [Nokogiri::HTML::Document]
413
421
  # @return [Array<RelatonBib::Series>]
414
422
  def fetch_series(doc)
@@ -418,14 +426,14 @@ module RelatonNist
418
426
  next if s.name == "span"
419
427
 
420
428
  iter = if idx.zero? then "I"
421
- # elsif status == "final" && idx == (series.size - 1) then "F"
422
429
  else idx + 1
423
430
  end
424
431
 
425
432
  content = s.text.match(/^[^\(]+/).to_s.strip.squeeze " "
426
433
 
427
434
  ref = case s.text
428
- when /^Draft/ then content.match(/(?<=Draft\s).+/).to_s + " (#{iter}PD)"
435
+ when /^Draft/
436
+ content.match(/(?<=Draft\s).+/).to_s + " (#{iter}PD)"
429
437
  when /\(Draft\)/ then content + " (#{iter}PD)"
430
438
  else content
431
439
  end
@@ -436,6 +444,7 @@ module RelatonNist
436
444
  RelatonBib::Series.new(formattedref: fref)
437
445
  end.select { |s| s }
438
446
  end
447
+ # rubocop:enable Metrics/MethodLength, Metrics/AbcSize
439
448
 
440
449
  # @param doc [Nokogiri::HTML::Document, Hash]
441
450
  # @return [Array<RelatonNist::Keyword>]
@@ -448,6 +457,7 @@ module RelatonNist
448
457
  kws.map { |kw| kw.is_a?(String) ? kw : kw.text }
449
458
  end
450
459
 
460
+ # rubocop:disable Metrics/AbcSize
451
461
  # @param doc [Nokogiri::HTML::Document]
452
462
  # @return [RelatonNist::CommentPeriod, NilClass]
453
463
  def fetch_commentperiod(doc)
@@ -459,11 +469,13 @@ module RelatonNist
459
469
  d = doc.at("//span[@id='pub-release-date']").text.strip
460
470
  from = Date.strptime(d, "%B %Y").to_s
461
471
 
462
- ex = doc.at "//strong[contains(.,'The comment closing date has been extended to')]"
472
+ ex = doc.at "//strong[contains(.,'The comment closing date has been "\
473
+ "extended to')]"
463
474
  ext = ex&.text&.match(/\w+\s\d{2},\s\d{4}/).to_s
464
475
  extended = ext.empty? ? nil : Date.strptime(ext, "%B %d, %Y")
465
476
  CommentPeriod.new from: from, to: to, extended: extended
466
477
  end
478
+ # rubocop:enable Metrics/AbcSize
467
479
 
468
480
  # @param json [Hash]
469
481
  # @return [RelatonNist::CommentPeriod, NilClass]