pubid-nist 0.1.8
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/LICENSE.txt +25 -0
- data/README.adoc +727 -0
- data/exe/pubid-nist +97 -0
- data/lib/pubid/nist/document.rb +286 -0
- data/lib/pubid/nist/document_parser.rb +34 -0
- data/lib/pubid/nist/document_transform.rb +21 -0
- data/lib/pubid/nist/edition.rb +26 -0
- data/lib/pubid/nist/errors.rb +5 -0
- data/lib/pubid/nist/nist_tech_pubs.rb +110 -0
- data/lib/pubid/nist/parsers/default.rb +107 -0
- data/lib/pubid/nist/parsers/fips_pub.rb +8 -0
- data/lib/pubid/nist/parsers/nbs_bh.rb +6 -0
- data/lib/pubid/nist/parsers/nbs_circ.rb +21 -0
- data/lib/pubid/nist/parsers/nbs_crpl.rb +17 -0
- data/lib/pubid/nist/parsers/nbs_csm.rb +11 -0
- data/lib/pubid/nist/parsers/nbs_fips.rb +14 -0
- data/lib/pubid/nist/parsers/nbs_hb.rb +31 -0
- data/lib/pubid/nist/parsers/nbs_ir.rb +29 -0
- data/lib/pubid/nist/parsers/nbs_lc.rb +21 -0
- data/lib/pubid/nist/parsers/nbs_mn.rb +6 -0
- data/lib/pubid/nist/parsers/nbs_mp.rb +9 -0
- data/lib/pubid/nist/parsers/nbs_rpt.rb +13 -0
- data/lib/pubid/nist/parsers/nbs_sp.rb +13 -0
- data/lib/pubid/nist/parsers/nbs_tn.rb +9 -0
- data/lib/pubid/nist/parsers/nist_gcr.rb +14 -0
- data/lib/pubid/nist/parsers/nist_hb.rb +8 -0
- data/lib/pubid/nist/parsers/nist_ir.rb +29 -0
- data/lib/pubid/nist/parsers/nist_ncstar.rb +7 -0
- data/lib/pubid/nist/parsers/nist_owmwp.rb +11 -0
- data/lib/pubid/nist/parsers/nist_sp.rb +53 -0
- data/lib/pubid/nist/parsers/nist_tn.rb +13 -0
- data/lib/pubid/nist/publisher.rb +43 -0
- data/lib/pubid/nist/serie.rb +27 -0
- data/lib/pubid/nist/stage.rb +28 -0
- data/lib/pubid/nist/version.rb +5 -0
- data/lib/pubid/nist.rb +33 -0
- data/lib/pubid-nist.rb +3 -0
- data/publishers.yaml +6 -0
- data/series.yaml +139 -0
- data/stages.yaml +5 -0
- data/update_codes.yaml +58 -0
- metadata +213 -0
data/exe/pubid-nist
ADDED
@@ -0,0 +1,97 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require "bundler/setup"
|
4
|
+
require_relative "../lib/pubid-nist"
|
5
|
+
require "thor"
|
6
|
+
require "csv"
|
7
|
+
require "lightly"
|
8
|
+
|
9
|
+
# Build one report row from a document status hash.
#
# The row holds, in order: whether :finalPubId differs from :id, :finalPubId,
# :id, whether :mr differs from :doi, :mr, :doi and :title.
def render_report_doc(doc)
  final_id, doc_id, mr_id, doi, title =
    doc.values_at(:finalPubId, :id, :mr, :doi, :title)

  [final_id != doc_id, final_id, doc_id, mr_id != doi, mr_id, doi, title]
end
|
18
|
+
|
19
|
+
class Pubid::Nist::CLI < Thor
|
20
|
+
desc "report", "Create report for NIST Tech Pubs database (fetches from GitHub)"
|
21
|
+
option :csv, aliases: "-c", type: :boolean, desc: "Export to CSV format"
|
22
|
+
option :updated, aliases: "-u", type: :boolean,
|
23
|
+
desc: "Return only updated identifiers",
|
24
|
+
default: false
|
25
|
+
option :pull, aliases: "-p", type: :boolean,
|
26
|
+
desc: "Update cache from NIST Tech Pubs database",
|
27
|
+
default: false
|
28
|
+
# Print a report with one row per document returned by
# Pubid::Nist::NistTechPubs.status.
#
# Options read: :pull clears the "documents" cache entry first, :csv switches
# the output to CSV, :updated keeps only rows whose final PubID differs from
# the current document ID. Cache status messages go to stderr, rows to stdout.
def report
  columns = ["ID changed?", "New PubID", "Document ID", "DOI changed?",
             "New PubID-MR", "DOI", "Title"]
  as_csv = options[:csv]

  Lightly.clear "documents" if options[:pull]

  cache_note =
    if Lightly.cached? "documents"
      "Using nist-tech-pubs.xml file from local cache"
    else
      "Cached nist-tech-pubs.xml not present, downloading from GitHub..."
    end
  warn cache_note

  puts(as_csv ? columns.to_csv : columns.join(" | "))

  rows = Pubid::Nist::NistTechPubs.status
  rows = rows.reject { |row| row[:finalPubId] == row[:id] } if options[:updated]

  # Booleans are shown as a marker in the plain-text layout; CSV keeps raw values.
  markers = { false => " -", true => "✅" }
  rows.each do |row|
    cells = render_report_doc(row)
    if as_csv
      puts cells.to_csv
    else
      puts cells.map { |cell| markers.fetch(cell, cell) }.join(" | ")
    end
  end
end
|
70
|
+
|
71
|
+
desc "convert", "Convert legacy NIST Tech Pubs ID to NIST PubID"
|
72
|
+
option :style, aliases: "-s", type: :string,
|
73
|
+
desc: "Convert to PubID style (short|long|mr|abbrev)",
|
74
|
+
default: "short"
|
75
|
+
option :format, aliases: "-f", type: :string,
|
76
|
+
desc: "Render in format (json|string)",
|
77
|
+
default: "string"
|
78
|
+
# Convert a legacy NIST Tech Pubs identifier to a NIST PubID and print it.
#
# Reads options[:style] (short|long|mr|abbrev) and options[:format]
# (json|string). Both are matched case-insensitively, and the normalised
# value is what is actually used for rendering, so "-f STRING" or "-s SHORT"
# behave the same as their lower-case forms.
#
# Raises RuntimeError for an unknown style or format. Prints an error line
# when the identifier cannot be parsed. Prints nothing for an empty code.
def convert(code)
  style = options[:style].downcase
  raise "Invalid PubID style" unless %w[mr long short abbrev].include?(style)

  render_format = options[:format].downcase
  raise "Invalid render format" unless %w[string json].include?(render_format)

  return if code.empty?

  document = Pubid::Nist::Document.parse(code)
  if render_format == "string"
    puts document.to_s(style.to_sym)
  else
    puts document.to_json
  end
rescue Pubid::Nist::Errors::ParseError
  puts "[Error] This does not seem to be a valid NIST Tech Pubs legacy identifier"
end
|
95
|
+
end
|
96
|
+
|
97
|
+
Pubid::Nist::CLI.start(ARGV)
|
@@ -0,0 +1,286 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require "json"
|
4
|
+
|
5
|
+
# Replacement table applied to legacy identifiers before parsing; loaded from
# update_codes.yaml, three directories above this file.
update_codes_path = File.join(File.dirname(__FILE__), "../../../update_codes.yaml")
UPDATE_CODES = YAML.load_file(update_codes_path)
|
6
|
+
|
7
|
+
# Label tables used when rendering an identifier. Each table maps a rendering
# style (:long, :abbrev, :short, :mr) to the text placed in front of the
# corresponding identifier component.

# Revision number prefix.
REVISION_DESC = { long: ", Revision ", abbrev: ", Rev. ", short: "r", mr: "r" }.freeze

# Version number prefix.
VERSION_DESC = { long: ", Version ", abbrev: ", Ver. ", short: "ver", mr: "ver" }.freeze

# Volume number prefix.
VOLUME_DESC = { long: ", Volume ", abbrev: ", Vol. ", short: "v", mr: "v" }.freeze

# Part number prefix.
PART_DESC = { long: " Part ", abbrev: " Pt. ", short: "pt", mr: "pt" }.freeze

# Edition prefix.
EDITION_DESC = { long: " Edition ", abbrev: " Ed. ", short: "e", mr: "e" }.freeze

# Supplement prefix.
SUPPLEMENT_DESC = { long: " Supplement ", abbrev: " Suppl. ", short: "sup", mr: "sup" }.freeze

# Section prefix.
SECTION_DESC = { long: " Section ", abbrev: " Sec. ", short: "sec", mr: "sec" }.freeze

# Appendix marker.
APPENDIX_DESC = { long: " Appendix ", abbrev: " App. ", short: "app", mr: "app" }.freeze

# Errata marker.
ERRATA_DESC = { long: " Errata ", abbrev: " Err. ", short: "err", mr: "err" }.freeze

# Index marker.
INDEX_DESC = { long: " Index ", abbrev: " Index. ", short: "indx", mr: "indx" }.freeze

# Insert marker.
INSERT_DESC = { long: " Insert ", abbrev: " Ins. ", short: "ins", mr: "ins" }.freeze
|
83
|
+
|
84
|
+
module Pubid::Nist
|
85
|
+
class Document
|
86
|
+
attr_accessor :serie, :code, :revision, :publisher, :version, :volume,
|
87
|
+
:part, :addendum, :stage, :translation, :update_number,
|
88
|
+
:edition, :supplement, :update_year, :update_month,
|
89
|
+
:section, :appendix, :errata, :index, :insert
|
90
|
+
|
91
|
+
# Build a document from parsed identifier components.
#
# publisher, serie and docnumber are stored as given (docnumber becomes
# #code). stage is wrapped in a Stage only when present. An Array supplement
# is stored as an empty string. An edition is built only when a month or a
# year is supplied. Every remaining keyword is assigned through the
# matching writer after conversion to String.
def initialize(publisher:, serie:, docnumber:, stage: nil, supplement: nil,
               edition_month: nil, edition_year: nil, edition_day: nil, **opts)
  @publisher = publisher
  @serie = serie
  @code = docnumber
  # @stage and @edition are left undefined (not nil) when absent.
  @stage = Stage.new(stage.to_s) if stage
  @supplement = supplement.is_a?(Array) ? "" : supplement
  if edition_month || edition_year
    @edition = parse_edition(edition_month, edition_year, edition_day)
  end

  opts.each_pair do |attribute, value|
    send("#{attribute}=", value.to_s)
  end
end
|
101
|
+
|
102
|
+
# Build an Edition from raw month/year/day values.
#
# Without a month only the year (converted with to_i) is kept. With a month
# the values are run through Date.parse as "day/month/year" (day defaults to
# '01' for parsing only); the day is passed on only when one was supplied.
def parse_edition(edition_month, edition_year, edition_day)
  return Edition.new(year: edition_year.to_i) unless edition_month

  parsed = Date.parse("#{edition_day || '01'}/#{edition_month}/#{edition_year}")
  fields = { month: parsed.month, year: parsed.year }
  fields[:day] = parsed.day if edition_day
  Edition.new(**fields)
end
|
114
|
+
|
115
|
+
# Number of instance variables that currently hold a non-nil value.
def weight
  instance_variables.count { |name| !instance_variable_get(name).nil? }
end
|
121
|
+
|
122
|
+
# Compare with another Document.
#
# Anything that is not a Document is never equal; previously an object
# without instance variables (nil, for example) compared as equal because
# there was nothing to iterate over.
#
# For Documents, only the instance variables defined on +other+ are checked,
# so the comparison is deliberately one-directional: attributes set on self
# but absent from +other+ are ignored.
def ==(other)
  return false unless other.is_a?(Document)

  other.instance_variables.all? do |name|
    instance_variable_get(name) == other.instance_variable_get(name)
  end
end
|
128
|
+
|
129
|
+
# Fold the attributes of +document+ into self and return self.
#
# @serie and @publisher are always taken from +document+. Any other
# attribute is taken when +document+ has a value and self either has none or
# has one whose string form is shorter.
def merge(document)
  document.instance_variables.each do |name|
    incoming = document.instance_variable_get(name)
    existing = instance_variable_get(name)

    # Kept short-circuiting: the string comparison is never evaluated for
    # @serie/@publisher or when the incoming value is nil/false.
    take_incoming =
      %i[@serie @publisher].include?(name) ||
      (incoming && (existing.nil? || existing.to_s.length < incoming.to_s.length))

    instance_variable_set(name, incoming) if take_incoming
  end

  self
end
|
142
|
+
|
143
|
+
# Apply every UPDATE_CODES replacement, in table order, to +code+ and return
# the rewritten string. Each entry replaces all occurrences of its key.
def self.update_old_code(code)
  UPDATE_CODES.reduce(code) do |current, (from, to)|
    current.gsub(from, to)
  end
end
|
149
|
+
|
150
|
+
# Parse an identifier string into a Document.
#
# The code is first rewritten by update_old_code, then parsed with
# DocumentParser and converted with DocumentTransform. A Parslet parse
# failure is re-raised as Pubid::Nist::Errors::ParseError carrying the
# original message plus the failure cause tree.
def self.parse(code)
  normalized = update_old_code(code)
  tree = DocumentParser.new.parse(normalized)
  DocumentTransform.new.apply(tree)
rescue Parslet::ParseFailed => e
  details = "#{e.message}\ncause: #{e.parse_failure_cause.ascii_tree}"
  raise Pubid::Nist::Errors::ParseError, details
end
|
156
|
+
|
157
|
+
def to_s(format = :short)
|
158
|
+
result = render_serie(format)
|
159
|
+
result += " " unless format == :short || stage.nil?
|
160
|
+
result += "#{stage&.to_s(format)}"\
|
161
|
+
" #{code}#{render_part(format)}#{render_edition(format)}"\
|
162
|
+
"#{render_localities(format)}"\
|
163
|
+
"#{render_update(format)}#{render_translation(format)}"
|
164
|
+
result = render_addendum(result, format)
|
165
|
+
|
166
|
+
return result.gsub(" ", ".") if format == :mr
|
167
|
+
|
168
|
+
result
|
169
|
+
end
|
170
|
+
|
171
|
+
# Serialise the document to JSON.
#
# The output has a "styles" object with the identifier rendered in each of
# the four styles, followed by one entry per non-nil instance variable
# (keyed by the variable name without the "@").
def to_json(*args)
  styles = {}
  %i[short abbrev long mr].each { |style| styles[style] = to_s(style) }

  # Instance variables are read after rendering, as in the original order.
  payload = instance_variables.each_with_object({ styles: styles }) do |name, acc|
    value = instance_variable_get(name)
    acc[name.to_s.gsub('@', '')] = value unless value.nil?
  end

  payload.to_json(*args)
end
|
187
|
+
|
188
|
+
# Render the publisher and serie for the given style. When the serie text
# already contains the publisher text, the serie alone is returned.
def render_serie(format)
  serie_text = serie.to_s(format)
  publisher_text = publisher.to_s(format)
  return serie_text if serie_text.include?(publisher_text)

  "#{publisher_text} #{serie_text}"
end
|
195
|
+
|
196
|
+
# Render the volume and part components (in that order) for the given style.
# Components that are nil contribute nothing.
def render_part(format)
  volume_text = volume.nil? ? "" : "#{VOLUME_DESC[format]}#{volume}"
  part_text = part.nil? ? "" : "#{PART_DESC[format]}#{part}"
  volume_text + part_text
end
|
202
|
+
|
203
|
+
# Render edition, revision and version (in that order) for the given style.
# An empty-string revision is rendered as revision "1".
def render_edition(format)
  chunks = []
  chunks << "#{EDITION_DESC[format]}#{edition.to_s}" unless edition.nil?
  chunks << "#{REVISION_DESC[format]}#{revision == '' ? '1' : revision}" if revision
  chunks << "#{VERSION_DESC[format]}#{version}" unless version.nil?
  chunks.join
end
|
211
|
+
|
212
|
+
# Render supplement, section, appendix, errata, index and insert (in that
# order) for the given style. Supplement and section carry their value;
# the remaining four contribute only their label.
def render_localities(format)
  chunks = []
  chunks << "#{SUPPLEMENT_DESC[format]}#{supplement}" unless supplement.nil?
  chunks << "#{SECTION_DESC[format]}#{section}" unless section.nil?
  chunks << "#{APPENDIX_DESC[format]}" unless appendix.nil?
  chunks << "#{ERRATA_DESC[format]}" unless errata.nil?
  # Index and insert labels are appended as-is (no interpolation).
  chunks << INDEX_DESC[format] unless index.nil?
  chunks << INSERT_DESC[format] unless insert.nil?

  chunks.reduce("", :+)
end
|
223
|
+
|
224
|
+
# Render the update component for the given style, or "" when the document
# has no update year.
#
# Side effects (kept from the original): a missing update number is stored
# as "1", and a two-digit update year is expanded to "19YY" on the instance.
#
# The update number previously defaulted to "1" only when a month was
# present; a year without number or month raised NoMethodError on
# nil.match?. The default now applies whenever the number is missing.
#
# Returns nil for an unrecognised style.
def render_update(format)
  return "" if update_year.nil?

  @update_number = "1" if update_number.nil?
  @update_year = "19#{update_year}" if update_year.length == 2

  update_text =
    if update_number.match?(/\d+/)
      text = update_number
      text += "-#{update_year}" unless update_year.empty?
      if update_month
        # Only the month number is taken from the parsed date.
        month = Date.parse("01/#{update_month}/#{update_year}").month
        text += sprintf("%02d", month)
      end
      text
    else
      # A non-numeric update number is rendered as plain update 1.
      "1"
    end

  case format
  when :long
    " Update #{update_text}"
  when :abbrev
    " Upd. #{update_text}"
  when :short
    "/Upd#{update_text}"
  when :mr
    ".u#{update_text}"
  end
end
|
257
|
+
|
258
|
+
# Render the translation code for the given style: upper-cased in
# parentheses for :long/:abbrev, dot-prefixed for :mr, in parentheses for
# :short. Returns "" without a translation and nil for an unknown style.
def render_translation(format)
  return "" if translation.nil?

  if %i[long abbrev].include?(format)
    " (#{translation.upcase})"
  elsif format == :mr
    ".#{translation}"
  elsif format == :short
    "(#{translation})"
  end
end
|
270
|
+
|
271
|
+
# Wrap an already rendered identifier with the addendum marker for the given
# style. Returns +input+ untouched when the document is not an addendum and
# nil for an unknown style.
def render_addendum(input, format)
  return input unless addendum

  {
    long: "Addendum to #{input}",
    abbrev: "Add. to #{input}",
    short: "#{input} Add.",
    mr: "#{input}.add-1",
  }[format]
end
|
285
|
+
end
|
286
|
+
end
|
@@ -0,0 +1,34 @@
|
|
1
|
+
module Pubid::Nist
|
2
|
+
# First parsing stage: recognises the series at the start of an identifier
# and hands the remainder to the series-specific parser.
class DocumentParser < Parslet::Parser
  attr_accessor :parsed

  # Matches a series name followed by everything else (:remaining).
  # Long-style names are tried longest first, then the values of the "mr"
  # table (captured as :series_mr).
  rule(:series) do
    long_names = SERIES["long"].keys.sort_by(&:length).reverse
    long_form = long_names.map { |name| str(name) }.reduce(:|).as(:series)
    mr_form = SERIES["mr"].values.map { |name| str(name) }.reduce(:|).as(:series_mr)

    (long_form | mr_form) >> any.repeat.as(:remaining)
  end

  root(:series)

  # Parse +code+ and return the tree produced by the series-specific parser
  # with the resolved series name merged in under :series. A series matched
  # in "mr" form is translated back to its key in SERIES["mr"].
  def parse(code)
    tree = super(code)
    series =
      if tree[:series]
        tree[:series].to_s
      else
        SERIES["mr"].key(tree[:series_mr].to_s)
      end

    find_parser(series).new.parse(tree[:remaining].to_s).merge({ series: series })
  end

  # Parser class registered for +series+, or the Default parser.
  def find_parser(series)
    PARSERS_CLASSES[series] || Pubid::Nist::Parsers::Default
  end
end
|
34
|
+
end
|
@@ -0,0 +1,21 @@
|
|
1
|
+
module Pubid::Nist
|
2
|
+
# Turns a parse tree into a Document.
class DocumentTransform
  # Build a Document from +tree+.
  #
  # Dots in the series are replaced by spaces. Entries under :parts are
  # merged into the remaining keys and passed to Document.new as keyword
  # arguments. The document number is the upper-cased
  # "first-second" report number when either part is present, otherwise the
  # raw :report_number. +context+ is accepted but not used.
  def apply(tree, context = nil)
    series = tree[:series].to_s.tr(".", " ")

    consumed = %i[report_number first_report_number second_report_number series parts]
    document_parameters = tree.reject { |key, _| consumed.include?(key) }
    tree[:parts]&.each { |part| document_parameters.merge!(part) }

    numbered = [tree[:first_report_number], tree[:second_report_number]]
    report_number = numbered.compact.join("-").upcase
    # using :report_number when need to keep original words case
    report_number = tree[:report_number] if report_number.empty?

    Document.new(publisher: Publisher.parse(series),
                 serie: Serie.new(serie: series),
                 docnumber: report_number,
                 **document_parameters)
  end
end
|
21
|
+
end
|
@@ -0,0 +1,26 @@
|
|
1
|
+
module Pubid::Nist
|
2
|
+
# Edition of a document: an optional sequence plus a date known to year,
# month or day precision.
class Edition
  attr_accessor :year, :month, :day, :parsed, :sequence

  def initialize(parsed: nil, year: nil, month: nil, day: nil, sequence: nil)
    @parsed, @year, @month, @day, @sequence = parsed, year, month, day, sequence
  end

  # Render as "sequence-date", where the date is YYYYMMDD, YYYYMM or YYYY
  # depending on the most precise field present. Missing pieces are omitted.
  def to_s
    date_text =
      if @day
        Date.new(@year, @month, @day).strftime("%Y%m%d")
      elsif @month
        Date.new(@year, @month).strftime("%Y%m")
      elsif @year
        Date.new(@year).strftime("%Y")
      end

    pieces = []
    pieces << @sequence if @sequence
    pieces << date_text if date_text
    pieces.join("-")
  end
end
|
26
|
+
end
|
@@ -0,0 +1,110 @@
|
|
1
|
+
require "relaton_nist/data_fetcher"
|
2
|
+
require "nokogiri"
|
3
|
+
require "open-uri"
|
4
|
+
require "lightly"
|
5
|
+
|
6
|
+
Lightly.life = "24h"
|
7
|
+
|
8
|
+
module Pubid::Nist
|
9
|
+
# Reads the NIST Tech Pubs record list and converts its identifiers to PubIDs.
class NistTechPubs
  URL = "https://raw.githubusercontent.com/usnistgov/NIST-Tech-Pubs/nist-pages/xml/allrecords.xml".freeze

  # Memo tables for parsed document IDs and parsed DOIs. They must be two
  # distinct hashes: chained assignment (`a = b = {}`) made both names point
  # at the same object.
  @converted_id = {}
  @converted_doi = {}

  class << self
    attr_accessor :documents, :converted_id, :converted_doi

    # Return the list of { id:, doi:, title: } hashes, read through the
    # "documents" Lightly cache entry and memoised on the class.
    # Any error is reported on stderr and an empty list is returned.
    def fetch
      Lightly.prune
      @documents ||= Lightly.get("documents") do
        Nokogiri::XML(URI.open(URL))
          .xpath("/body/query/doi_record/report-paper/report-paper_metadata")
          .map { |doc| parse_docid doc }
      end
    rescue StandardError => e
      warn e.message
      []
    end

    # Convert one record to a Document.
    #
    # The document ID is parsed first; when the record has a DOI that also
    # parses, the two are merged (the ID document is updated in place). When
    # the ID does not parse, the DOI alone is used; a DOI parse failure in
    # that case propagates as Errors::ParseError.
    def convert(doc)
      id = @converted_id[doc[:id]] ||= Pubid::Nist::Document.parse(doc[:id])
      return id unless doc.key?(:doi)

      begin
        doi = @converted_doi[doc[:doi]] ||=
          Pubid::Nist::Document.parse(doc[:doi])
      rescue Errors::ParseError
        return id
      end
      # return more complete pubid
      id.merge(doi)
    rescue Errors::ParseError
      @converted_doi[doc[:doi]] ||= Pubid::Nist::Document.parse(doc[:doi])
    end

    # Extract id, doi and title from one record node.
    #
    # The DOI is returned without its "10.6028/" prefix. A handful of records
    # get their item number corrected; the lookup uses the prefix-less DOI
    # (the previous literals still carried the prefix and so never matched).
    def parse_docid(doc)
      id = doc.at("publisher_item/item_number", "publisher_item/identifier")
        .text.sub(%r{^/}, "")
      doi = doc.at("doi_data/doi").text.gsub("10.6028/", "")
      title = doc.at("titles/title").text
      title += " #{doc.at('titles/subtitle').text}" if doc.at("titles/subtitle")
      case doi
      when "NBS.CIRC.12e2revjune" then id.sub!("13e", "12e")
      when "NBS.CIRC.36e2" then id.sub!("46e", "36e")
      when "NBS.HB.67suppJune1967" then id.sub!("1965", "1967")
      when "NBS.HB.105-1r1990" then id.sub!("105-1-1990", "105-1r1990")
      when "NIST.HB.150-10-1995" then id.sub!(/150-10$/, "150-10-1995")
      end

      { id: id, doi: doi, title: title }
    end

    # Records whose converted PubID equals their current ID.
    def comply_with_pubid
      fetch.select do |doc|
        convert(doc).to_s == doc[:id]
      rescue Errors::ParseError
        false
      end
    end

    # Records whose converted PubID differs from their current ID, including
    # records that cannot be parsed.
    def different_with_pubid
      fetch.reject do |doc|
        convert(doc).to_s == doc[:id]
      rescue Errors::ParseError
        true
      end
    end

    # Records that cannot be parsed at all.
    def parse_fail_with_pubid
      fetch.select do |doc|
        convert(doc).to_s && false
      rescue Errors::ParseError
        true
      end
    end

    # returning current document id, doi, title and final PubID
    # (lazily; unparseable records get placeholder strings instead)
    def status
      fetch.lazy.map do |doc|
        final_doc = convert(doc)
        {
          id: doc[:id],
          doi: doc[:doi],
          title: doc[:title],
          finalPubId: final_doc.to_s,
          mr: final_doc.to_s(:mr),
        }
      rescue Errors::ParseError
        {
          id: doc[:id],
          doi: doc[:doi],
          title: doc[:title],
          finalPubId: "parse error",
          mr: "parse_error",
        }
      end
    end
  end
end
|
110
|
+
end
|
@@ -0,0 +1,107 @@
|
|
1
|
+
module Pubid::Nist
|
2
|
+
module Parsers
|
3
|
+
# Fallback grammar for the part of an identifier that follows the series.
class Default < Parslet::Parser
  # Entry rule: optional stage, a " " or "." separator, the report number,
  # then any number of trailing parts collected under :parts.
  rule(:identifier) { stage.maybe >> (str(" ") | str(".")) >> report_number >> parts.repeat.as(:parts) }

  # Lexical helpers.
  rule(:digits) { match('\d').repeat(1) }
  rule(:letters) { match('[A-Za-z]').repeat(1) }
  rule(:year_digits) { match('\d').repeat(4, 4) }
  rule(:month_letters) { match('[A-Za-z]').repeat(3, 3) }
  rule(:number_suffix) { match("[aA-Z]") }

  # Any single trailing component, tried in this order.
  rule(:parts) do
    edition | revision | version | volume | part | update | addendum | translation |
      supplement | errata | index | insert | section | appendix
  end

  # Stage: one of the STAGES keys in parentheses.
  rule(:stage) do
    stage_names = STAGES.keys.map { |name| str(name) }.reduce(:|)
    str("(") >> stage_names.as(:stage) >> str(")")
  end

  # Report number: digits with an optional one-letter suffix, optionally
  # followed by "-" and a second number of the same shape.
  rule(:digits_with_suffix) do
    # do not match with 428P1
    digits >> (number_suffix >> match('\d').absent?).maybe
  end
  rule(:first_report_number) { digits_with_suffix.as(:first_report_number) }
  rule(:second_report_number) { digits_with_suffix.as(:second_report_number) }
  rule(:report_number) { first_report_number >> (str("-") >> second_report_number).maybe }

  # Numbered components.
  rule(:edition_prefixes) { str("e") }
  rule(:edition) { edition_prefixes >> digits.as(:edition) }
  rule(:revision) { str("r") >> ((digits >> match("[a-z]").maybe).maybe).as(:revision) }
  rule(:version) { str("ver") >> digits.as(:version) }
  rule(:volume) { str("v") >> digits.as(:volume) }
  rule(:part_prefixes) { str("pt") | str("p") }
  rule(:part) { part_prefixes >> digits.as(:part) }
  rule(:section) { str("sec") >> digits.as(:section) }
  rule(:supplement) { (str("supp") | str("sup")) >> match('\d').repeat.as(:supplement) }

  # Update: "/Upd" or "/upd", a number, "-", then a year.
  rule(:update) do
    (str("/Upd") | str("/upd")) >> digits.as(:update_number) >> str("-") >> digits.as(:update_year)
  end

  # Translation: three word characters, either in parentheses or after a dot.
  rule(:translation) do
    (str("(") >> match('\w').repeat(3, 3).as(:translation) >> str(")")) |
      (str(".") >> match('\w').repeat(3, 3).as(:translation))
  end

  # Marker-only components.
  rule(:addendum) { (str("-add") | str(".add-1")).as(:addendum) }
  rule(:appendix) { str("app").as(:appendix) }
  rule(:errata) { str("-").maybe >> str("errata").as(:errata) }
  rule(:index) { (str("index") | str("indx")).as(:index) }
  rule(:insert) { (str("insert") | str("ins")).as(:insert) }

  root(:identifier)
end
|
106
|
+
end
|
107
|
+
end
|