pubid-nist 0.1.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/LICENSE.txt +25 -0
- data/README.adoc +727 -0
- data/exe/pubid-nist +97 -0
- data/lib/pubid/nist/document.rb +286 -0
- data/lib/pubid/nist/document_parser.rb +34 -0
- data/lib/pubid/nist/document_transform.rb +21 -0
- data/lib/pubid/nist/edition.rb +26 -0
- data/lib/pubid/nist/errors.rb +5 -0
- data/lib/pubid/nist/nist_tech_pubs.rb +110 -0
- data/lib/pubid/nist/parsers/default.rb +107 -0
- data/lib/pubid/nist/parsers/fips_pub.rb +8 -0
- data/lib/pubid/nist/parsers/nbs_bh.rb +6 -0
- data/lib/pubid/nist/parsers/nbs_circ.rb +21 -0
- data/lib/pubid/nist/parsers/nbs_crpl.rb +17 -0
- data/lib/pubid/nist/parsers/nbs_csm.rb +11 -0
- data/lib/pubid/nist/parsers/nbs_fips.rb +14 -0
- data/lib/pubid/nist/parsers/nbs_hb.rb +31 -0
- data/lib/pubid/nist/parsers/nbs_ir.rb +29 -0
- data/lib/pubid/nist/parsers/nbs_lc.rb +21 -0
- data/lib/pubid/nist/parsers/nbs_mn.rb +6 -0
- data/lib/pubid/nist/parsers/nbs_mp.rb +9 -0
- data/lib/pubid/nist/parsers/nbs_rpt.rb +13 -0
- data/lib/pubid/nist/parsers/nbs_sp.rb +13 -0
- data/lib/pubid/nist/parsers/nbs_tn.rb +9 -0
- data/lib/pubid/nist/parsers/nist_gcr.rb +14 -0
- data/lib/pubid/nist/parsers/nist_hb.rb +8 -0
- data/lib/pubid/nist/parsers/nist_ir.rb +29 -0
- data/lib/pubid/nist/parsers/nist_ncstar.rb +7 -0
- data/lib/pubid/nist/parsers/nist_owmwp.rb +11 -0
- data/lib/pubid/nist/parsers/nist_sp.rb +53 -0
- data/lib/pubid/nist/parsers/nist_tn.rb +13 -0
- data/lib/pubid/nist/publisher.rb +43 -0
- data/lib/pubid/nist/serie.rb +27 -0
- data/lib/pubid/nist/stage.rb +28 -0
- data/lib/pubid/nist/version.rb +5 -0
- data/lib/pubid/nist.rb +33 -0
- data/lib/pubid-nist.rb +3 -0
- data/publishers.yaml +6 -0
- data/series.yaml +139 -0
- data/stages.yaml +5 -0
- data/update_codes.yaml +58 -0
- metadata +213 -0
data/exe/pubid-nist
ADDED
@@ -0,0 +1,97 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require "bundler/setup"
|
4
|
+
require_relative "../lib/pubid-nist"
|
5
|
+
require "thor"
|
6
|
+
require "csv"
|
7
|
+
require "lightly"
|
8
|
+
|
9
|
+
# Builds one report row for a document status entry.
#
# @param doc [Hash] entry with :finalPubId, :id, :mr, :doi and :title keys
# @return [Array] [id changed?, new PubID, document id, DOI changed?,
#   new PubID-MR, DOI, title]
def render_report_doc(doc)
  final_pubid, legacy_id, mr_pubid, doi, title =
    doc.values_at(:finalPubId, :id, :mr, :doi, :title)

  [final_pubid != legacy_id, final_pubid, legacy_id,
   mr_pubid != doi, mr_pubid, doi, title]
end
|
18
|
+
|
19
|
+
# Thor-based command-line interface exposing the `report` and `convert`
# commands.
class Pubid::Nist::CLI < Thor
  desc "report", "Create report for NIST Tech Pubs database (fetches from GitHub)"
  option :csv, aliases: "-c", type: :boolean, desc: "Export to CSV format"
  option :updated, aliases: "-u", type: :boolean,
                   desc: "Return only updated identifiers",
                   default: false
  option :pull, aliases: "-p", type: :boolean,
                desc: "Update cache from NIST Tech Pubs database",
                default: false
  # Prints a heading followed by one row per document returned by
  # NistTechPubs.status, either as CSV or as " | "-separated text.
  def report
    heading = ["ID changed?", "New PubID", "Document ID", "DOI changed?",
               "New PubID-MR", "DOI", "Title"]

    # --pull drops the cached document list before the cache is consulted.
    Lightly.clear "documents" if options[:pull]
    # Pubid::Nist::NistTechPubs.fetch
    if Lightly.cached? "documents"
      warn "Using nist-tech-pubs.xml file from local cache"
    else
      warn "Cached nist-tech-pubs.xml not present, downloading from GitHub..."
    end

    puts(options[:csv] ? heading.to_csv : heading.join(" | "))

    documents = Pubid::Nist::NistTechPubs.status
    # --updated keeps only documents whose final PubID differs from the current id.
    documents = documents.reject { |doc| doc[:finalPubId] == doc[:id] } if options[:updated]

    documents.each do |doc|
      row = render_report_doc(doc)
      if options[:csv]
        puts row.to_csv
      else
        # Booleans are shown as markers; every other cell is printed as-is.
        cells = row.map do |value|
          case value
          when false then " -"
          when true then "✅"
          else value
          end
        end
        puts cells.join(" | ")
      end
    end
  end

  desc "convert", "Convert legacy NIST Tech Pubs ID to NIST PubID"
  option :style, aliases: "-s", type: :string,
                 desc: "Convert to PubID style (short|long|mr|abbrev)",
                 default: "short"
  option :format, aliases: "-f", type: :string,
                  desc: "Render in format (json|string)",
                  default: "string"
  # Parses +code+ and prints it in the requested style and format.
  #
  # The style and format options are matched case-insensitively. The
  # lower-cased values are used for rendering as well as for validation, so
  # "--format STRING" or "--style LONG" behave like their lower-case forms.
  #
  # @param code [String] legacy identifier; an empty string prints nothing
  # @raise [RuntimeError] when the style or the format is not supported
  def convert(code)
    style = options[:style].downcase
    raise "Invalid PubID style" unless %w[mr long short abbrev].include?(style)

    render_format = options[:format].downcase
    raise "Invalid render format" unless %w[string json].include?(render_format)

    return if code.empty?

    document = Pubid::Nist::Document.parse(code)
    puts(render_format == "string" ? document.to_s(style.to_sym) : document.to_json)
  rescue Pubid::Nist::Errors::ParseError
    puts "[Error] This does not seem to be a valid NIST Tech Pubs legacy identifier"
  end
end
|
96
|
+
|
97
|
+
# Hand the command-line arguments to the Thor CLI defined in this script.
Pubid::Nist::CLI.start(ARGV)
|
@@ -0,0 +1,286 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require "json"
|
4
|
+
|
5
|
+
# Replacement table loaded from update_codes.yaml, three directories above this
# file. Document.update_old_code applies each pair to a code with gsub.
update_codes_path = File.join(File.dirname(__FILE__), "../../../update_codes.yaml")
UPDATE_CODES = YAML.load_file(update_codes_path)
|
6
|
+
|
7
|
+
# Text placed in front of each identifier element, keyed by rendering style
# (:long, :abbrev, :short, :mr).
REVISION_DESC = { long: ", Revision ", abbrev: ", Rev. ", short: "r", mr: "r" }.freeze

VERSION_DESC = { long: ", Version ", abbrev: ", Ver. ", short: "ver", mr: "ver" }.freeze

VOLUME_DESC = { long: ", Volume ", abbrev: ", Vol. ", short: "v", mr: "v" }.freeze

PART_DESC = { long: " Part ", abbrev: " Pt. ", short: "pt", mr: "pt" }.freeze

EDITION_DESC = { long: " Edition ", abbrev: " Ed. ", short: "e", mr: "e" }.freeze

SUPPLEMENT_DESC = { long: " Supplement ", abbrev: " Suppl. ", short: "sup", mr: "sup" }.freeze

SECTION_DESC = { long: " Section ", abbrev: " Sec. ", short: "sec", mr: "sec" }.freeze

APPENDIX_DESC = { long: " Appendix ", abbrev: " App. ", short: "app", mr: "app" }.freeze

ERRATA_DESC = { long: " Errata ", abbrev: " Err. ", short: "err", mr: "err" }.freeze

INDEX_DESC = { long: " Index ", abbrev: " Index. ", short: "indx", mr: "indx" }.freeze

INSERT_DESC = { long: " Insert ", abbrev: " Ins. ", short: "ins", mr: "ins" }.freeze
|
83
|
+
|
84
|
+
module Pubid::Nist
|
85
|
+
class Document
|
86
|
+
attr_accessor :serie, :code, :revision, :publisher, :version, :volume,
|
87
|
+
:part, :addendum, :stage, :translation, :update_number,
|
88
|
+
:edition, :supplement, :update_year, :update_month,
|
89
|
+
:section, :appendix, :errata, :index, :insert
|
90
|
+
|
91
|
+
# Builds a document identifier from its components.
#
# @param publisher stored as-is in @publisher
# @param serie stored as-is in @serie
# @param docnumber stored in @code
# @param stage wrapped in Stage (via its string form) when given
# @param supplement an Array is stored as "", anything else unchanged
# @param edition_month combined with the year/day into an Edition when a
#   month or a year is given
# @param edition_year see edition_month
# @param edition_day see edition_month
# @param opts remaining attributes; each value is converted to a String and
#   assigned through the matching "<key>=" writer
def initialize(publisher:, serie:, docnumber:, stage: nil, supplement: nil,
               edition_month: nil, edition_year: nil, edition_day: nil, **opts)
  @publisher = publisher
  @serie = serie
  @code = docnumber
  @stage = Stage.new(stage.to_s) if stage
  @supplement = supplement.is_a?(Array) ? "" : supplement
  if edition_month || edition_year
    @edition = parse_edition(edition_month, edition_year, edition_day)
  end
  opts.each_pair { |attribute, value| send("#{attribute}=", value.to_s) }
end
|
101
|
+
|
102
|
+
# Builds an Edition from separately parsed date components.
#
# Without a month only the year (converted with to_i) is used. With a month,
# the components are run through Date.parse, using "01" as the day when none
# is given, and the day is passed on only when it was supplied.
#
# @return [Edition]
def parse_edition(edition_month, edition_year, edition_day)
  return Edition.new(year: edition_year.to_i) unless edition_month

  date = Date.parse("#{edition_day || '01'}/#{edition_month}/#{edition_year}")
  fields = { month: date.month, year: date.year }
  fields[:day] = date.day if edition_day
  Edition.new(**fields)
end
|
114
|
+
|
115
|
+
# Returns a weight equal to the number of instance variables whose value is
# not nil.
def weight
  instance_variables.count { |name| !instance_variable_get(name).nil? }
end
|
121
|
+
|
122
|
+
# Compares two documents attribute by attribute.
#
# Every instance variable set on either side is compared, and a variable
# that is unset on one side reads as nil, so the result does not depend on
# which operand is the receiver. Objects of another class are never equal;
# comparing only the other object's instance variables made nil, strings and
# any other object without instance variables compare equal.
#
# @param other [Object]
# @return [Boolean]
def ==(other)
  return false unless other.is_a?(self.class)

  (instance_variables | other.instance_variables).all? do |var|
    instance_variable_get(var) == other.instance_variable_get(var)
  end
end
|
128
|
+
|
129
|
+
# Copies attributes from +document+ into this document, in place.
#
# @serie and @publisher are always taken from +document+. Any other attribute
# is taken only when the incoming value is truthy and the current value is
# either nil or has a shorter string form.
#
# @param document [Document] source of the attributes
# @return [Document] self
def merge(document)
  document.instance_variables.each do |name|
    incoming = document.instance_variable_get(name)
    existing = instance_variable_get(name)

    always_replaced = name == :@serie || name == :@publisher
    more_complete = incoming &&
      (existing.nil? || existing.to_s.length < incoming.to_s.length)
    next unless always_replaced || more_complete

    instance_variable_set(name, incoming)
  end

  self
end
|
142
|
+
|
143
|
+
# Applies every UPDATE_CODES replacement pair to +code+, in table order,
# feeding each result into the next replacement.
#
# @param code [String]
# @return [String] the code after all replacements
def self.update_old_code(code)
  UPDATE_CODES.reduce(code) { |updated, (from, to)| updated.gsub(from, to) }
end
|
149
|
+
|
150
|
+
# Parses an identifier string into a Document.
#
# The code is first passed through update_old_code, then parsed by
# DocumentParser, and the resulting tree is handed to DocumentTransform.
#
# @param code [String]
# @raise [Pubid::Nist::Errors::ParseError] when Parslet cannot parse the code;
#   the message carries Parslet's message and the ASCII tree of the cause
def self.parse(code)
  tree = DocumentParser.new.parse(update_old_code(code))
  DocumentTransform.new.apply(tree)
rescue Parslet::ParseFailed => e
  cause_tree = e.parse_failure_cause.ascii_tree
  raise Pubid::Nist::Errors::ParseError, "#{e.message}\ncause: #{cause_tree}"
end
|
156
|
+
|
157
|
+
def to_s(format = :short)
|
158
|
+
result = render_serie(format)
|
159
|
+
result += " " unless format == :short || stage.nil?
|
160
|
+
result += "#{stage&.to_s(format)}"\
|
161
|
+
" #{code}#{render_part(format)}#{render_edition(format)}"\
|
162
|
+
"#{render_localities(format)}"\
|
163
|
+
"#{render_update(format)}#{render_translation(format)}"
|
164
|
+
result = render_addendum(result, format)
|
165
|
+
|
166
|
+
return result.gsub(" ", ".") if format == :mr
|
167
|
+
|
168
|
+
result
|
169
|
+
end
|
170
|
+
|
171
|
+
# Serializes the document to JSON.
#
# The output has a "styles" object with the identifier rendered in the
# short, abbrev, long and mr styles, followed by one entry per non-nil
# instance variable (named without the leading "@").
#
# @param args forwarded to Hash#to_json
# @return [String]
def to_json(*args)
  # Styles are rendered before the instance variables are listed.
  styles = %i[short abbrev long mr].each_with_object({}) do |style, rendered|
    rendered[style] = to_s(style)
  end
  payload = { styles: styles }

  instance_variables.each do |name|
    value = instance_variable_get(name)
    next if value.nil?

    payload[name.to_s.delete("@")] = value
  end

  payload.to_json(*args)
end
|
187
|
+
|
188
|
+
# Renders the series, prefixed with the publisher unless the rendered series
# already contains the rendered publisher.
#
# @param format [Symbol] rendering style
# @return [String]
def render_serie(format)
  serie_text = serie.to_s(format)
  publisher_text = publisher.to_s(format)
  return serie_text if serie_text.include?(publisher_text)

  "#{publisher_text} #{serie_text}"
end
|
195
|
+
|
196
|
+
# Renders the volume and the part, in that order, each preceded by its
# style-specific label. Attributes that are nil are skipped.
#
# @param format [Symbol] rendering style
# @return [String]
def render_part(format)
  [[VOLUME_DESC, volume], [PART_DESC, part]].reduce("") do |text, (labels, value)|
    value.nil? ? text : "#{text}#{labels[format]}#{value}"
  end
end
|
202
|
+
|
203
|
+
# Renders the edition, revision and version, in that order, each preceded by
# its style-specific label. An empty-string revision is rendered as "1".
#
# @param format [Symbol] rendering style
# @return [String]
def render_edition(format)
  segments = []
  segments << "#{EDITION_DESC[format]}#{edition}" unless edition.nil?
  if revision
    revision_number = revision == "" ? "1" : revision
    segments << "#{REVISION_DESC[format]}#{revision_number}"
  end
  segments << "#{VERSION_DESC[format]}#{version}" unless version.nil?
  segments.join
end
|
211
|
+
|
212
|
+
# Renders supplement, section, appendix, errata, index and insert, in that
# order. Supplement and section are followed by their value; the other four
# contribute only their style-specific label. Attributes that are nil are
# skipped.
#
# @param format [Symbol] rendering style
# @return [String]
def render_localities(format)
  valued = [[SUPPLEMENT_DESC, supplement], [SECTION_DESC, section]]
  text = valued.reduce("") do |acc, (labels, value)|
    value.nil? ? acc : "#{acc}#{labels[format]}#{value}"
  end

  text = "#{text}#{APPENDIX_DESC[format]}" unless appendix.nil?
  text = "#{text}#{ERRATA_DESC[format]}" unless errata.nil?
  # Index and insert labels are concatenated directly rather than interpolated.
  text = text + INDEX_DESC[format] unless index.nil?
  text = text + INSERT_DESC[format] unless insert.nil?
  text
end
|
223
|
+
|
224
|
+
# Renders the update element ("/Upd1-2019", " Update 1-2019", ...).
#
# Returns "" when no update year is set. Note that rendering normalizes the
# document in place: a month without an update number sets the number to
# "1", and a two-digit year is prefixed with "19".
#
# When the update number contains no digit, or is missing altogether, the
# update is rendered as a bare "1" without the year. A missing number used
# to raise NoMethodError.
#
# @param format [Symbol] :long, :abbrev, :short or :mr
# @return [String, nil] nil for any other format
def render_update(format)
  return "" if update_year.nil?

  @update_number = "1" if update_month && update_number.nil?
  @update_year = "19#{update_year}" if update_year.length == 2

  # to_s makes a nil update number take the fallback branch.
  if update_number.to_s.match?(/\d+/)
    update_text = update_number
    update_text += "-#{update_year}" unless update_year.empty?
    if update_month
      # Date.parse resolves the month to a number, appended as two digits.
      month = Date.parse("01/#{update_month}/#{update_year}").month
      update_text += month.to_s.rjust(2, "0")
    end
  else
    update_text = "1"
  end

  case format
  when :long
    " Update #{update_text}"
  when :abbrev
    " Upd. #{update_text}"
  when :short
    "/Upd#{update_text}"
  when :mr
    ".u#{update_text}"
  end
end
|
257
|
+
|
258
|
+
# Renders the translation code: upper-cased in parentheses after a space for
# :long and :abbrev, after a dot for :mr, and in parentheses for :short.
#
# @param format [Symbol] rendering style
# @return [String, nil] "" when there is no translation, nil for any other format
def render_translation(format)
  return "" if translation.nil?

  if format == :long || format == :abbrev
    " (#{translation.upcase})"
  elsif format == :mr
    ".#{translation}"
  elsif format == :short
    "(#{translation})"
  end
end
|
270
|
+
|
271
|
+
# Wraps an already rendered identifier with the addendum marker.
#
# @param input [String] rendered identifier
# @param format [Symbol] rendering style
# @return [String, nil] +input+ unchanged when the document has no addendum,
#   nil when it has one and the format is not a known style
def render_addendum(input, format)
  return input unless addendum

  {
    long: "Addendum to #{input}",
    abbrev: "Add. to #{input}",
    short: "#{input} Add.",
    mr: "#{input}.add-1",
  }[format]
end
|
285
|
+
end
|
286
|
+
end
|
@@ -0,0 +1,34 @@
|
|
1
|
+
module Pubid::Nist
|
2
|
+
# First-stage parser: recognizes the series at the start of an identifier
# and hands the rest of the string to the parser registered for that series.
class DocumentParser < Parslet::Parser
  attr_accessor :parsed

  # A series in its long spelling (SERIES["long"] keys, longest tried first)
  # or its machine-readable spelling (SERIES["mr"] values), followed by
  # whatever text remains.
  rule(:series) do
    long_names = SERIES["long"].keys.sort_by(&:length).reverse.flatten
    long_series = long_names.map { |name| str(name) }.reduce(:|)
    mr_series = SERIES["mr"].values.map { |name| str(name) }.reduce(:|)

    (long_series.as(:series) | mr_series.as(:series_mr)) >> any.repeat.as(:remaining)
  end

  root(:series)

  # Parses +code+ and returns the series-specific parse tree with the
  # series name merged in under :series.
  #
  # A machine-readable series is mapped back to its SERIES["mr"] key.
  def parse(code)
    tree = super(code)
    series = tree[:series] ? tree[:series].to_s : SERIES["mr"].key(tree[:series_mr].to_s)

    find_parser(series).new.parse(tree[:remaining].to_s).merge({ series: series })
  end

  # Returns the parser class registered for +series+ in PARSERS_CLASSES, or
  # Parsers::Default when there is none.
  def find_parser(series)
    PARSERS_CLASSES[series] || Pubid::Nist::Parsers::Default
  end
end
|
34
|
+
end
|
@@ -0,0 +1,21 @@
|
|
1
|
+
module Pubid::Nist
|
2
|
+
# Turns a parse tree into a Document.
class DocumentTransform
  # Builds a Document from +tree+.
  #
  # The series has its dots replaced by spaces and supplies both the
  # publisher and the serie. The document number is the upper-cased
  # "first-second" report number, or :report_number as-is when neither of
  # those is present. Entries of :parts are merged into the remaining
  # parameters, which are passed to Document.new as keywords.
  #
  # @param tree [Hash] parse tree
  # @param context unused
  # @return [Document]
  def apply(tree, context = nil)
    series = tree[:series].to_s.tr(".", " ")

    consumed_keys = %i[report_number first_report_number second_report_number series parts]
    document_parameters = tree.reject { |key, _| consumed_keys.include?(key) }
    tree[:parts]&.each do |part|
      document_parameters.update(part)
    end

    numbers = tree.values_at(:first_report_number, :second_report_number).compact
    joined_number = numbers.join("-").upcase
    # using :report_number when need to keep original words case
    docnumber = joined_number.empty? ? tree[:report_number] : joined_number

    Document.new(publisher: Publisher.parse(series),
                 serie: Serie.new(serie: series),
                 docnumber: docnumber,
                 **document_parameters)
  end
end
|
21
|
+
end
|
@@ -0,0 +1,26 @@
|
|
1
|
+
module Pubid::Nist
|
2
|
+
# An edition made of an optional sequence and an optional date given to
# year, month or day precision.
class Edition
  attr_accessor :year, :month, :day, :parsed, :sequence

  def initialize(parsed: nil, year: nil, month: nil, day: nil, sequence: nil)
    @parsed, @year, @month, @day, @sequence = parsed, year, month, day, sequence
  end

  # Renders the sequence and the date joined by "-". The date is formatted as
  # YYYYMMDD, YYYYMM or YYYY depending on the most precise component set.
  #
  # @return [String] "" when neither a sequence nor a date is set
  def to_s
    segments = []
    segments << @sequence if @sequence

    date_text = formatted_date
    segments << date_text if date_text

    segments.join("-")
  end

  private

  # Formats the date at the precision of the most specific component set;
  # returns nil when no day, month or year is set.
  def formatted_date
    if @day
      Date.new(@year, @month, @day).strftime("%Y%m%d")
    elsif @month
      Date.new(@year, @month).strftime("%Y%m")
    elsif @year
      Date.new(@year).strftime("%Y")
    end
  end
end
|
26
|
+
end
|
@@ -0,0 +1,110 @@
|
|
1
|
+
require "relaton_nist/data_fetcher"
|
2
|
+
require "nokogiri"
|
3
|
+
require "open-uri"
|
4
|
+
require "lightly"
|
5
|
+
|
6
|
+
# Configure the Lightly cache lifetime ("24h") for everything cached below.
Lightly.life = "24h"
|
7
|
+
|
8
|
+
module Pubid::Nist
|
9
|
+
# Access to the NIST Tech Pubs records and their conversion to PubIDs.
class NistTechPubs
  URL = "https://raw.githubusercontent.com/usnistgov/NIST-Tech-Pubs/nist-pages/xml/allrecords.xml".freeze

  # Parsed documents keyed by legacy id and by DOI. These are two separate
  # hashes; a chained assignment would make both names point at one Hash.
  @converted_id = {}
  @converted_doi = {}

  class << self

    attr_accessor :documents, :converted_id, :converted_doi

    # Returns the list of { id:, doi:, title: } records, read from the
    # Lightly cache or downloaded from URL and parsed.
    #
    # @return [Array<Hash>] an empty array when anything raises; the error
    #   message is written with warn
    def fetch
      Lightly.prune
      @documents ||= Lightly.get "documents" do
        Nokogiri::XML(URI.open(URL))
          .xpath("/body/query/doi_record/report-paper/report-paper_metadata")
          .map { |doc| parse_docid doc }
      end
    rescue StandardError => e
      warn e.message
      []
    end

    # Converts a record to a Document, using both its id and its DOI.
    #
    # The id is parsed first. When the record has a DOI that parses too, the
    # DOI document is merged into the id document. When the id does not
    # parse, the DOI alone is parsed. Parsed documents are cached per id and
    # per DOI.
    #
    # @param doc [Hash] record with :id and optionally :doi
    # @return [Pubid::Nist::Document]
    # @raise [Errors::ParseError] when neither the id nor the DOI parses
    def convert(doc)
      id = @converted_id[doc[:id]] ||= Pubid::Nist::Document.parse(doc[:id])
      return id unless doc.key?(:doi)

      begin
        doi = @converted_doi[doc[:doi]] ||=
          Pubid::Nist::Document.parse(doc[:doi])
      rescue Errors::ParseError
        return id
      end
      # return more complete pubid
      id.merge(doi)
    rescue Errors::ParseError
      @converted_doi[doc[:doi]] ||= Pubid::Nist::Document.parse(doc[:doi])
    end

    # Extracts id, DOI and title from one record node.
    #
    # A leading "/" is removed from the id and the "10.6028/" prefix from the
    # DOI. A few records get their id corrected based on the DOI. The DOI is
    # compared after its prefix has been removed, so the table below lists
    # the DOIs without "10.6028/"; with the prefix no correction ever matched.
    #
    # @param doc node responding to #at, as yielded by fetch
    # @return [Hash] { id:, doi:, title: }
    def parse_docid(doc)
      id = doc.at("publisher_item/item_number", "publisher_item/identifier")
        .text.sub(%r{^/}, "")
      doi = doc.at("doi_data/doi").text.gsub("10.6028/", "")
      title = doc.at("titles/title").text
      title += " #{doc.at('titles/subtitle').text}" if doc.at("titles/subtitle")

      pattern, replacement =
        case doi
        when "NBS.CIRC.12e2revjune" then ["13e", "12e"]
        when "NBS.CIRC.36e2" then ["46e", "36e"]
        when "NBS.HB.67suppJune1967" then ["1965", "1967"]
        when "NBS.HB.105-1r1990" then ["105-1-1990", "105-1r1990"]
        when "NIST.HB.150-10-1995" then [/150-10$/, "150-10-1995"]
        end
      id = id.sub(pattern, replacement) if pattern

      { id: id, doi: doi, title: title }
    end

    # Records whose converted PubID (short style) equals their current id.
    def comply_with_pubid
      fetch.select do |doc|
        convert(doc).to_s == doc[:id]
      rescue Errors::ParseError
        false
      end
    end

    # Records whose converted PubID differs from their current id, including
    # records that fail to parse.
    def different_with_pubid
      fetch.reject do |doc|
        convert(doc).to_s == doc[:id]
      rescue Errors::ParseError
        true
      end
    end

    # Records that cannot be converted because parsing fails.
    def parse_fail_with_pubid
      fetch.select do |doc|
        convert(doc).to_s && false
      rescue Errors::ParseError
        true
      end
    end

    # returning current document id, doi, title and final PubID
    #
    # The result is a lazy enumerator; records that fail to parse are
    # reported with "parse error" / "parse_error" placeholders.
    def status
      fetch.lazy.map do |doc|
        final_doc = convert(doc)
        {
          id: doc[:id],
          doi: doc[:doi],
          title: doc[:title],
          finalPubId: final_doc.to_s,
          mr: final_doc.to_s(:mr),
        }
      rescue Errors::ParseError
        {
          id: doc[:id],
          doi: doc[:doi],
          title: doc[:title],
          finalPubId: "parse error",
          mr: "parse_error",
        }
      end
    end
  end
end
|
110
|
+
end
|
@@ -0,0 +1,107 @@
|
|
1
|
+
module Pubid::Nist
|
2
|
+
module Parsers
|
3
|
+
# Parslet grammar for the part of an identifier that follows the series.
class Default < Parslet::Parser
  # Optional stage, a space or dot separator, the report number, then any
  # number of parts collected under :parts.
  rule(:identifier) { stage.maybe >> (str(" ") | str(".")) >> report_number >> parts.repeat.as(:parts) }

  # Character-level building blocks.
  rule(:digits) do
    match('\d').repeat(1)
  end
  rule(:letters) do
    match('[A-Za-z]').repeat(1)
  end
  rule(:year_digits) do
    match('\d').repeat(4, 4)
  end
  rule(:month_letters) do
    match('[A-Za-z]').repeat(3, 3)
  end
  # A single "a" or an upper-case letter.
  rule(:number_suffix) do
    match("[aA-Z]")
  end
  # rule(:small)

  # Any one trailing element; alternatives are tried in the listed order.
  rule(:parts) do
    [edition, revision, version, volume, part, update, addendum, translation,
     supplement, errata, index, insert, section, appendix].reduce(:|)
  end

  rule(:appendix) { str("app").as(:appendix) }

  # "supp" or "sup" followed by zero or more digits.
  rule(:supplement) { (str("supp") | str("sup")) >> match('\d').repeat.as(:supplement) }

  rule(:errata) { str("-").maybe >> str("errata").as(:errata) }

  rule(:index) { (str("index") | str("indx")).as(:index) }

  rule(:insert) { (str("insert") | str("ins")).as(:insert) }

  # One of the STAGES keys wrapped in parentheses.
  rule(:stage) do
    known_stage = STAGES.keys.map { |name| str(name) }.reduce(:|)
    str("(") >> known_stage.as(:stage) >> str(")")
  end

  rule(:digits_with_suffix) do
    # do not match with 428P1
    trailing_suffix = number_suffix >> match('\d').absent?
    digits >> trailing_suffix.maybe
  end

  rule(:first_report_number) { digits_with_suffix.as(:first_report_number) }

  rule(:second_report_number) { digits_with_suffix.as(:second_report_number) }

  # One number, optionally followed by "-" and a second number.
  rule(:report_number) { first_report_number >> (str("-") >> second_report_number).maybe }

  rule(:part_prefixes) do
    str("pt") | str("p")
  end

  rule(:part) { part_prefixes >> digits.as(:part) }

  # "r" followed by optional digits and an optional lower-case letter; the
  # captured revision may therefore be empty.
  rule(:revision) do
    revision_number = digits >> match("[a-z]").maybe
    str("r") >> (revision_number.maybe).as(:revision)
  end

  rule(:volume) { str("v") >> digits.as(:volume) }

  rule(:version) { str("ver") >> digits.as(:version) }

  # "/Upd" or "/upd", the update number, "-", then the update year.
  rule(:update) do
    prefix = str("/Upd") | str("/upd")
    prefix >> digits.as(:update_number) >> str("-") >> digits.as(:update_year)
  end

  # Three word characters, either in parentheses or after a dot.
  rule(:translation) do
    in_parentheses = str("(") >> match('\w').repeat(3, 3).as(:translation) >> str(")")
    after_dot = str(".") >> match('\w').repeat(3, 3).as(:translation)
    in_parentheses | after_dot
  end

  rule(:edition_prefixes) do
    str("e")
  end

  rule(:edition) { edition_prefixes >> digits.as(:edition) }

  rule(:addendum) { (str("-add") | str(".add-1")).as(:addendum) }

  rule(:section) { str("sec") >> digits.as(:section) }

  root(:identifier)
end
|
106
|
+
end
|
107
|
+
end
|