relaton-nist 1.19.0 → 1.19.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/relaton_nist/data_fetcher.rb +10 -6
- data/lib/relaton_nist/mods_parser.rb +209 -0
- data/lib/relaton_nist/nist_bibliography.rb +1 -1
- data/lib/relaton_nist/version.rb +1 -1
- data/relaton_nist.gemspec +2 -1
- metadata +19 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 84f25c99f7dfa62a09294c042a3774332fdb018841028aa7ef0dde5b07c9788c
|
4
|
+
data.tar.gz: 618fca8f81fe125d4c265c02d1cf4a65c8188b960f7dd91332e91b4ec22bb05d
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 2e4c3cfdec6fbe121b8b669e7beca74df804bbda725c358fc0be0bccc8853d12644f79937edf9eebd06fb06a0a91b92b37d61ad9267784ed6a3f476d2f3ea8ac
|
7
|
+
data.tar.gz: 517f957beaf97d42b0cf0d8fc9749ef9af2f99f2db0510a994be4e4ac56003f2d6c5ad2fa46d6f574b0868c520e79797911dbbe5d6a1516382f6625f486b6e17
|
@@ -1,10 +1,12 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
3
|
require "yaml"
|
4
|
+
require "loc_mods"
|
5
|
+
require_relative "mods_parser"
|
4
6
|
|
5
7
|
module RelatonNist
|
6
8
|
class DataFetcher
|
7
|
-
URL = "https://
|
9
|
+
URL = "https://github.com/usnistgov/NIST-Tech-Pubs/releases/download/May2024/allrecords-MODS.xml"
|
8
10
|
|
9
11
|
def initialize(output, format)
|
10
12
|
@output = output
|
@@ -68,16 +70,18 @@ module RelatonNist
|
|
68
70
|
end
|
69
71
|
|
70
72
|
def fetch_tech_pubs
|
71
|
-
docs =
|
72
|
-
docs.xpath(
|
73
|
-
|
74
|
-
|
73
|
+
docs = LocMods::Collection.from_xml OpenURI.open_uri(URL)
|
74
|
+
# docs.xpath(
|
75
|
+
# "/body/query/doi_record/report-paper/report-paper_metadata",
|
76
|
+
# )
|
77
|
+
docs.mods.each { |doc| write_file ModsParser.new(doc, series).parse }
|
75
78
|
end
|
76
79
|
|
77
80
|
def add_static_files
|
78
81
|
Dir["./static/*.yaml"].each do |file|
|
79
82
|
hash = YAML.load_file file
|
80
|
-
|
83
|
+
bib = RelatonNist::NistBibliographicItem.from_hash(hash)
|
84
|
+
index.add_or_update bib.docidentifier[0].id, file
|
81
85
|
end
|
82
86
|
end
|
83
87
|
|
@@ -0,0 +1,209 @@
|
|
1
|
+
module RelatonNist
|
2
|
+
class ModsParser
|
3
|
+
RELATION_TYPES = {
|
4
|
+
"otherVersion" => "editionOf",
|
5
|
+
"preceding" => "updates",
|
6
|
+
"succeeding" => "updatedBy",
|
7
|
+
}.freeze
|
8
|
+
|
9
|
+
ATTRS = %i[type docid title link abstract date doctype contributor relation place series].freeze
|
10
|
+
|
11
|
+
def initialize(doc, series)
|
12
|
+
@doc = doc
|
13
|
+
@series = series
|
14
|
+
end
|
15
|
+
|
16
|
+
# @return [RelatonNist::NistBibliographicItem]
|
17
|
+
def parse
|
18
|
+
args = ATTRS.each_with_object({}) do |attr, hash|
|
19
|
+
hash[attr] = send("parse_#{attr}")
|
20
|
+
end
|
21
|
+
NistBibliographicItem.new(**args)
|
22
|
+
end
|
23
|
+
|
24
|
+
def parse_type
|
25
|
+
"standard"
|
26
|
+
end
|
27
|
+
|
28
|
+
# @return [Array<RelatonBib::DocumentIdentifier>]
|
29
|
+
def parse_docid
|
30
|
+
[
|
31
|
+
{ type: "NIST", id: pub_id, primary: true },
|
32
|
+
{ type: "DOI", id: doi },
|
33
|
+
].map { |id| RelatonBib::DocumentIdentifier.new(**id) }
|
34
|
+
end
|
35
|
+
|
36
|
+
# @return [String]
|
37
|
+
def pub_id
|
38
|
+
get_id_from_str doi
|
39
|
+
end
|
40
|
+
|
41
|
+
def get_id_from_str(str)
|
42
|
+
str.match(/\/((?:NBS|NIST).+)/)[1].gsub(".", " ")
|
43
|
+
end
|
44
|
+
|
45
|
+
# @return [String]
|
46
|
+
def doi
|
47
|
+
url = @doc.location.reduce(nil) { |m, l| m || l.url.detect { |u| u.usage == "primary display" } }
|
48
|
+
id = url.content.match(/10\.6028\/.+/)[0]
|
49
|
+
case id
|
50
|
+
when "10.6028/NBS.CIRC.sup" then "10.6028/NBS.CIRC.24e7sup"
|
51
|
+
when "10.6028/NBS.CIRC.supJun1925-Jun1926" then "10.6028/NBS.CIRC.24e7sup2"
|
52
|
+
when "10.6028/NBS.CIRC.supJun1925-Jun1927" then "10.6028/NBS.CIRC.24e7sup3"
|
53
|
+
when "10.6028/NBS.CIRC.24supJuly1922" then "10.6028/NBS.CIRC.24e6sup"
|
54
|
+
when "10.6028/NBS.CIRC.24supJan1924" then "10.6028/NBS.CIRC.24e6sup2"
|
55
|
+
else id
|
56
|
+
end
|
57
|
+
end
|
58
|
+
|
59
|
+
# @return [Array<RelatonBib::TypedTitleString>]
|
60
|
+
def parse_title
|
61
|
+
title = @doc.title_info.reduce([]) do |a, ti|
|
62
|
+
next a if ti.type == "alternative"
|
63
|
+
|
64
|
+
a += ti.title.map { |t| create_title(t, "title-main", ti.non_sort[0]) }
|
65
|
+
a + ti.sub_title.map { |t| create_title(t, "title-part") }
|
66
|
+
end
|
67
|
+
if title.size > 1
|
68
|
+
content = title.map { |t| t.title.content }.join(" - ")
|
69
|
+
title << create_title(content, "main")
|
70
|
+
elsif title.size == 1
|
71
|
+
title[0].instance_variable_set :@type, "main"
|
72
|
+
end
|
73
|
+
title
|
74
|
+
end
|
75
|
+
|
76
|
+
def create_title(title, type, non_sort = nil)
|
77
|
+
content = title.gsub("\n", " ").squeeze(" ").strip
|
78
|
+
content = "#{non_sort.content}#{content}".squeeze(" ") if non_sort
|
79
|
+
RelatonBib::TypedTitleString.new content: content, type: type, language: "en", script: "Latn"
|
80
|
+
end
|
81
|
+
|
82
|
+
def parse_link
|
83
|
+
@doc.location.map do |location|
|
84
|
+
url = location.url.first
|
85
|
+
type = url.usage == "primary display" ? "doi" : "src"
|
86
|
+
RelatonBib::TypedUri.new content: url.content, type: type
|
87
|
+
end
|
88
|
+
end
|
89
|
+
|
90
|
+
def parse_abstract
|
91
|
+
@doc.abstract.map do |a|
|
92
|
+
content = a.content.gsub("\n", " ").squeeze(" ").strip
|
93
|
+
RelatonBib::FormattedString.new content: content, language: "en", script: "Latn"
|
94
|
+
end
|
95
|
+
end
|
96
|
+
|
97
|
+
def parse_date
|
98
|
+
date = @doc.origin_info[0].date_issued.map do |di|
|
99
|
+
create_date(di, "issued")
|
100
|
+
# end + @doc.record_info[0].record_creation_date.map do |rcd|
|
101
|
+
# create_date(rcd, "created")
|
102
|
+
# end + @doc.record_info[0].record_change_date.map do |rcd|
|
103
|
+
# create_date(rcd, "updated")
|
104
|
+
end
|
105
|
+
date.compact
|
106
|
+
end
|
107
|
+
|
108
|
+
def create_date(date, type)
|
109
|
+
RelatonBib::BibliographicDate.new type: type, on: decode_date(date)
|
110
|
+
rescue Date::Error
|
111
|
+
end
|
112
|
+
|
113
|
+
def decode_date(date)
|
114
|
+
if date.encoding == "marc" && date.content.size == 6
|
115
|
+
Date.strptime(date.content, "%y%m%d").to_s
|
116
|
+
elsif date.encoding == "iso8601"
|
117
|
+
Date.strptime(date.content, "%Y%m%d").to_s
|
118
|
+
else date.content
|
119
|
+
end
|
120
|
+
end
|
121
|
+
|
122
|
+
def parse_doctype
|
123
|
+
RelatonBib::DocumentType.new(type: "standard")
|
124
|
+
end
|
125
|
+
|
126
|
+
def parse_contributor
|
127
|
+
# eaxclude primary contributors to avoid duplication
|
128
|
+
@doc.name.reject { |n| n.usage == "primary" }.map do |name|
|
129
|
+
entity, default_role = create_entity(name)
|
130
|
+
next unless entity
|
131
|
+
|
132
|
+
role = name.role.reduce([]) do |a, r|
|
133
|
+
a + r.role_term.map { |rt| { type: rt.content } }
|
134
|
+
end
|
135
|
+
role << { type: default_role } if role.empty?
|
136
|
+
RelatonBib::ContributionInfo.new entity: entity, role: role
|
137
|
+
end.compact
|
138
|
+
end
|
139
|
+
|
140
|
+
def create_entity(name)
|
141
|
+
case name.type
|
142
|
+
when "personal" then [create_person(name), "author"]
|
143
|
+
when "corporate" then [create_org(name), "publisher"]
|
144
|
+
end
|
145
|
+
end
|
146
|
+
|
147
|
+
def create_person(name)
|
148
|
+
# exclude typed name parts because they are not actual name parts
|
149
|
+
cname = name.name_part.reject(&:type).map(&:content).join(" ")
|
150
|
+
complatename = RelatonBib::LocalizedString.new cname, "en"
|
151
|
+
fname = RelatonBib::FullName.new completename: complatename
|
152
|
+
name_id = name.name_identifier[0]
|
153
|
+
identifier = RelatonBib::PersonIdentifier.new "uri", name_id.content if name_id
|
154
|
+
RelatonBib::Person.new name: fname, identifier: [identifier]
|
155
|
+
end
|
156
|
+
|
157
|
+
def create_org(name)
|
158
|
+
names = name.name_part.reject(&:type).map { |n| n.content.gsub("\n", " ").squeeze(" ").strip }
|
159
|
+
url = name.name_identifier[0]&.content
|
160
|
+
id = RelatonBib::OrgIdentifier.new "uri", url if url
|
161
|
+
RelatonBib::Organization.new name: names, identifier: [id]
|
162
|
+
end
|
163
|
+
|
164
|
+
def parse_relation
|
165
|
+
@doc.related_item.reject { |ri| ri.type == "series" }.map do |ri|
|
166
|
+
type = RELATION_TYPES[ri.type]
|
167
|
+
RelatonBib::DocumentRelation.new(type: type, bibitem: create_related_item(ri))
|
168
|
+
end
|
169
|
+
end
|
170
|
+
|
171
|
+
def create_related_item(item)
|
172
|
+
item_id = get_id_from_str related_item_id(item)
|
173
|
+
docid = RelatonBib::DocumentIdentifier.new type: "NIST", id: item_id
|
174
|
+
fref = RelatonBib::FormattedRef.new content: item_id
|
175
|
+
NistBibliographicItem.new(docid: [docid], formattedref: fref)
|
176
|
+
end
|
177
|
+
|
178
|
+
def related_item_id(item)
|
179
|
+
if item.other_type && item.other_type[0..6] == "10.6028"
|
180
|
+
item.other_type
|
181
|
+
else
|
182
|
+
item.name[0].name_part[0].content
|
183
|
+
end
|
184
|
+
end
|
185
|
+
|
186
|
+
def parse_place
|
187
|
+
@doc.origin_info.select { |p| p.event_type == "publisher"}.map do |p|
|
188
|
+
place = p.place[0].place_term[0].content
|
189
|
+
/(?<city>\w+), (?<state>\w+)/ =~ place
|
190
|
+
RelatonBib::Place.new city: city, region: create_region(state)
|
191
|
+
end
|
192
|
+
end
|
193
|
+
|
194
|
+
def create_region(state)
|
195
|
+
[RelatonBib::Place::RegionType.new(iso: state)]
|
196
|
+
rescue ArgumentError
|
197
|
+
[]
|
198
|
+
end
|
199
|
+
|
200
|
+
def parse_series
|
201
|
+
@doc.related_item.select { |ri| ri.type == "series" }.map do |ri|
|
202
|
+
tinfo = ri.title_info[0]
|
203
|
+
tcontent = tinfo.title[0].strip
|
204
|
+
title = RelatonBib::TypedTitleString.new content: tcontent
|
205
|
+
RelatonBib::Series.new title: title, number: tinfo.part_number[0]
|
206
|
+
end
|
207
|
+
end
|
208
|
+
end
|
209
|
+
end
|
@@ -123,7 +123,7 @@ module RelatonNist
|
|
123
123
|
next if iter && r.status.iteration != iteration
|
124
124
|
return { ret: r } if !year
|
125
125
|
|
126
|
-
r.date.select { |d| d.type == "published" }.each do |d|
|
126
|
+
r.date.select { |d| d.type == "published" || d.type == "issued" }.each do |d|
|
127
127
|
return { ret: r } if year.to_i == d.on(:year)
|
128
128
|
|
129
129
|
missed_years << d.on(:year)
|
data/lib/relaton_nist/version.rb
CHANGED
data/relaton_nist.gemspec
CHANGED
@@ -24,7 +24,8 @@ Gem::Specification.new do |spec|
|
|
24
24
|
spec.required_ruby_version = Gem::Requirement.new(">= 2.7.0")
|
25
25
|
|
26
26
|
spec.add_dependency "base64"
|
27
|
-
spec.add_dependency "
|
27
|
+
spec.add_dependency "loc_mods", "~> 0.2.0"
|
28
|
+
spec.add_dependency "relaton-bib", "~> 1.19.2"
|
28
29
|
spec.add_dependency "relaton-index", "~> 0.2.0"
|
29
30
|
spec.add_dependency "rubyzip"
|
30
31
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: relaton-nist
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.19.
|
4
|
+
version: 1.19.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ribose Inc.
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2024-07-
|
11
|
+
date: 2024-07-12 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: base64
|
@@ -24,20 +24,34 @@ dependencies:
|
|
24
24
|
- - ">="
|
25
25
|
- !ruby/object:Gem::Version
|
26
26
|
version: '0'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: loc_mods
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - "~>"
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: 0.2.0
|
34
|
+
type: :runtime
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - "~>"
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: 0.2.0
|
27
41
|
- !ruby/object:Gem::Dependency
|
28
42
|
name: relaton-bib
|
29
43
|
requirement: !ruby/object:Gem::Requirement
|
30
44
|
requirements:
|
31
45
|
- - "~>"
|
32
46
|
- !ruby/object:Gem::Version
|
33
|
-
version: 1.19.
|
47
|
+
version: 1.19.2
|
34
48
|
type: :runtime
|
35
49
|
prerelease: false
|
36
50
|
version_requirements: !ruby/object:Gem::Requirement
|
37
51
|
requirements:
|
38
52
|
- - "~>"
|
39
53
|
- !ruby/object:Gem::Version
|
40
|
-
version: 1.19.
|
54
|
+
version: 1.19.2
|
41
55
|
- !ruby/object:Gem::Dependency
|
42
56
|
name: relaton-index
|
43
57
|
requirement: !ruby/object:Gem::Requirement
|
@@ -98,6 +112,7 @@ files:
|
|
98
112
|
- lib/relaton_nist/hash_converter.rb
|
99
113
|
- lib/relaton_nist/hit.rb
|
100
114
|
- lib/relaton_nist/hit_collection.rb
|
115
|
+
- lib/relaton_nist/mods_parser.rb
|
101
116
|
- lib/relaton_nist/nist_bibliographic_item.rb
|
102
117
|
- lib/relaton_nist/nist_bibliography.rb
|
103
118
|
- lib/relaton_nist/processor.rb
|