relaton-nist 1.19.0 → 1.19.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: cd31209270fb62d1b7dabbddf7f379eb7b33a21832f3e290dd9ed49c94d715c4
4
- data.tar.gz: 8f15ad43f7d1edfd0a9135a6b750bf1414c3fd24216fd30c730586b9cd342f5c
3
+ metadata.gz: 84f25c99f7dfa62a09294c042a3774332fdb018841028aa7ef0dde5b07c9788c
4
+ data.tar.gz: 618fca8f81fe125d4c265c02d1cf4a65c8188b960f7dd91332e91b4ec22bb05d
5
5
  SHA512:
6
- metadata.gz: ea6daf8959d56b98ab2e02e63315c2ecf7bd483824ec2ad0832fc9239e32d57e556942d86edbaa4c88655904d2e04e9a05c50cb4bbf38486773ab3cb0986c851
7
- data.tar.gz: 2ca580e13556eb72455cc61f9a783d30894675d3f98630dd2de589a3a2a894380bc96a26453eb345e727eec37a7ecb8535b35aa011b34cab9580b858fc0e70ae
6
+ metadata.gz: 2e4c3cfdec6fbe121b8b669e7beca74df804bbda725c358fc0be0bccc8853d12644f79937edf9eebd06fb06a0a91b92b37d61ad9267784ed6a3f476d2f3ea8ac
7
+ data.tar.gz: 517f957beaf97d42b0cf0d8fc9749ef9af2f99f2db0510a994be4e4ac56003f2d6c5ad2fa46d6f574b0868c520e79797911dbbe5d6a1516382f6625f486b6e17
@@ -1,10 +1,12 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  require "yaml"
4
+ require "loc_mods"
5
+ require_relative "mods_parser"
4
6
 
5
7
  module RelatonNist
6
8
  class DataFetcher
7
- URL = "https://raw.githubusercontent.com/usnistgov/NIST-Tech-Pubs/nist-pages/xml/allrecords.xml"
9
+ URL = "https://github.com/usnistgov/NIST-Tech-Pubs/releases/download/May2024/allrecords-MODS.xml"
8
10
 
9
11
  def initialize(output, format)
10
12
  @output = output
@@ -68,16 +70,18 @@ module RelatonNist
68
70
  end
69
71
 
70
72
  def fetch_tech_pubs
71
- docs = Nokogiri::XML OpenURI.open_uri URL
72
- docs.xpath(
73
- "/body/query/doi_record/report-paper/report-paper_metadata",
74
- ).each { |doc| write_file TechPubsParser.parse(doc, series) }
73
+ docs = LocMods::Collection.from_xml OpenURI.open_uri(URL)
74
+ # docs.xpath(
75
+ # "/body/query/doi_record/report-paper/report-paper_metadata",
76
+ # )
77
+ docs.mods.each { |doc| write_file ModsParser.new(doc, series).parse }
75
78
  end
76
79
 
77
80
  def add_static_files
78
81
  Dir["./static/*.yaml"].each do |file|
79
82
  hash = YAML.load_file file
80
- write_file RelatonNist::NistBibliographicItem.from_hash(hash)
83
+ bib = RelatonNist::NistBibliographicItem.from_hash(hash)
84
+ index.add_or_update bib.docidentifier[0].id, file
81
85
  end
82
86
  end
83
87
 
@@ -0,0 +1,209 @@
1
+ module RelatonNist
2
+ class ModsParser
3
+ RELATION_TYPES = {
4
+ "otherVersion" => "editionOf",
5
+ "preceding" => "updates",
6
+ "succeeding" => "updatedBy",
7
+ }.freeze
8
+
9
+ ATTRS = %i[type docid title link abstract date doctype contributor relation place series].freeze
10
+
11
+ def initialize(doc, series)
12
+ @doc = doc
13
+ @series = series
14
+ end
15
+
16
+ # @return [RelatonNist::NistBibliographicItem]
17
+ def parse
18
+ args = ATTRS.each_with_object({}) do |attr, hash|
19
+ hash[attr] = send("parse_#{attr}")
20
+ end
21
+ NistBibliographicItem.new(**args)
22
+ end
23
+
24
+ def parse_type
25
+ "standard"
26
+ end
27
+
28
+ # @return [Array<RelatonBib::DocumentIdentifier>]
29
+ def parse_docid
30
+ [
31
+ { type: "NIST", id: pub_id, primary: true },
32
+ { type: "DOI", id: doi },
33
+ ].map { |id| RelatonBib::DocumentIdentifier.new(**id) }
34
+ end
35
+
36
+ # @return [String]
37
+ def pub_id
38
+ get_id_from_str doi
39
+ end
40
+
41
+ def get_id_from_str(str)
42
+ str.match(/\/((?:NBS|NIST).+)/)[1].gsub(".", " ")
43
+ end
44
+
45
+ # @return [String]
46
+ def doi
47
+ url = @doc.location.reduce(nil) { |m, l| m || l.url.detect { |u| u.usage == "primary display" } }
48
+ id = url.content.match(/10\.6028\/.+/)[0]
49
+ case id
50
+ when "10.6028/NBS.CIRC.sup" then "10.6028/NBS.CIRC.24e7sup"
51
+ when "10.6028/NBS.CIRC.supJun1925-Jun1926" then "10.6028/NBS.CIRC.24e7sup2"
52
+ when "10.6028/NBS.CIRC.supJun1925-Jun1927" then "10.6028/NBS.CIRC.24e7sup3"
53
+ when "10.6028/NBS.CIRC.24supJuly1922" then "10.6028/NBS.CIRC.24e6sup"
54
+ when "10.6028/NBS.CIRC.24supJan1924" then "10.6028/NBS.CIRC.24e6sup2"
55
+ else id
56
+ end
57
+ end
58
+
59
+ # @return [Array<RelatonBib::TypedTitleString>]
60
+ def parse_title
61
+ title = @doc.title_info.reduce([]) do |a, ti|
62
+ next a if ti.type == "alternative"
63
+
64
+ a += ti.title.map { |t| create_title(t, "title-main", ti.non_sort[0]) }
65
+ a + ti.sub_title.map { |t| create_title(t, "title-part") }
66
+ end
67
+ if title.size > 1
68
+ content = title.map { |t| t.title.content }.join(" - ")
69
+ title << create_title(content, "main")
70
+ elsif title.size == 1
71
+ title[0].instance_variable_set :@type, "main"
72
+ end
73
+ title
74
+ end
75
+
76
+ def create_title(title, type, non_sort = nil)
77
+ content = title.gsub("\n", " ").squeeze(" ").strip
78
+ content = "#{non_sort.content}#{content}".squeeze(" ") if non_sort
79
+ RelatonBib::TypedTitleString.new content: content, type: type, language: "en", script: "Latn"
80
+ end
81
+
82
+ def parse_link
83
+ @doc.location.map do |location|
84
+ url = location.url.first
85
+ type = url.usage == "primary display" ? "doi" : "src"
86
+ RelatonBib::TypedUri.new content: url.content, type: type
87
+ end
88
+ end
89
+
90
+ def parse_abstract
91
+ @doc.abstract.map do |a|
92
+ content = a.content.gsub("\n", " ").squeeze(" ").strip
93
+ RelatonBib::FormattedString.new content: content, language: "en", script: "Latn"
94
+ end
95
+ end
96
+
97
+ def parse_date
98
+ date = @doc.origin_info[0].date_issued.map do |di|
99
+ create_date(di, "issued")
100
+ # end + @doc.record_info[0].record_creation_date.map do |rcd|
101
+ # create_date(rcd, "created")
102
+ # end + @doc.record_info[0].record_change_date.map do |rcd|
103
+ # create_date(rcd, "updated")
104
+ end
105
+ date.compact
106
+ end
107
+
108
+ def create_date(date, type)
109
+ RelatonBib::BibliographicDate.new type: type, on: decode_date(date)
110
+ rescue Date::Error
111
+ end
112
+
113
+ def decode_date(date)
114
+ if date.encoding == "marc" && date.content.size == 6
115
+ Date.strptime(date.content, "%y%m%d").to_s
116
+ elsif date.encoding == "iso8601"
117
+ Date.strptime(date.content, "%Y%m%d").to_s
118
+ else date.content
119
+ end
120
+ end
121
+
122
+ def parse_doctype
123
+ RelatonBib::DocumentType.new(type: "standard")
124
+ end
125
+
126
+ def parse_contributor
127
+ # exclude primary contributors to avoid duplication
128
+ @doc.name.reject { |n| n.usage == "primary" }.map do |name|
129
+ entity, default_role = create_entity(name)
130
+ next unless entity
131
+
132
+ role = name.role.reduce([]) do |a, r|
133
+ a + r.role_term.map { |rt| { type: rt.content } }
134
+ end
135
+ role << { type: default_role } if role.empty?
136
+ RelatonBib::ContributionInfo.new entity: entity, role: role
137
+ end.compact
138
+ end
139
+
140
+ def create_entity(name)
141
+ case name.type
142
+ when "personal" then [create_person(name), "author"]
143
+ when "corporate" then [create_org(name), "publisher"]
144
+ end
145
+ end
146
+
147
+ def create_person(name)
148
+ # exclude typed name parts because they are not actual name parts
149
+ cname = name.name_part.reject(&:type).map(&:content).join(" ")
150
+ complatename = RelatonBib::LocalizedString.new cname, "en"
151
+ fname = RelatonBib::FullName.new completename: complatename
152
+ name_id = name.name_identifier[0]
153
+ identifier = RelatonBib::PersonIdentifier.new "uri", name_id.content if name_id
154
+ RelatonBib::Person.new name: fname, identifier: [identifier]
155
+ end
156
+
157
+ def create_org(name)
158
+ names = name.name_part.reject(&:type).map { |n| n.content.gsub("\n", " ").squeeze(" ").strip }
159
+ url = name.name_identifier[0]&.content
160
+ id = RelatonBib::OrgIdentifier.new "uri", url if url
161
+ RelatonBib::Organization.new name: names, identifier: [id]
162
+ end
163
+
164
+ def parse_relation
165
+ @doc.related_item.reject { |ri| ri.type == "series" }.map do |ri|
166
+ type = RELATION_TYPES[ri.type]
167
+ RelatonBib::DocumentRelation.new(type: type, bibitem: create_related_item(ri))
168
+ end
169
+ end
170
+
171
+ def create_related_item(item)
172
+ item_id = get_id_from_str related_item_id(item)
173
+ docid = RelatonBib::DocumentIdentifier.new type: "NIST", id: item_id
174
+ fref = RelatonBib::FormattedRef.new content: item_id
175
+ NistBibliographicItem.new(docid: [docid], formattedref: fref)
176
+ end
177
+
178
+ def related_item_id(item)
179
+ if item.other_type && item.other_type[0..6] == "10.6028"
180
+ item.other_type
181
+ else
182
+ item.name[0].name_part[0].content
183
+ end
184
+ end
185
+
186
+ def parse_place
187
+ @doc.origin_info.select { |p| p.event_type == "publisher"}.map do |p|
188
+ place = p.place[0].place_term[0].content
189
+ /(?<city>\w+), (?<state>\w+)/ =~ place
190
+ RelatonBib::Place.new city: city, region: create_region(state)
191
+ end
192
+ end
193
+
194
+ def create_region(state)
195
+ [RelatonBib::Place::RegionType.new(iso: state)]
196
+ rescue ArgumentError
197
+ []
198
+ end
199
+
200
+ def parse_series
201
+ @doc.related_item.select { |ri| ri.type == "series" }.map do |ri|
202
+ tinfo = ri.title_info[0]
203
+ tcontent = tinfo.title[0].strip
204
+ title = RelatonBib::TypedTitleString.new content: tcontent
205
+ RelatonBib::Series.new title: title, number: tinfo.part_number[0]
206
+ end
207
+ end
208
+ end
209
+ end
@@ -123,7 +123,7 @@ module RelatonNist
123
123
  next if iter && r.status.iteration != iteration
124
124
  return { ret: r } if !year
125
125
 
126
- r.date.select { |d| d.type == "published" }.each do |d|
126
+ r.date.select { |d| d.type == "published" || d.type == "issued" }.each do |d|
127
127
  return { ret: r } if year.to_i == d.on(:year)
128
128
 
129
129
  missed_years << d.on(:year)
@@ -1,3 +1,3 @@
1
1
  module RelatonNist
2
- VERSION = "1.19.0".freeze
2
+ VERSION = "1.19.1".freeze
3
3
  end
data/relaton_nist.gemspec CHANGED
@@ -24,7 +24,8 @@ Gem::Specification.new do |spec|
24
24
  spec.required_ruby_version = Gem::Requirement.new(">= 2.7.0")
25
25
 
26
26
  spec.add_dependency "base64"
27
- spec.add_dependency "relaton-bib", "~> 1.19.0"
27
+ spec.add_dependency "loc_mods", "~> 0.2.0"
28
+ spec.add_dependency "relaton-bib", "~> 1.19.2"
28
29
  spec.add_dependency "relaton-index", "~> 0.2.0"
29
30
  spec.add_dependency "rubyzip"
30
31
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: relaton-nist
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.19.0
4
+ version: 1.19.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ribose Inc.
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2024-07-03 00:00:00.000000000 Z
11
+ date: 2024-07-12 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: base64
@@ -24,20 +24,34 @@ dependencies:
24
24
  - - ">="
25
25
  - !ruby/object:Gem::Version
26
26
  version: '0'
27
+ - !ruby/object:Gem::Dependency
28
+ name: loc_mods
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: 0.2.0
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: 0.2.0
27
41
  - !ruby/object:Gem::Dependency
28
42
  name: relaton-bib
29
43
  requirement: !ruby/object:Gem::Requirement
30
44
  requirements:
31
45
  - - "~>"
32
46
  - !ruby/object:Gem::Version
33
- version: 1.19.0
47
+ version: 1.19.2
34
48
  type: :runtime
35
49
  prerelease: false
36
50
  version_requirements: !ruby/object:Gem::Requirement
37
51
  requirements:
38
52
  - - "~>"
39
53
  - !ruby/object:Gem::Version
40
- version: 1.19.0
54
+ version: 1.19.2
41
55
  - !ruby/object:Gem::Dependency
42
56
  name: relaton-index
43
57
  requirement: !ruby/object:Gem::Requirement
@@ -98,6 +112,7 @@ files:
98
112
  - lib/relaton_nist/hash_converter.rb
99
113
  - lib/relaton_nist/hit.rb
100
114
  - lib/relaton_nist/hit_collection.rb
115
+ - lib/relaton_nist/mods_parser.rb
101
116
  - lib/relaton_nist/nist_bibliographic_item.rb
102
117
  - lib/relaton_nist/nist_bibliography.rb
103
118
  - lib/relaton_nist/processor.rb