traject 3.8.1 → 3.8.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.github/workflows/ruby.yml +1 -1
- data/CHANGES.md +8 -1
- data/lib/tasks/load_maps.rake +51 -8
- data/lib/traject/macros/marc21_semantics.rb +7 -1
- data/lib/traject/version.rb +1 -1
- data/lib/translation_maps/marc_languages.yaml +7500 -6
- data/test/indexer/macros/macros_marc21_semantics_test.rb +20 -1
- data/test/test_support/iso639-3_lang.marc +1 -0
- metadata +5 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 5e46b17b653fcdad34dbd5f7cf1d203c8d63a5c43eed9691bdb13b1f83d737a5
|
4
|
+
data.tar.gz: e2c7c086ffa93c167dd03be87953a9b2fd5b852ee1ff5695a9269da989c176f7
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: d43267258736b94dcb8befdeec486892195239144855bb3afd7756e9d626af44c3cb511b7fab5da5328f4844ffc8c0bfa1dd9dc89295e96e3ac59e3cc39cc89d
|
7
|
+
data.tar.gz: 304de8c8c6bcd89d4cbc6d98bb3ff9a9a18e117d51df2c6869443c70c3c8dbea9d498fe92f04b192a42100ee979c4fe97ba91895ee5faa7645fa70f4166f09c1
|
data/.github/workflows/ruby.yml
CHANGED
@@ -12,7 +12,7 @@ jobs:
|
|
12
12
|
strategy:
|
13
13
|
fail-fast: false
|
14
14
|
matrix:
|
15
|
-
ruby: [ '2.4', '2.5', '2.6', '2.7', '3.0', '3.1', 'jruby-9.
|
15
|
+
ruby: [ '2.4', '2.5', '2.6', '2.7', '3.0', '3.1', '3.2', '3.3', 'jruby-9.2', 'jruby-9.3', 'jruby-9.4' ]
|
16
16
|
name: Ruby ${{ matrix.ruby }}
|
17
17
|
steps:
|
18
18
|
- uses: actions/checkout@v2
|
data/CHANGES.md
CHANGED
@@ -4,7 +4,14 @@
|
|
4
4
|
|
5
5
|
*
|
6
6
|
|
7
|
-
|
7
|
+
## 3.8.2
|
8
|
+
|
9
|
+
Bug fix for the `#filing_version` logic, which was incorrectly assuming the
|
10
|
+
first subfield in a field would hold content (e.g., `$a`) and thus failed
|
11
|
+
when it held a pointer to a linking field (e.g., `$6 245-01`).
|
12
|
+
|
13
|
+
```
|
14
|
+
|
8
15
|
|
9
16
|
## 3.8.1
|
10
17
|
|
data/lib/tasks/load_maps.rake
CHANGED
@@ -1,20 +1,16 @@
|
|
1
1
|
require 'net/http'
|
2
2
|
require 'open-uri'
|
3
|
+
require 'csv'
|
3
4
|
|
4
5
|
|
5
6
|
|
7
|
+
CODELIST_NS = 'info:lc/xmlns/codelist-v1'
|
6
8
|
|
7
9
|
namespace :load_maps do
|
8
10
|
|
9
11
|
desc "Load MARC geo codes by screen-scraping LC"
|
10
|
-
task :marc_geographic do
|
11
|
-
|
12
|
-
require 'nokogiri'
|
13
|
-
rescue LoadError => e
|
14
|
-
$stderr.puts "\n load_maps:marc_geographic task requires nokogiri"
|
15
|
-
$stderr.puts " Try `gem install nokogiri` and try again. Exiting...\n\n"
|
16
|
-
exit 1
|
17
|
-
end
|
12
|
+
task :marc_geographic do |task|
|
13
|
+
require_nokogiri(task)
|
18
14
|
|
19
15
|
source_url = "http://www.loc.gov/marc/geoareas/gacs_code.html"
|
20
16
|
|
@@ -45,4 +41,51 @@ namespace :load_maps do
|
|
45
41
|
end
|
46
42
|
$stderr.puts "Done."
|
47
43
|
end
|
44
|
+
|
45
|
+
desc "Load MARC language codes from LOC and SIL"
|
46
|
+
# Regenerates the marc_languages translation map.
#
# Writes MARC language codes (from the LOC codelist XML) followed by the
# ISO 639-3 codes (from the SIL tab-separated download) that are not already
# present in the MARC list. The codes `und` and `zxx` are left out of both
# sections. Output goes to ENV["OUTPUT_TO"] when set, otherwise to
# translation_maps/marc_languages.yaml relative to this file.
task :marc_languages do |task|
  require_nokogiri(task)

  skipped_codes = %w[und zxx]
  filename = ENV["OUTPUT_TO"] || File.expand_path("../../translation_maps/marc_languages.yaml", __FILE__)

  # Block form of File.open so the handle is flushed and closed even when a
  # download or parse step below raises.
  File.open(filename, "w:utf-8") do |file|
    $stderr.puts "Writing to `#{filename}` ..."
    file.puts("# Map Language Codes (in 008[35-37], 041) to User Friendly Term\r")

    marc_language_source_url = 'https://www.loc.gov/standards/codelists/languages.xml'
    doc = Nokogiri::XML(URI.parse(marc_language_source_url).open)
    marc_pairs = doc.xpath('//codelist:language', codelist: CODELIST_NS).to_h do |node|
      [node.xpath('./codelist:code/text()', codelist: CODELIST_NS).to_s,
       node.xpath('./codelist:name/text()', codelist: CODELIST_NS).to_s]
    end
    marc_language_hash = marc_pairs.reject { |key, _val| skipped_codes.include?(key) }

    file.puts "\r"
    file.puts("# MARC language codes (including obsolete codes), from #{marc_language_source_url}\r\n")
    marc_language_hash.sort_by { |k, _v| k }.each do |key, value|
      file.puts("#{key}: #{escape_special_yaml_chars(value)}\r")
    end

    iso_639_3_url = 'https://iso639-3.sil.org/sites/iso639-3/files/downloads/iso-639-3.tab'
    parsed_url = URI.parse(iso_639_3_url)
    iso_languages = CSV.parse(parsed_url.read(encoding: 'UTF-8'), headers: true, col_sep: "\t", encoding: "UTF-8")
    # Hash#key? is a direct lookup; the MARC name wins when both lists define a code.
    iso_language_hash = iso_languages.to_h { |row| [row['Id'], row['Ref_Name']] }
                                     .reject { |key, _val| skipped_codes.include?(key) }
                                     .reject { |key, _val| marc_language_hash.key?(key) }

    file.puts "\r"
    file.puts("# ISO 639-3 codes, from #{iso_639_3_url}\r")
    iso_language_hash.sort_by { |k, _v| k }.each do |key, value|
      file.puts("#{key}: #{escape_special_yaml_chars(value)}\r")
    end
  end
end
|
79
|
+
|
80
|
+
# Loads nokogiri on demand for the rake task that needs it.
#
# task - the rake task being run (may be nil); its name is used in the
#        error message.
#
# Returns whatever `require` returns when nokogiri is available. When it
# cannot be loaded, prints install instructions to stderr and exits with
# status 1.
def require_nokogiri(task)
  begin
    require 'nokogiri'
  rescue LoadError
    task_name = task&.name
    $stderr.puts "\n #{task_name} task requires nokogiri"
    $stderr.puts " Try `gem install nokogiri` and try again. Exiting...\n\n"
    exit 1
  end
end
|
87
|
+
|
88
|
+
# Renders a language name as a YAML scalar for a `key: value` line.
#
# Names that are safe as plain scalars are returned unchanged. Names are
# wrapped in double quotes when they are empty, contain a character that
# YAML may treat specially (`,` `'` `"` `\` `#` `:`), start with whitespace
# or a YAML indicator such as `!`, or end with whitespace. Embedded `"` and
# `\` are backslash-escaped so the quoted form parses back to the same name.
#
# string - the language name (String).
#
# Returns the String to write after `key: `.
def escape_special_yaml_chars(string)
  needs_quoting =
    string.empty? ||
    string.match?(/[,'"\\#:]/) ||
    string.match?(/\A[\s!&*\-?\[\]{}|>@`%]/) ||
    string.match?(/\s\z/)
  return string unless needs_quoting

  escaped = string.gsub(/["\\]/) { |char| "\\#{char}" }
  %Q{"#{escaped}"}
end
|
48
91
|
end
|
@@ -167,7 +167,13 @@ module Traject::Macros
|
|
167
167
|
# (b) include the first subfield in the record
|
168
168
|
|
169
169
|
subs = spec.subfields
|
170
|
-
|
170
|
+
|
171
|
+
# Get the code for the first alphabetic subfield, which would be
|
172
|
+
# the one getting characters shifted off
|
173
|
+
|
174
|
+
# Array#first ignores a block, so #find is needed to actually skip
# non-alphabetic subfields (e.g. a leading `$6` linkage). `&.` leaves the
# code nil when the field has no alphabetic subfield at all.
first_alpha_code = field.subfields.find { |sf| sf.code =~ /[a-z]/ }&.code
|
175
|
+
|
176
|
+
return str unless subs && subs.include?(first_alpha_code)
|
171
177
|
|
172
178
|
# OK. If we got this far we actually need to strip characters off the string
|
173
179
|
|
data/lib/traject/version.rb
CHANGED