traject 3.8.1 → 3.8.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.github/workflows/ruby.yml +10 -2
- data/CHANGES.md +15 -3
- data/lib/tasks/load_maps.rake +51 -8
- data/lib/traject/macros/marc21_semantics.rb +7 -1
- data/lib/traject/version.rb +1 -1
- data/lib/translation_maps/marc_languages.yaml +7500 -6
- data/test/command_line_test.rb +3 -5
- data/test/indexer/error_handler_test.rb +2 -2
- data/test/indexer/macros/macros_marc21_semantics_test.rb +20 -1
- data/test/test_support/iso639-3_lang.marc +1 -0
- data/traject.gemspec +3 -0
- metadata +33 -7
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: c4434e6a85dc5176d84dfb2c6e42c0882937cdffa37c24e5f5f06844ad0e542d
|
4
|
+
data.tar.gz: fd2da1d197ace61ab86e2308298e77ed64866ef0f1ffc49b88b344578900f76e
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 3aad25487edbb9489a30f6d8a9c6c2fdb467683c6cd6a96199d7de10538f861fd92c2f25ec8213b5a753ef85bc7440c1d27ee48d65fc05d2f36e735a15a7b6a6
|
7
|
+
data.tar.gz: 7641b36d876e01d3b8188add71a4121a768116a8bd2462c1fcfb0e2652af7ddebfc9a17d03dd24c68fa56b8810e2b7cd865eea53ff905a53fb5f7675c5dc99e4
|
data/.github/workflows/ruby.yml
CHANGED
@@ -2,7 +2,7 @@ name: CI
|
|
2
2
|
|
3
3
|
on:
|
4
4
|
push:
|
5
|
-
branches: [
|
5
|
+
branches: [ main ]
|
6
6
|
pull_request:
|
7
7
|
branches: ['**']
|
8
8
|
|
@@ -12,7 +12,15 @@ jobs:
|
|
12
12
|
strategy:
|
13
13
|
fail-fast: false
|
14
14
|
matrix:
|
15
|
-
ruby:
|
15
|
+
ruby:
|
16
|
+
- '2.7'
|
17
|
+
- '3.0'
|
18
|
+
- '3.1'
|
19
|
+
- '3.2'
|
20
|
+
- '3.3'
|
21
|
+
- '3.4'
|
22
|
+
- 'jruby-9.3'
|
23
|
+
- 'jruby-9.4'
|
16
24
|
name: Ruby ${{ matrix.ruby }}
|
17
25
|
steps:
|
18
26
|
- uses: actions/checkout@v2
|
data/CHANGES.md
CHANGED
@@ -1,10 +1,22 @@
|
|
1
1
|
# Changes
|
2
2
|
|
3
|
-
##
|
3
|
+
## 3.8.3 Compatibility release for ruby 3.4
|
4
4
|
|
5
|
-
|
5
|
+
Note that some gems, `ruby-marc` in particular, will throw zillions of
|
6
|
+
warnings about literal strings being frozen in future versions of ruby.
|
6
7
|
|
7
|
-
|
8
|
+
- Update CI testing matrix
|
9
|
+
- removed MRI ruby 2.4, 2.5, 2.6, and jruby-9.2
|
10
|
+
- added MRI ruby 3.4
|
11
|
+
- Add gem dependencies for `csv` and `mutex_m`, both of which were
|
12
|
+
part of the standard library prior to ruby 3.4
|
13
|
+
|
14
|
+
|
15
|
+
## 3.8.2
|
16
|
+
|
17
|
+
Bug fix for the `#filing_version` logic, which was incorrectly assuming the
|
18
|
+
first subfield in a field would hold content (e.g., `$a`) and thus failed
|
19
|
+
when it held a pointer to a linking field (e.g., `$6 245-01`).
|
8
20
|
|
9
21
|
## 3.8.1
|
10
22
|
|
data/lib/tasks/load_maps.rake
CHANGED
@@ -1,20 +1,16 @@
|
|
1
1
|
require 'net/http'
|
2
2
|
require 'open-uri'
|
3
|
+
require 'csv'
|
3
4
|
|
4
5
|
|
5
6
|
|
7
|
+
# XML namespace URI that the load_maps tasks bind to the `codelist` prefix
# when running XPath queries against the downloaded code-list XML.
# Frozen so the shared constant cannot be mutated by accident.
CODELIST_NS = 'info:lc/xmlns/codelist-v1'.freeze
|
6
8
|
|
7
9
|
namespace :load_maps do
|
8
10
|
|
9
11
|
desc "Load MARC geo codes by screen-scraping LC"
|
10
|
-
task :marc_geographic do
|
11
|
-
|
12
|
-
require 'nokogiri'
|
13
|
-
rescue LoadError => e
|
14
|
-
$stderr.puts "\n load_maps:marc_geographic task requires nokogiri"
|
15
|
-
$stderr.puts " Try `gem install nokogiri` and try again. Exiting...\n\n"
|
16
|
-
exit 1
|
17
|
-
end
|
12
|
+
task :marc_geographic do |task|
|
13
|
+
require_nokogiri(task)
|
18
14
|
|
19
15
|
source_url = "http://www.loc.gov/marc/geoareas/gacs_code.html"
|
20
16
|
|
@@ -45,4 +41,51 @@ namespace :load_maps do
|
|
45
41
|
end
|
46
42
|
$stderr.puts "Done."
|
47
43
|
end
|
44
|
+
|
45
|
+
# Regenerates the marc_languages translation map from two remote sources:
# the code-list XML at loc.gov and the ISO 639-3 tab-separated table at
# sil.org. The destination defaults to translation_maps/marc_languages.yaml
# relative to this file and can be overridden with the OUTPUT_TO environment
# variable.
#
# Both sources are downloaded and parsed BEFORE the output file is opened, so
# a network or parse failure no longer leaves a truncated map behind, and the
# file is written through the block form of File.open so the handle is always
# closed (and flushed) even if writing raises.
desc "Load MARC language codes from LOC and SIL"
task :marc_languages do |task|
  require_nokogiri(task)
  filename = ENV["OUTPUT_TO"] || File.expand_path("../../translation_maps/marc_languages.yaml", __FILE__)

  # Codes dropped from both sources before writing.
  excluded_codes = %w[und zxx]

  # Source 1: code => name pairs taken from each <language> element.
  marc_language_source_url = 'https://www.loc.gov/standards/codelists/languages.xml'
  doc = Nokogiri::XML(URI.parse(marc_language_source_url).open)
  language_nodes = doc.xpath('//codelist:language', codelist: CODELIST_NS)
  marc_language_hash = language_nodes.to_h { |node|
    [node.xpath('./codelist:code/text()', codelist: CODELIST_NS).to_s,
     node.xpath('./codelist:name/text()', codelist: CODELIST_NS).to_s]
  }.reject { |key, _val| excluded_codes.include? key }

  # Source 2: Id => Ref_Name pairs from the tab-separated table, skipping any
  # code already supplied by source 1 so the first source takes precedence.
  iso_639_3_url = 'https://iso639-3.sil.org/sites/iso639-3/files/downloads/iso-639-3.tab'
  parsed_url = URI.parse(iso_639_3_url)
  iso_languages = CSV.parse(parsed_url.read(encoding: 'UTF-8'), headers: true, col_sep: "\t", encoding: "UTF-8")
  iso_language_hash = iso_languages.to_h { |row| [row['Id'], row['Ref_Name']] }
                                   .reject { |key, _val| excluded_codes.include? key }
                                   .reject { |key, _val| marc_language_hash.key?(key) }

  $stderr.puts "Writing to `#{filename}` ..."
  File.open(filename, "w:utf-8") do |file|
    # Every line is written with an explicit "\r" so the file keeps CRLF
    # line endings (puts appends the "\n" unless the string already ends
    # with one).
    file.puts("# Map Language Codes (in 008[35-37], 041) to User Friendly Term\r")

    file.puts "\r"
    file.puts("# MARC language codes (including obsolete codes), from #{marc_language_source_url}\r\n")
    marc_language_hash.sort_by { |k, _v| k }.each do |key, value|
      file.puts("#{key}: #{escape_special_yaml_chars(value)}\r")
    end

    file.puts "\r"
    file.puts("# ISO 639-3 codes, from #{iso_639_3_url}\r")
    iso_language_hash.sort_by { |k, _v| k }.each do |key, value|
      file.puts("#{key}: #{escape_special_yaml_chars(value)}\r")
    end
  end
end
|
79
|
+
|
80
|
+
# Loads nokogiri on behalf of a rake task.
#
# task - the calling task; its name (when task is non-nil) is interpolated
#        into the error message.
#
# Returns whatever `require` returns when the library loads. If the library
# cannot be loaded, prints installation instructions to $stderr and
# terminates the process with exit status 1.
def require_nokogiri(task)
  begin
    require 'nokogiri'
  rescue LoadError
    [
      "\n #{task&.name} task requires nokogiri",
      " Try `gem install nokogiri` and try again. Exiting...\n\n"
    ].each { |line| $stderr.puts line }
    exit 1
  end
end
|
87
|
+
|
88
|
+
# Renders a value so it can be written after `key: ` on one line of a YAML
# mapping and be read back as the same string.
#
# string - the value to emit (must be a String).
#
# Returns the string unchanged when it is safe as a plain scalar; otherwise
# returns it wrapped in double quotes, with embedded backslashes and double
# quotes backslash-escaped. Strings containing a comma or an apostrophe (and
# no double quote or backslash) are wrapped exactly as before. Quoting is
# additionally applied to values that are empty, contain `"`, `\` or `#`,
# contain a colon followed by whitespace or end of string, start with
# whitespace or a YAML indicator character, or end with whitespace.
def escape_special_yaml_chars(string)
  needs_quoting =
    string.empty? ||
    string.match?(/[,'"\\\#]|:(\s|\z)|\A[\s\-?\[\]{}&*!|>%@`]|\s\z/)
  return string unless needs_quoting

  # Block form of gsub avoids backslash-handling surprises in replacement
  # strings: each `"` or `\` is emitted with one backslash in front of it.
  escaped = string.gsub(/["\\]/) { |char| "\\#{char}" }
  %Q{"#{escaped}"}
end
|
48
91
|
end
|
@@ -167,7 +167,13 @@ module Traject::Macros
|
|
167
167
|
# (b) include the first subfield in the record
|
168
168
|
|
169
169
|
subs = spec.subfields
|
170
|
-
|
170
|
+
|
171
|
+
# Get the code for the first alphabetic subfield, which would be
|
172
|
+
# the one getting characters shifted off
|
173
|
+
|
174
|
+
first_alpha_code = field.subfields.first{|sf| sf.code =~ /[a-z]/}.code
|
175
|
+
|
176
|
+
return str unless subs && subs.include?(first_alpha_code)
|
171
177
|
|
172
178
|
# OK. If we got this far we actually need to strip characters off the string
|
173
179
|
|
data/lib/traject/version.rb
CHANGED