cul_hydra 1.9.1 → 1.9.6
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/app/models/concept.rb +10 -2
- data/app/models/cul/hydra/datastreams/encoded_text_datastream.rb +40 -0
- data/app/models/generic_resource.rb +2 -1
- data/fixtures/spec/BLOB/description-cp1252.txt +1 -0
- data/fixtures/spec/BLOB/description-utf8.txt +1 -0
- data/fixtures/spec/CUL_MODS/mods-titles-extended.xml +2 -0
- data/fixtures/spec/CUL_SOLR/mods-001.xml +1 -1
- data/fixtures/spec/CUL_SOLR/mods-001.yml +1 -1
- data/lib/cul_hydra/indexer.rb +39 -9
- data/lib/cul_hydra/solrizer/mods_fieldable.rb +12 -2
- data/lib/cul_hydra/version.rb +1 -1
- data/lib/tasks/index.rake +5 -3
- metadata +12 -12
- data/config/fedora.yml +0 -26
- data/config/solr.yml +0 -17
- data/config/subs.yml +0 -10
- data/fixtures/spec/CUL_MODS/mods-archival-context-2.xml +0 -22
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: fee2757a484cdb805382ca83fcc8c21354353c85a0d05e55709f090f37d049e3
|
4
|
+
data.tar.gz: 2a642263bb0225de2cb0f6bc2b1b566de3cd8d7e34e78d4dc4be0b76b3d002ab
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 7ac25bc59536aa135ae8a5eda1a3d26faf2dfdbc540e8f272ec73a208a956af3043a85ec7ba2741b466461c431ebfc7cc2abb65398e1ad3c8047bc84ba4cb17a
|
7
|
+
data.tar.gz: c8175c178d6deab4403fa453bb7e441563aff35f1b67e17ee29d215658020aa4d2645d59808d6019766f17a8a600451787911ba62e1b9fe1cf67b80769523274
|
data/app/models/concept.rb
CHANGED
@@ -9,7 +9,7 @@ class Concept < GenericAggregator
|
|
9
9
|
rdf_types(RDF::CUL.Aggregator)
|
10
10
|
rdf_types(RDF::PCDM.Object)
|
11
11
|
|
12
|
-
has_file_datastream :name => "descriptionText", :type
|
12
|
+
has_file_datastream :name => "descriptionText", :type=>Cul::Hydra::Datastreams::EncodedTextDatastream,
|
13
13
|
:versionable => false, :label => 'Textual Description of Concept',
|
14
14
|
:mimeType => 'text/markdown'
|
15
15
|
|
@@ -107,7 +107,15 @@ class Concept < GenericAggregator
|
|
107
107
|
|
108
108
|
def to_solr(solr_doc = Hash.new, opts={})
|
109
109
|
solr_doc = super(solr_doc, opts)
|
110
|
-
|
110
|
+
description.tap do |description_value|
|
111
|
+
if description_value
|
112
|
+
unless description_ds.is_a? Cul::Hydra::Datastreams::EncodedTextDatastream
|
113
|
+
description_value = Cul::Hydra::Datastreams::EncodedTextDatastream.utf8able!(description_value).encode(Encoding::UTF_8)
|
114
|
+
end
|
115
|
+
description_field_name = ::ActiveFedora::SolrService.solr_name(:description_text, :displayable)
|
116
|
+
solr_doc[description_field_name] = description_value
|
117
|
+
end
|
118
|
+
end
|
111
119
|
solr_doc
|
112
120
|
end
|
113
121
|
|
@@ -0,0 +1,40 @@
|
|
1
|
+
module Cul
|
2
|
+
module Hydra
|
3
|
+
module Datastreams
|
4
|
+
class EncodedTextDatastream < ::ActiveFedora::Datastream
|
5
|
+
DEFAULT_PRIORITIES = [ Encoding::UTF_8, Encoding::WINDOWS_1252, Encoding::ISO_8859_1 ]
|
6
|
+
|
7
|
+
def initialize(digital_object=nil, dsid=nil, options={})
|
8
|
+
@encoding_priorities = options.delete(:encodings) || DEFAULT_PRIORITIES
|
9
|
+
super
|
10
|
+
end
|
11
|
+
|
12
|
+
def content=(value)
|
13
|
+
super(utf8able!(value).encode!(Encoding::UTF_8))
|
14
|
+
end
|
15
|
+
|
16
|
+
def content
|
17
|
+
utf8able!(super.to_s).encode!(Encoding::UTF_8)
|
18
|
+
end
|
19
|
+
|
20
|
+
def utf8able!(data)
|
21
|
+
EncodedTextDatastream.utf8able!(data, @encoding_priorities)
|
22
|
+
end
|
23
|
+
|
24
|
+
def self.utf8able!(data, encoding_priorities = DEFAULT_PRIORITIES)
|
25
|
+
return unless data
|
26
|
+
data = data.read if data.is_a? IO
|
27
|
+
content_encoding = encoding_priorities.detect do |enc|
|
28
|
+
begin
|
29
|
+
data.force_encoding(enc).valid_encoding?
|
30
|
+
rescue Encoding::UndefinedConversionError, Encoding::InvalidByteSequenceError
|
31
|
+
false
|
32
|
+
end
|
33
|
+
end
|
34
|
+
raise "could not encode text datastream content" unless content_encoding
|
35
|
+
data.force_encoding(content_encoding)
|
36
|
+
end
|
37
|
+
end
|
38
|
+
end
|
39
|
+
end
|
40
|
+
end
|
@@ -68,7 +68,8 @@ class GenericResource < ::ActiveFedora::Base
|
|
68
68
|
solr_doc["fulltext_tesim"] = []
|
69
69
|
unless self.datastreams["fulltext"].nil?
|
70
70
|
solr_doc["fulltext_tesim"].concat(solr_doc["title_display_ssm"]) unless solr_doc["title_display_ssm"].nil? or solr_doc["title_display_ssm"].length == 0
|
71
|
-
|
71
|
+
utf8able = Cul::Hydra::Datastreams::EncodedTextDatastream.utf8able!(self.datastreams["fulltext"].content)
|
72
|
+
solr_doc["fulltext_tesim"] << utf8able.encode(Encoding::UTF_8)
|
72
73
|
end
|
73
74
|
relationships(:original_name).each do |original_name|
|
74
75
|
solr_doc["original_name_tesim"] ||= []
|
@@ -0,0 +1 @@
|
|
1
|
+
In 1917, Harrison founded the first organization (The Liberty League) and the first newspaper (The Voice) of the �New Negro Movement� and he published his first book, The Negro and the Nation. He opposed positions taken by Joel E. Spingarn and W.E.B. Du Bois of the NAACP during the First World War and, along with William Monroe Trotter and others he organized the 1918 Liberty Congress. The Congress, the major Black protest effort during the war, demanded enforcement of the Thirteenth, Fourteenth, and Fifteenth Amendments and federal anti-lynching legislation. Beginning in 1920, he became the principal editor of Marcus Garvey's Negro World, which he reshaped into a leading political and literary publication of the era. In its pages, he discussed history, politics, theater, international affairs, religion, and science. He also created a "Poetry for the People" feature, a �West Indian News Notes� column, and what he described as the first regular book review section by a Black author in �Negro newspaperdom.� In 1920 he also published his second book, When Africa Awakes: The �Inside Story� of the Stirrings and Strivings of the New Negro in the Western World. Later, he would criticize Garvey's methods and actions.
|
@@ -0,0 +1 @@
|
|
1
|
+
In 1917, Harrison founded the first organization (The Liberty League) and the first newspaper (The Voice) of the “New Negro Movement” and he published his first book, The Negro and the Nation. He opposed positions taken by Joel E. Spingarn and W.E.B. Du Bois of the NAACP during the First World War and, along with William Monroe Trotter and others he organized the 1918 Liberty Congress. The Congress, the major Black protest effort during the war, demanded enforcement of the Thirteenth, Fourteenth, and Fifteenth Amendments and federal anti-lynching legislation. Beginning in 1920, he became the principal editor of Marcus Garvey's Negro World, which he reshaped into a leading political and literary publication of the era. In its pages, he discussed history, politics, theater, international affairs, religion, and science. He also created a "Poetry for the People" feature, a “West Indian News Notes” column, and what he described as the first regular book review section by a Black author in “Negro newspaperdom.” In 1920 he also published his second book, When Africa Awakes: The “Inside Story” of the Stirrings and Strivings of the New Negro in the Western World. Later, he would criticize Garvey's methods and actions.
|
@@ -0,0 +1,2 @@
|
|
1
|
+
<?xml version='1.0' encoding='UTF-8'?>
|
2
|
+
<mods xmlns='http://www.loc.gov/mods/v3' xmlns:xsi='http://www.w3.org/2001/XMLSchema-instance' xsi:schemaLocation='http://www.loc.gov/mods/v3 http://www.loc.gov/standards/mods/v3/mods-3-4.xsd'><titleInfo><nonSort>The </nonSort><title xml:lang="eng">(ġhotogråphs) ゐ </title></titleInfo></mods>
|
@@ -1 +1 @@
|
|
1
|
-
<doc xmlns:xs="http://www.w3.org/2001/XMLSchema" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:oai="http://www.openarchives.org/OAI/2.0/oai_dc/" xmlns:mods="http://www.loc.gov/mods/v3" xmlns="http://solr.apache.org" xmlns:index="http://repository.cul.columbia.edu/namespace/index/1_0/" xmlns:local="http://local.org/custom"><field name="format">multipartitem</field><field name="collection_h">Columbia University Libraries Content/Online Projects/PRD Customer Orders/</field><field name="internal_h">ldpd:91655/ldpd:91656/ldpd:101479/ldpd:102275/</field><field name="format_h">multipartitem/</field><field name="id">ldpd:102275@ldpd:103434</field><field name="pid_ssi">ldpd:102275</field><field name="descriptor">mods</field><field name="object_ssm">https://repository2.cul.columbia.edu:8443/fedora/get/ldpd:102275</field><field name="identifier_ssi">prd.custord.070017</field><field name="all_text_teim">AN070017</field><field name="all_text_teim">prd.custord.070017</field><field name="lib_collection_sim">Pulitzer Prizes</field><field name="lib_collection_ssm">Pulitzer Prizes</field><field name="lib_project_sim">Pres Orders</field><field name="lib_project_ssm">Customer Order Collection</field><field name="lib_project_teim">Customer Order Collection</field><field name="title_sort">
|
1
|
+
<doc xmlns:xs="http://www.w3.org/2001/XMLSchema" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:oai="http://www.openarchives.org/OAI/2.0/oai_dc/" xmlns:mods="http://www.loc.gov/mods/v3" xmlns="http://solr.apache.org" xmlns:index="http://repository.cul.columbia.edu/namespace/index/1_0/" xmlns:local="http://local.org/custom"><field name="format">multipartitem</field><field name="collection_h">Columbia University Libraries Content/Online Projects/PRD Customer Orders/</field><field name="internal_h">ldpd:91655/ldpd:91656/ldpd:101479/ldpd:102275/</field><field name="format_h">multipartitem/</field><field name="id">ldpd:102275@ldpd:103434</field><field name="pid_ssi">ldpd:102275</field><field name="descriptor">mods</field><field name="object_ssm">https://repository2.cul.columbia.edu:8443/fedora/get/ldpd:102275</field><field name="identifier_ssi">prd.custord.070017</field><field name="all_text_teim">AN070017</field><field name="all_text_teim">prd.custord.070017</field><field name="lib_collection_sim">Pulitzer Prizes</field><field name="lib_collection_ssm">Pulitzer Prizes</field><field name="lib_project_sim">Pres Orders</field><field name="lib_project_ssm">Customer Order Collection</field><field name="lib_project_teim">Customer Order Collection</field><field name="title_sort">PHOTOGRAPHS</field><field name="title_ssm">Photographs</field><field name="title_teim">Photographs</field><field name="lib_name_sim">Kennerly, David</field><field name="lib_name_teim">Kennerly, David</field><field name="lib_name_ssm">Kennerly, David</field><field name="lib_repo_long_ssim">Rare Book and Manuscript Library</field><field name="lib_repo_sim">RBML</field><field name="lib_repo_teim">RBML</field><field name="lib_format_ssm">photographs</field><field name="lib_format_sim">photographs</field><field name="all_text_teim">Original PRD customer order number: 070017</field><field name="extent_teim">2 items</field><field name="access_condition_ssi">Columbia Libraries Staff Use Only</field><field 
name="all_text_teim">Box 8, JP2 Feature Photography</field></doc>
|
@@ -19,7 +19,7 @@ lib_collection_sim: ['Pulitzer Prizes']
|
|
19
19
|
lib_collection_ssm: ['Pulitzer Prizes']
|
20
20
|
lib_project_short_ssim: ['Pres Orders']
|
21
21
|
lib_project_full_ssim: ['Customer Order Collection']
|
22
|
-
title_si: '
|
22
|
+
title_si: 'PHOTOGRAPHS'
|
23
23
|
title_display_ssm: ['The Photographs']
|
24
24
|
lib_format_sim: ['photographs']
|
25
25
|
lib_format_ssm: ['photographs']
|
data/lib/cul_hydra/indexer.rb
CHANGED
@@ -1,8 +1,12 @@
|
|
1
|
+
require "active-fedora"
|
2
|
+
|
1
3
|
module Cul::Hydra::Indexer
|
2
4
|
|
3
5
|
NUM_FEDORA_RETRY_ATTEMPTS = 3
|
4
6
|
DELAY_BETWEEN_FEDORA_RETRY_ATTEMPTS = 5.seconds
|
5
|
-
|
7
|
+
DEFAULT_INDEX_OPTS = {
|
8
|
+
skip_generic_resources: false, verbose_output: false, softcommit: true, reraise: false
|
9
|
+
}.freeze
|
6
10
|
def self.descend_from(pid, pids_to_omit=nil, verbose_output=false)
|
7
11
|
if pid.blank?
|
8
12
|
raise 'Please supply a pid (e.g. rake recursively_index_fedora_objects pid=ldpd:123)'
|
@@ -60,36 +64,59 @@ module Cul::Hydra::Indexer
|
|
60
64
|
end
|
61
65
|
def self.recursively_index_fedora_objects(top_pid, pids_to_omit=nil, skip_generic_resources=false, verbose_output=false)
|
62
66
|
|
67
|
+
index_opts = { skip_generic_resources: skip_generic_resources, verbose_output: verbose_output }
|
63
68
|
descend_from(top_pid, pids_to_omit, verbose_output) do |pid|
|
64
|
-
self.index_pid(pid,
|
69
|
+
self.index_pid(pid, index_opts)
|
65
70
|
end
|
71
|
+
end
|
66
72
|
|
73
|
+
# this is a compatibility method for bridging the previously used positional arguments to
|
74
|
+
# keyword arguments by extracting an opts hash from varargs
|
75
|
+
# legacy positional opts signature: skip_resources = false, verbose_output = false, softcommit = true
|
76
|
+
# keyword defaults are in DEFAULT_INDEX_OPTS
|
77
|
+
# @param args [Array] a list of arguments ending with an options hash
|
78
|
+
# @return options hash
|
79
|
+
def self.extract_index_opts(args)
|
80
|
+
args = args.dup # do not modify the original list
|
81
|
+
# extract opts hash
|
82
|
+
index_opts = (args.last.is_a? Hash) ? args.pop : {}
|
83
|
+
# symbolize keys and reverse merge defaults
|
84
|
+
index_opts = index_opts.map {|k,v| [k.to_sym, v] }.to_h
|
85
|
+
index_opts = DEFAULT_INDEX_OPTS.merge(index_opts)
|
86
|
+
# assign any legacy positional arguments, permitting explicit nils
|
87
|
+
unless args.empty?
|
88
|
+
index_opts[:skip_generic_resources] = args[0] if args.length > 0
|
89
|
+
index_opts[:verbose_output] = args[1] if args.length > 1
|
90
|
+
index_opts[:softcommit] = args[2] if args.length > 2
|
91
|
+
end
|
92
|
+
index_opts
|
67
93
|
end
|
68
94
|
|
69
|
-
def self.index_pid(pid,
|
95
|
+
def self.index_pid(pid, *args)
|
70
96
|
# We found an object with the desired PID. Let's reindex it
|
97
|
+
index_opts = extract_index_opts(args)
|
71
98
|
begin
|
72
99
|
active_fedora_object = nil
|
73
100
|
|
74
101
|
NUM_FEDORA_RETRY_ATTEMPTS.times do |i|
|
75
102
|
begin
|
76
103
|
active_fedora_object = ActiveFedora::Base.find(pid, :cast => true)
|
77
|
-
if skip_generic_resources && active_fedora_object.is_a?(GenericResource)
|
104
|
+
if index_opts[:skip_generic_resources] && active_fedora_object.is_a?(GenericResource)
|
78
105
|
puts 'Object was skipped because GenericResources are being skipped and it is a GenericResource.'
|
79
106
|
else
|
80
|
-
if softcommit
|
107
|
+
if index_opts[:softcommit]
|
81
108
|
active_fedora_object.update_index
|
82
109
|
else
|
83
110
|
# Using direct solr query to update document without soft committing
|
84
111
|
ActiveFedora::SolrService.add(active_fedora_object.to_solr)
|
85
112
|
end
|
86
|
-
puts 'done.' if verbose_output
|
113
|
+
puts 'done.' if index_opts[:verbose_output]
|
87
114
|
end
|
88
115
|
break
|
89
116
|
rescue RestClient::RequestTimeout, Errno::EHOSTUNREACH => e
|
90
117
|
remaining_attempts = (NUM_FEDORA_RETRY_ATTEMPTS-1) - i
|
91
118
|
if remaining_attempts == 0
|
92
|
-
raise
|
119
|
+
raise
|
93
120
|
else
|
94
121
|
Rails.logger.error "Error: Could not connect to fedora. (#{e.class.to_s + ': ' + e.message}). Will retry #{remaining_attempts} more #{remaining_attempts == 1 ? 'time' : 'times'} (after a #{DELAY_BETWEEN_FEDORA_RETRY_ATTEMPTS} second delay)."
|
95
122
|
sleep DELAY_BETWEEN_FEDORA_RETRY_ATTEMPTS
|
@@ -102,15 +129,18 @@ module Cul::Hydra::Indexer
|
|
102
129
|
sleep 5
|
103
130
|
else
|
104
131
|
# Other RuntimeErrors should be passed on
|
105
|
-
raise
|
132
|
+
raise
|
106
133
|
end
|
107
134
|
end
|
108
135
|
end
|
109
136
|
rescue SystemExit, Interrupt => e
|
110
137
|
# Allow system interrupt (ctrl+c)
|
111
|
-
raise
|
138
|
+
raise
|
112
139
|
rescue Exception => e
|
113
140
|
puts "Encountered problem with #{pid}. Skipping record. Exception class: #{e.class.name}. Message: #{e.message}"
|
141
|
+
if index_opts[:reraise]
|
142
|
+
raise
|
143
|
+
end
|
114
144
|
end
|
115
145
|
end
|
116
146
|
end
|
@@ -39,7 +39,11 @@ module Cul::Hydra::Solrizer
|
|
39
39
|
n_t = n_t.sub(/^"(.*)"$/, "\\1")
|
40
40
|
n_t = n_t.sub(/^'(.*)'$/, "\\1")
|
41
41
|
is_negative_number = n_t =~ /^-\d+$/
|
42
|
-
|
42
|
+
if strip_punctuation == :all
|
43
|
+
n_t = n_t.gsub(/[[:punct:]]/, '')
|
44
|
+
else
|
45
|
+
n_t = n_t.sub(/^[[:punct:]]+/, '')
|
46
|
+
end
|
43
47
|
# this may have 'created' leading/trailing space, so strip
|
44
48
|
n_t.strip!
|
45
49
|
n_t = '-' + n_t if is_negative_number
|
@@ -92,7 +96,13 @@ module Cul::Hydra::Solrizer
|
|
92
96
|
base_text << child.text unless child.name == 'nonSort'
|
93
97
|
end
|
94
98
|
end
|
95
|
-
base_text = ModsFieldable.normalize(base_text,
|
99
|
+
base_text = ModsFieldable.normalize(base_text, :all)
|
100
|
+
# decompose and strip unicode combining characters
|
101
|
+
base_text = base_text.unicode_normalize(:nfd)
|
102
|
+
base_text.gsub!(/[\u0300-\u036F]/,'')
|
103
|
+
# uppercase per Unicode, for ASCII/Latin
|
104
|
+
# TODO: decide whether to use full Unicode case, other language options (Turkish, Lithuanian, etc.)
|
105
|
+
base_text = base_text.upcase(:ascii)
|
96
106
|
base_text = nil if base_text.empty?
|
97
107
|
base_text
|
98
108
|
end
|
data/lib/cul_hydra/version.rb
CHANGED
data/lib/tasks/index.rake
CHANGED
@@ -91,10 +91,11 @@ namespace :cul_hydra do
|
|
91
91
|
pool = Thread.pool(thread_pool_size)
|
92
92
|
mutex = Mutex.new
|
93
93
|
|
94
|
+
index_opts = { skip_generic_resources: skip_generic_resources, verbose_output: false }
|
94
95
|
pids.each do |pid|
|
95
96
|
pool.process {
|
96
97
|
|
97
|
-
Cul::Hydra::Indexer.index_pid(pid,
|
98
|
+
Cul::Hydra::Indexer.index_pid(pid, index_opts)
|
98
99
|
|
99
100
|
mutex.synchronize do
|
100
101
|
counter += 1
|
@@ -124,7 +125,8 @@ namespace :cul_hydra do
|
|
124
125
|
next
|
125
126
|
end
|
126
127
|
|
127
|
-
|
128
|
+
index_opts = { verbose_output: false }
|
129
|
+
index_opts[:skip_generic_resources] = (ENV['skip_generic_resources'] == 'true')
|
128
130
|
|
129
131
|
start_time = Time.now
|
130
132
|
pids = Cul::Hydra::RisearchMembers.get_publish_target_member_pids(publish_target_pid, true)
|
@@ -133,7 +135,7 @@ namespace :cul_hydra do
|
|
133
135
|
counter = 0
|
134
136
|
|
135
137
|
pids.each do |pid|
|
136
|
-
Cul::Hydra::Indexer.index_pid(pid,
|
138
|
+
Cul::Hydra::Indexer.index_pid(pid, index_opts)
|
137
139
|
counter += 1
|
138
140
|
puts "Indexed #{counter} of #{total} | #{Time.now - start_time} seconds"
|
139
141
|
end
|
metadata
CHANGED
@@ -1,15 +1,15 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: cul_hydra
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.9.
|
4
|
+
version: 1.9.6
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Benjamin Armintor
|
8
8
|
- Eric O'Hanlon
|
9
|
-
autorequire:
|
9
|
+
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2020-
|
12
|
+
date: 2020-11-17 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: rails
|
@@ -31,14 +31,14 @@ dependencies:
|
|
31
31
|
requirements:
|
32
32
|
- - "~>"
|
33
33
|
- !ruby/object:Gem::Version
|
34
|
-
version: 3.
|
34
|
+
version: 3.4.1
|
35
35
|
type: :runtime
|
36
36
|
prerelease: false
|
37
37
|
version_requirements: !ruby/object:Gem::Requirement
|
38
38
|
requirements:
|
39
39
|
- - "~>"
|
40
40
|
- !ruby/object:Gem::Version
|
41
|
-
version: 3.
|
41
|
+
version: 3.4.1
|
42
42
|
- !ruby/object:Gem::Dependency
|
43
43
|
name: blacklight
|
44
44
|
requirement: !ruby/object:Gem::Requirement
|
@@ -394,6 +394,7 @@ files:
|
|
394
394
|
- app/models/content_aggregator.rb
|
395
395
|
- app/models/cul/hydra/datastreams/access_control_metadata.rb
|
396
396
|
- app/models/cul/hydra/datastreams/dc_metadata.rb
|
397
|
+
- app/models/cul/hydra/datastreams/encoded_text_datastream.rb
|
397
398
|
- app/models/cul/hydra/datastreams/mods_document.rb
|
398
399
|
- app/models/cul/hydra/datastreams/rels_int.rb
|
399
400
|
- app/models/cul/hydra/datastreams/struct_metadata.rb
|
@@ -402,14 +403,11 @@ files:
|
|
402
403
|
- app/models/generic_object.rb
|
403
404
|
- app/models/generic_resource.rb
|
404
405
|
- bin/rails
|
405
|
-
- config/fedora.yml
|
406
406
|
- config/jetty.yml
|
407
407
|
- config/locales/ldpd_hydra.en.yml
|
408
408
|
- config/predicate_mappings.yml
|
409
|
-
- config/solr.yml
|
410
409
|
- config/solr_mappings.yml
|
411
410
|
- config/solr_value_maps.yml
|
412
|
-
- config/subs.yml
|
413
411
|
- fixtures/cmodels/ldpd_ADLMetadata.xml
|
414
412
|
- fixtures/cmodels/ldpd_AESMetadata.xml
|
415
413
|
- fixtures/cmodels/ldpd_BagAggregator.xml
|
@@ -453,6 +451,8 @@ files:
|
|
453
451
|
- fixtures/cmodels/ldpd_sdep.StaticImageCore.xml
|
454
452
|
- fixtures/cmodels/ore_Proxy.xml
|
455
453
|
- fixtures/cmodels/pcdm_Collection.xml
|
454
|
+
- fixtures/spec/BLOB/description-cp1252.txt
|
455
|
+
- fixtures/spec/BLOB/description-utf8.txt
|
456
456
|
- fixtures/spec/BLOB/dlc.md
|
457
457
|
- fixtures/spec/BLOB/test001.jpg
|
458
458
|
- fixtures/spec/CUL_ACCESS/access-closed.xml
|
@@ -462,7 +462,6 @@ files:
|
|
462
462
|
- fixtures/spec/CUL_MODS/mods-001.xml
|
463
463
|
- fixtures/spec/CUL_MODS/mods-access-condition.xml
|
464
464
|
- fixtures/spec/CUL_MODS/mods-all.xml
|
465
|
-
- fixtures/spec/CUL_MODS/mods-archival-context-2.xml
|
466
465
|
- fixtures/spec/CUL_MODS/mods-archival-context.xml
|
467
466
|
- fixtures/spec/CUL_MODS/mods-bad-repo.xml
|
468
467
|
- fixtures/spec/CUL_MODS/mods-date-created-range.xml
|
@@ -493,6 +492,7 @@ files:
|
|
493
492
|
- fixtures/spec/CUL_MODS/mods-subjects.xml
|
494
493
|
- fixtures/spec/CUL_MODS/mods-textual-date.xml
|
495
494
|
- fixtures/spec/CUL_MODS/mods-textual-dates-with-unusual-chars.xml
|
495
|
+
- fixtures/spec/CUL_MODS/mods-titles-extended.xml
|
496
496
|
- fixtures/spec/CUL_MODS/mods-titles.xml
|
497
497
|
- fixtures/spec/CUL_MODS/mods-top-level-location-vs-relateditem-location.xml
|
498
498
|
- fixtures/spec/CUL_MODS/mods-unmapped-project.xml
|
@@ -548,7 +548,7 @@ files:
|
|
548
548
|
homepage: https://github.com/cul/cul_hydra
|
549
549
|
licenses: []
|
550
550
|
metadata: {}
|
551
|
-
post_install_message:
|
551
|
+
post_install_message:
|
552
552
|
rdoc_options: []
|
553
553
|
require_paths:
|
554
554
|
- app
|
@@ -566,8 +566,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
566
566
|
- !ruby/object:Gem::Version
|
567
567
|
version: '0'
|
568
568
|
requirements: []
|
569
|
-
rubygems_version: 3.0.
|
570
|
-
signing_key:
|
569
|
+
rubygems_version: 3.0.8
|
570
|
+
signing_key:
|
571
571
|
specification_version: 4
|
572
572
|
summary: ActiveFedora, OM, and Solrizer implementations for CUL repository apps
|
573
573
|
test_files: []
|
data/config/fedora.yml
DELETED
@@ -1,26 +0,0 @@
|
|
1
|
-
development:
|
2
|
-
user: fedoraAdmin
|
3
|
-
password: f+BULUS*^
|
4
|
-
url: http://repository.cul.columbia.edu:8080/fedora
|
5
|
-
datastreams_root: /ifs/cul/repo/archive/repo-datastreams
|
6
|
-
time_zone: "America/New_York"
|
7
|
-
test: &TEST
|
8
|
-
user: fedoraAdmin
|
9
|
-
password: fedoraAdmin
|
10
|
-
url: <%= "http://127.0.0.1:#{ENV['TEST_JETTY_PORT'] || 8983}/fedora-test" %>
|
11
|
-
datastreams_root: /ifs/cul/repo/archive/repo-datastreams
|
12
|
-
time_zone: "America/New_York"
|
13
|
-
production:
|
14
|
-
user: fedoraAdmin
|
15
|
-
password: f+BULUS*^
|
16
|
-
url: http://repository.cul.columbia.edu:8080/fedora
|
17
|
-
datastreams_root: /ifs/cul/repo/archive/repo-datastreams
|
18
|
-
time_zone: "America/New_York"
|
19
|
-
dcv_test:
|
20
|
-
user: fedoraAdmin
|
21
|
-
password: f+BULUS*^
|
22
|
-
url: http://repository.cul.columbia.edu:8080/fedora
|
23
|
-
datastreams_root: /ifs/cul/repo/archive/repo-datastreams
|
24
|
-
time_zone: "America/New_York"
|
25
|
-
cucumber:
|
26
|
-
<<: *TEST
|
data/config/solr.yml
DELETED
@@ -1,17 +0,0 @@
|
|
1
|
-
# This is a sample config file that does not have multiple solr instances. You will also need to be sure to
|
2
|
-
# edit the fedora.yml file to match the solr URL for active-fedora.
|
3
|
-
# url: http://katana.cul.columbia.edu:8080/solr-4.7/dcv_private_dev
|
4
|
-
test: &TEST
|
5
|
-
url: <%= "http://127.0.0.1:#{ENV['TEST_JETTY_PORT'] || 8983}/solr/test" %>
|
6
|
-
dcv_dev: &DEV
|
7
|
-
url: http://spatha.cul.columbia.edu:8080/solr-4.7/dcv_private_test
|
8
|
-
dcv_core: 'dcv_private_dev'
|
9
|
-
dcv_test: &IFP
|
10
|
-
url: http://spatha.cul.columbia.edu:8080/solr-4.7/dcv_private_test
|
11
|
-
dcv_core: 'dcv_private_test'
|
12
|
-
cucumber:
|
13
|
-
<<: *TEST
|
14
|
-
production: &PROD
|
15
|
-
url: http://spatha.cul.columbia.edu:8080/solr-4.7/dcv_prod
|
16
|
-
development:
|
17
|
-
<<: *PROD
|
data/config/subs.yml
DELETED
@@ -1,10 +0,0 @@
|
|
1
|
-
old:
|
2
|
-
fedora_server: http://sayers.cul.columbia.edu:8080
|
3
|
-
php_server: http://bach.cul.columbia.edu/dev
|
4
|
-
djatoka_server: http://iris.cul.columbia.edu:8080
|
5
|
-
development:
|
6
|
-
fedora_server: http://repository.cul.columbia.edu:8080
|
7
|
-
php_server: http://fedora-svc.cul.columbia.edu/dev
|
8
|
-
test:
|
9
|
-
fedora_server: http://repository.cul.columbia.edu:8080
|
10
|
-
php_server: http://fedora-svc.cul.columbia.edu/dev
|
@@ -1,22 +0,0 @@
|
|
1
|
-
<?xml version="1.0"?>
|
2
|
-
<mods:mods xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:mods="http://www.loc.gov/mods/v3" version="3.6" xsi:schemaLocation="http://www.loc.gov/mods/v3 http://www.loc.gov/standards/mods/v3/mods-3-6.xsd">
|
3
|
-
<mods:titleInfo>
|
4
|
-
<mods:title>Agreement between Gunnar Myrdal and Harper & Brothers for the publication of "An American Dilemma," January 8, 1943</mods:title>
|
5
|
-
</mods:titleInfo>
|
6
|
-
<mods:relatedItem type="host" displayLabel="Project">
|
7
|
-
<mods:titleInfo>
|
8
|
-
<mods:title>Carnegie Digital Past and Future</mods:title>
|
9
|
-
</mods:titleInfo>
|
10
|
-
</mods:relatedItem>
|
11
|
-
<mods:relatedItem type="host" displayLabel="Collection">
|
12
|
-
<mods:titleInfo valueURI="http://id.library.columbia.edu/term/72fcfd07-f7db-4a18-bdb2-beb0abce071c">
|
13
|
-
<mods:title>Carnegie Corporation of New York Records</mods:title>
|
14
|
-
</mods:titleInfo>
|
15
|
-
<mods:identifier type="CLIO">4079753</mods:identifier>
|
16
|
-
<mods:part>
|
17
|
-
<mods:detail type="Series" level="1">
|
18
|
-
<mods:title>Series II. Files on Microfilm</mods:title>
|
19
|
-
</mods:detail>
|
20
|
-
</mods:part>
|
21
|
-
</mods:relatedItem>
|
22
|
-
</mods:mods>
|