ds-convert 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/README.md +294 -0
- data/Rakefile +12 -0
- data/config/settings.yml +150 -0
- data/exe/ds-convert +149 -0
- data/exe/ds-recon +275 -0
- data/exe/ds-validate-csv +40 -0
- data/exe/marc-mrc-to-xml.rb +80 -0
- data/lib/ds/cli.rb +102 -0
- data/lib/ds/constants.rb +166 -0
- data/lib/ds/converter/converter.rb +124 -0
- data/lib/ds/converter/writer.rb +50 -0
- data/lib/ds/converter.rb +7 -0
- data/lib/ds/csv_util.rb +43 -0
- data/lib/ds/data/berkeley-arks.txt +4000 -0
- data/lib/ds/data/getty-aat-centuries.csv +71 -0
- data/lib/ds/data/iiif_manifests.csv +122 -0
- data/lib/ds/data/legacy-iiif-manifests.csv +77 -0
- data/lib/ds/ds_error.rb +1 -0
- data/lib/ds/extractor/base_record_locator.rb +24 -0
- data/lib/ds/extractor/base_term.rb +79 -0
- data/lib/ds/extractor/csv_record_locator.rb +13 -0
- data/lib/ds/extractor/ds_csv_extractor.rb +695 -0
- data/lib/ds/extractor/ds_mets_xml_extractor.rb +1114 -0
- data/lib/ds/extractor/genre.rb +45 -0
- data/lib/ds/extractor/language.rb +31 -0
- data/lib/ds/extractor/marc_xml_extractor.rb +1172 -0
- data/lib/ds/extractor/material.rb +12 -0
- data/lib/ds/extractor/name.rb +50 -0
- data/lib/ds/extractor/place.rb +11 -0
- data/lib/ds/extractor/subject.rb +58 -0
- data/lib/ds/extractor/tei_xml_extractor.rb +687 -0
- data/lib/ds/extractor/title.rb +52 -0
- data/lib/ds/extractor/xml_record_locator.rb +38 -0
- data/lib/ds/extractor.rb +24 -0
- data/lib/ds/institutions.rb +55 -0
- data/lib/ds/manifest/base_id_validator.rb +76 -0
- data/lib/ds/manifest/constants.rb +67 -0
- data/lib/ds/manifest/ds_csv_id_validator.rb +15 -0
- data/lib/ds/manifest/entry.rb +133 -0
- data/lib/ds/manifest/manifest.rb +74 -0
- data/lib/ds/manifest/manifest_validator.rb +256 -0
- data/lib/ds/manifest/simple_xml_id_validator.rb +42 -0
- data/lib/ds/manifest.rb +30 -0
- data/lib/ds/mapper/base_mapper.rb +221 -0
- data/lib/ds/mapper/ds_csv_mapper.rb +77 -0
- data/lib/ds/mapper/ds_mets_mapper.rb +85 -0
- data/lib/ds/mapper/marc_mapper.rb +87 -0
- data/lib/ds/mapper/tei_xml_mapper.rb +79 -0
- data/lib/ds/mapper.rb +13 -0
- data/lib/ds/recon/constants.rb +56 -0
- data/lib/ds/recon/ds_csv_enumerator.rb +16 -0
- data/lib/ds/recon/ds_mets_xml_enumerator.rb +14 -0
- data/lib/ds/recon/marc_xml_enumerator.rb +15 -0
- data/lib/ds/recon/recon_builder.rb +183 -0
- data/lib/ds/recon/recon_data.rb +37 -0
- data/lib/ds/recon/recon_manager.rb +92 -0
- data/lib/ds/recon/source_enumerator.rb +21 -0
- data/lib/ds/recon/tei_xml_enumerator.rb +14 -0
- data/lib/ds/recon/type/all_subjects.rb +18 -0
- data/lib/ds/recon/type/genres.rb +50 -0
- data/lib/ds/recon/type/languages.rb +38 -0
- data/lib/ds/recon/type/materials.rb +40 -0
- data/lib/ds/recon/type/named_subjects.rb +20 -0
- data/lib/ds/recon/type/names.rb +65 -0
- data/lib/ds/recon/type/places.rb +40 -0
- data/lib/ds/recon/type/recon_type.rb +136 -0
- data/lib/ds/recon/type/splits.rb +34 -0
- data/lib/ds/recon/type/subjects.rb +65 -0
- data/lib/ds/recon/type/titles.rb +38 -0
- data/lib/ds/recon/url_lookup.rb +52 -0
- data/lib/ds/recon.rb +292 -0
- data/lib/ds/source/base_source.rb +32 -0
- data/lib/ds/source/ds_csv.rb +18 -0
- data/lib/ds/source/ds_mets_xml.rb +20 -0
- data/lib/ds/source/marc_xml.rb +22 -0
- data/lib/ds/source/source_cache.rb +69 -0
- data/lib/ds/source/tei_xml.rb +22 -0
- data/lib/ds/source.rb +20 -0
- data/lib/ds/util/cache.rb +111 -0
- data/lib/ds/util/csv_validator.rb +209 -0
- data/lib/ds/util/csv_writer.rb +42 -0
- data/lib/ds/util/strings.rb +194 -0
- data/lib/ds/util.rb +37 -0
- data/lib/ds/version.rb +5 -0
- data/lib/ds.rb +237 -0
- metadata +246 -0
@@ -0,0 +1,45 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module DS
  module Extractor
    # A genre term. On top of what BaseTerm provides (defined elsewhere),
    # a Genre carries two optional, writable attributes: +vocab+ and
    # +source_authority_uri+.
    class Genre < BaseTerm
      attr_accessor :vocab, :source_authority_uri

      # Builds a Genre.
      #
      # @param as_recorded [String] the genre value as recorded; forwarded
      #   unchanged to the superclass initializer
      # @param source_authority_uri [String, nil] source authority URI;
      #   +nil+ when not given
      # @param vocab [String, nil] vocabulary for the term; +nil+ when not
      #   given
      def initialize(as_recorded:, source_authority_uri: nil, vocab: nil)
        # Own state is assigned before handing off to the superclass.
        @source_authority_uri = source_authority_uri
        @vocab = vocab
        super(as_recorded: as_recorded)
      end

      # Positional form of the term.
      #
      # @return [Array] +[as_recorded, vocab, source_authority_uri]+, in
      #   that order
      def to_a
        [as_recorded, vocab, source_authority_uri]
      end

      # Keyed form of the term.
      #
      # @return [Hash{Symbol => Object}] a hash with the keys
      #   +:genre_as_recorded+, +:as_recorded+, +:source_authority_uri+ and
      #   +:vocab+ (in that order); the first two keys both hold the
      #   +as_recorded+ value
      def to_h
        recorded = { genre_as_recorded: as_recorded, as_recorded: as_recorded }
        recorded.merge(source_authority_uri: source_authority_uri, vocab: vocab)
      end
    end
  end
end
|
@@ -0,0 +1,31 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'set'

module DS
  module Extractor
    # A language term: the value as recorded plus a collection of language
    # codes. Everything else comes from BaseTerm (defined elsewhere).
    class Language < BaseTerm
      attr_accessor :codes

      # Builds a Language.
      #
      # @param as_recorded [String] the language value as recorded;
      #   forwarded unchanged to the superclass initializer
      # @param codes [Set, Array, nil] the language codes; a fresh empty Set
      #   is created for each call when the argument is omitted
      def initialize(as_recorded:, codes: Set.new)
        @codes = codes
        super(as_recorded: as_recorded)
      end

      # Positional form of the term.
      #
      # @return [Array] +[as_recorded, codes]+; +codes+ is returned as
      #   stored, not converted
      def to_a
        [as_recorded, codes]
      end

      # Keyed form of the term.
      #
      # @return [Hash{Symbol => String}] a hash with the keys
      #   +:language_as_recorded+, +:as_recorded+ and +:language_code+;
      #   the first two both hold the +as_recorded+ value and
      #   +:language_code+ is the codes joined with +;+
      def to_h
        {
          language_as_recorded: as_recorded,
          as_recorded: as_recorded,
          # Array() normalizes nil, a Set or an Array to an Array, so a nil
          # +codes+ yields '' instead of raising NoMethodError and the join
          # does not depend on Set#join being available.
          language_code: Array(codes).join(';')
        }
      end
    end
  end
end
|