ds-convert 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (87) hide show
  1. checksums.yaml +7 -0
  2. data/README.md +294 -0
  3. data/Rakefile +12 -0
  4. data/config/settings.yml +150 -0
  5. data/exe/ds-convert +149 -0
  6. data/exe/ds-recon +275 -0
  7. data/exe/ds-validate-csv +40 -0
  8. data/exe/marc-mrc-to-xml.rb +80 -0
  9. data/lib/ds/cli.rb +102 -0
  10. data/lib/ds/constants.rb +166 -0
  11. data/lib/ds/converter/converter.rb +124 -0
  12. data/lib/ds/converter/writer.rb +50 -0
  13. data/lib/ds/converter.rb +7 -0
  14. data/lib/ds/csv_util.rb +43 -0
  15. data/lib/ds/data/berkeley-arks.txt +4000 -0
  16. data/lib/ds/data/getty-aat-centuries.csv +71 -0
  17. data/lib/ds/data/iiif_manifests.csv +122 -0
  18. data/lib/ds/data/legacy-iiif-manifests.csv +77 -0
  19. data/lib/ds/ds_error.rb +1 -0
  20. data/lib/ds/extractor/base_record_locator.rb +24 -0
  21. data/lib/ds/extractor/base_term.rb +79 -0
  22. data/lib/ds/extractor/csv_record_locator.rb +13 -0
  23. data/lib/ds/extractor/ds_csv_extractor.rb +695 -0
  24. data/lib/ds/extractor/ds_mets_xml_extractor.rb +1114 -0
  25. data/lib/ds/extractor/genre.rb +45 -0
  26. data/lib/ds/extractor/language.rb +31 -0
  27. data/lib/ds/extractor/marc_xml_extractor.rb +1172 -0
  28. data/lib/ds/extractor/material.rb +12 -0
  29. data/lib/ds/extractor/name.rb +50 -0
  30. data/lib/ds/extractor/place.rb +11 -0
  31. data/lib/ds/extractor/subject.rb +58 -0
  32. data/lib/ds/extractor/tei_xml_extractor.rb +687 -0
  33. data/lib/ds/extractor/title.rb +52 -0
  34. data/lib/ds/extractor/xml_record_locator.rb +38 -0
  35. data/lib/ds/extractor.rb +24 -0
  36. data/lib/ds/institutions.rb +55 -0
  37. data/lib/ds/manifest/base_id_validator.rb +76 -0
  38. data/lib/ds/manifest/constants.rb +67 -0
  39. data/lib/ds/manifest/ds_csv_id_validator.rb +15 -0
  40. data/lib/ds/manifest/entry.rb +133 -0
  41. data/lib/ds/manifest/manifest.rb +74 -0
  42. data/lib/ds/manifest/manifest_validator.rb +256 -0
  43. data/lib/ds/manifest/simple_xml_id_validator.rb +42 -0
  44. data/lib/ds/manifest.rb +30 -0
  45. data/lib/ds/mapper/base_mapper.rb +221 -0
  46. data/lib/ds/mapper/ds_csv_mapper.rb +77 -0
  47. data/lib/ds/mapper/ds_mets_mapper.rb +85 -0
  48. data/lib/ds/mapper/marc_mapper.rb +87 -0
  49. data/lib/ds/mapper/tei_xml_mapper.rb +79 -0
  50. data/lib/ds/mapper.rb +13 -0
  51. data/lib/ds/recon/constants.rb +56 -0
  52. data/lib/ds/recon/ds_csv_enumerator.rb +16 -0
  53. data/lib/ds/recon/ds_mets_xml_enumerator.rb +14 -0
  54. data/lib/ds/recon/marc_xml_enumerator.rb +15 -0
  55. data/lib/ds/recon/recon_builder.rb +183 -0
  56. data/lib/ds/recon/recon_data.rb +37 -0
  57. data/lib/ds/recon/recon_manager.rb +92 -0
  58. data/lib/ds/recon/source_enumerator.rb +21 -0
  59. data/lib/ds/recon/tei_xml_enumerator.rb +14 -0
  60. data/lib/ds/recon/type/all_subjects.rb +18 -0
  61. data/lib/ds/recon/type/genres.rb +50 -0
  62. data/lib/ds/recon/type/languages.rb +38 -0
  63. data/lib/ds/recon/type/materials.rb +40 -0
  64. data/lib/ds/recon/type/named_subjects.rb +20 -0
  65. data/lib/ds/recon/type/names.rb +65 -0
  66. data/lib/ds/recon/type/places.rb +40 -0
  67. data/lib/ds/recon/type/recon_type.rb +136 -0
  68. data/lib/ds/recon/type/splits.rb +34 -0
  69. data/lib/ds/recon/type/subjects.rb +65 -0
  70. data/lib/ds/recon/type/titles.rb +38 -0
  71. data/lib/ds/recon/url_lookup.rb +52 -0
  72. data/lib/ds/recon.rb +292 -0
  73. data/lib/ds/source/base_source.rb +32 -0
  74. data/lib/ds/source/ds_csv.rb +18 -0
  75. data/lib/ds/source/ds_mets_xml.rb +20 -0
  76. data/lib/ds/source/marc_xml.rb +22 -0
  77. data/lib/ds/source/source_cache.rb +69 -0
  78. data/lib/ds/source/tei_xml.rb +22 -0
  79. data/lib/ds/source.rb +20 -0
  80. data/lib/ds/util/cache.rb +111 -0
  81. data/lib/ds/util/csv_validator.rb +209 -0
  82. data/lib/ds/util/csv_writer.rb +42 -0
  83. data/lib/ds/util/strings.rb +194 -0
  84. data/lib/ds/util.rb +37 -0
  85. data/lib/ds/version.rb +5 -0
  86. data/lib/ds.rb +237 -0
  87. metadata +246 -0
@@ -0,0 +1,12 @@
1
+ # frozen_string_literal: true
2
+
3
module DS
  module Extractor
    # A material term. Subclass of BaseTerm whose hash form additionally
    # carries the recorded value under the +:material_as_recorded+ key.
    class Material < BaseTerm
      # Returns the hash form of this material.
      #
      # Takes the result of the parent class's +to_h+ (converted with
      # +to_h+) and merges in +:material_as_recorded+, set to +as_recorded+.
      #
      # @return [Hash] the parent's hash plus +:material_as_recorded+
      def to_h
        inherited = super.to_h
        inherited.merge(material_as_recorded: as_recorded)
      end
    end
  end
end
@@ -0,0 +1,50 @@
1
+ # frozen_string_literal: true
2
+
3
module DS
  module Extractor
    # A name term: a BaseTerm that also carries an optional role, a
    # vernacular form of the name, and a source authority reference.
    class Name < BaseTerm
      attr_accessor :role, :vernacular, :ref

      # Creates a Name.
      #
      # @param as_recorded [String] the name as recorded; forwarded to the
      #   parent initializer
      # @param role [String, nil] the role associated with the name
      # @param vernacular [String, nil] the vernacular form of the name
      # @param ref [String, nil] the source authority URI
      # @return [void]
      def initialize(as_recorded:, role: nil, vernacular: nil, ref: nil)
        @role, @vernacular, @ref = role, vernacular, ref
        super(as_recorded: as_recorded)
      end

      # Returns the name as a four-element array, in this order:
      #
      #   [as_recorded, role, vernacular, ref]
      #
      # @return [Array] the name's values
      def to_a
        [as_recorded, role, vernacular, ref]
      end

      # Returns the name as a hash.
      #
      # Keys are :name_as_recorded, :as_recorded, :role, :name_agr and
      # :source_authority_uri. Both :name_as_recorded and :as_recorded hold
      # +as_recorded+; :name_agr holds +vernacular+; :source_authority_uri
      # holds +ref+.
      #
      # @return [Hash] the name's values keyed by symbol
      def to_h
        {
          name_as_recorded:     as_recorded,
          as_recorded:          as_recorded,
          role:                 role,
          name_agr:             vernacular,
          source_authority_uri: ref
        }
      end
    end
  end
end
@@ -0,0 +1,11 @@
1
+ # frozen_string_literal: true
2
+
3
module DS
  module Extractor
    # A place term. Subclass of BaseTerm whose hash form additionally
    # carries the recorded value under the +:place_as_recorded+ key.
    class Place < BaseTerm
      # Returns the hash form of this place.
      #
      # Takes the result of the parent class's +to_h+ (converted with
      # +to_h+) and merges in +:place_as_recorded+, set to +as_recorded+.
      #
      # @return [Hash] the parent's hash plus +:place_as_recorded+
      def to_h
        inherited = super.to_h
        inherited.merge(place_as_recorded: as_recorded)
      end
    end
  end
end
@@ -0,0 +1,58 @@
1
+ # frozen_string_literal: true
2
+
3
module DS
  module Extractor
    # A subject term: a BaseTerm that also carries optional subfield codes,
    # a vocabulary and a source authority URI.
    class Subject < BaseTerm
      attr_accessor :subfield_codes, :source_authority_uri, :vocab

      # Creates a Subject.
      #
      # @param as_recorded [String] the subject as recorded; forwarded to
      #   the parent initializer
      # @param subfield_codes [String, nil] the subfield codes
      # @param vocab [String, nil] the vocabulary
      # @param source_authority_uri [String, nil] the source authority URI
      # @return [void]
      def initialize(as_recorded:, subfield_codes: nil, vocab: nil, source_authority_uri: nil)
        @subfield_codes = subfield_codes
        @vocab = vocab
        @source_authority_uri = source_authority_uri

        super(as_recorded: as_recorded)
      end

      # Returns the subject as a four-element array, in this order:
      #
      #   [as_recorded, subfield_codes, vocab, source_authority_uri]
      #
      # @return [Array] the subject's values
      def to_a
        [as_recorded, subfield_codes, vocab, source_authority_uri]
      end

      # Returns the subject as a hash.
      #
      # Keys are :subject_as_recorded, :as_recorded, :subfield_codes, :vocab
      # and :source_authority_uri. Both :subject_as_recorded and
      # :as_recorded hold +as_recorded+.
      #
      # @return [Hash] the subject's values keyed by symbol
      def to_h
        {
          subject_as_recorded:  as_recorded,
          as_recorded:          as_recorded,
          subfield_codes:       subfield_codes,
          vocab:                vocab,
          source_authority_uri: source_authority_uri
        }
      end
    end
  end
end