berkeley_library-tind 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (162) hide show
  1. checksums.yaml +7 -0
  2. data/.github/workflows/build.yml +18 -0
  3. data/.gitignore +388 -0
  4. data/.idea/inspectionProfiles/Project_Default.xml +20 -0
  5. data/.idea/misc.xml +4 -0
  6. data/.idea/modules.xml +8 -0
  7. data/.idea/tind.iml +138 -0
  8. data/.idea/vcs.xml +6 -0
  9. data/.rubocop.yml +334 -0
  10. data/.ruby-version +1 -0
  11. data/.simplecov +8 -0
  12. data/.yardopts +1 -0
  13. data/CHANGES.md +58 -0
  14. data/Dockerfile +57 -0
  15. data/Gemfile +3 -0
  16. data/Jenkinsfile +18 -0
  17. data/LICENSE.md +21 -0
  18. data/README.md +73 -0
  19. data/Rakefile +20 -0
  20. data/berkeley_library-tind.gemspec +50 -0
  21. data/bin/tind-export +14 -0
  22. data/docker-compose.yml +15 -0
  23. data/lib/berkeley_library/tind.rb +3 -0
  24. data/lib/berkeley_library/tind/api.rb +1 -0
  25. data/lib/berkeley_library/tind/api/api.rb +132 -0
  26. data/lib/berkeley_library/tind/api/api_exception.rb +131 -0
  27. data/lib/berkeley_library/tind/api/collection.rb +82 -0
  28. data/lib/berkeley_library/tind/api/date_range.rb +67 -0
  29. data/lib/berkeley_library/tind/api/format.rb +32 -0
  30. data/lib/berkeley_library/tind/api/search.rb +100 -0
  31. data/lib/berkeley_library/tind/config.rb +103 -0
  32. data/lib/berkeley_library/tind/export.rb +1 -0
  33. data/lib/berkeley_library/tind/export/column.rb +54 -0
  34. data/lib/berkeley_library/tind/export/column_group.rb +144 -0
  35. data/lib/berkeley_library/tind/export/column_group_list.rb +131 -0
  36. data/lib/berkeley_library/tind/export/column_width_calculator.rb +76 -0
  37. data/lib/berkeley_library/tind/export/config.rb +154 -0
  38. data/lib/berkeley_library/tind/export/csv_exporter.rb +29 -0
  39. data/lib/berkeley_library/tind/export/export.rb +47 -0
  40. data/lib/berkeley_library/tind/export/export_command.rb +168 -0
  41. data/lib/berkeley_library/tind/export/export_exception.rb +8 -0
  42. data/lib/berkeley_library/tind/export/export_format.rb +67 -0
  43. data/lib/berkeley_library/tind/export/exporter.rb +105 -0
  44. data/lib/berkeley_library/tind/export/filter.rb +52 -0
  45. data/lib/berkeley_library/tind/export/no_results_error.rb +7 -0
  46. data/lib/berkeley_library/tind/export/ods_exporter.rb +138 -0
  47. data/lib/berkeley_library/tind/export/row.rb +24 -0
  48. data/lib/berkeley_library/tind/export/row_metrics.rb +18 -0
  49. data/lib/berkeley_library/tind/export/table.rb +175 -0
  50. data/lib/berkeley_library/tind/export/table_metrics.rb +116 -0
  51. data/lib/berkeley_library/tind/marc.rb +1 -0
  52. data/lib/berkeley_library/tind/marc/xml_reader.rb +144 -0
  53. data/lib/berkeley_library/tind/module_info.rb +14 -0
  54. data/lib/berkeley_library/util/arrays.rb +178 -0
  55. data/lib/berkeley_library/util/logging.rb +1 -0
  56. data/lib/berkeley_library/util/ods/spreadsheet.rb +170 -0
  57. data/lib/berkeley_library/util/ods/xml/content_doc.rb +26 -0
  58. data/lib/berkeley_library/util/ods/xml/document_node.rb +57 -0
  59. data/lib/berkeley_library/util/ods/xml/element_node.rb +106 -0
  60. data/lib/berkeley_library/util/ods/xml/loext/table_protection.rb +26 -0
  61. data/lib/berkeley_library/util/ods/xml/manifest/file_entry.rb +42 -0
  62. data/lib/berkeley_library/util/ods/xml/manifest/manifest.rb +73 -0
  63. data/lib/berkeley_library/util/ods/xml/manifest_doc.rb +26 -0
  64. data/lib/berkeley_library/util/ods/xml/namespace.rb +46 -0
  65. data/lib/berkeley_library/util/ods/xml/office/automatic_styles.rb +181 -0
  66. data/lib/berkeley_library/util/ods/xml/office/body.rb +17 -0
  67. data/lib/berkeley_library/util/ods/xml/office/document_content.rb +98 -0
  68. data/lib/berkeley_library/util/ods/xml/office/document_styles.rb +39 -0
  69. data/lib/berkeley_library/util/ods/xml/office/font_face_decls.rb +30 -0
  70. data/lib/berkeley_library/util/ods/xml/office/scripts.rb +17 -0
  71. data/lib/berkeley_library/util/ods/xml/office/spreadsheet.rb +37 -0
  72. data/lib/berkeley_library/util/ods/xml/office/styles.rb +39 -0
  73. data/lib/berkeley_library/util/ods/xml/style/cell_style.rb +58 -0
  74. data/lib/berkeley_library/util/ods/xml/style/column_style.rb +36 -0
  75. data/lib/berkeley_library/util/ods/xml/style/default_style.rb +31 -0
  76. data/lib/berkeley_library/util/ods/xml/style/family.rb +85 -0
  77. data/lib/berkeley_library/util/ods/xml/style/font_face.rb +46 -0
  78. data/lib/berkeley_library/util/ods/xml/style/paragraph_properties.rb +30 -0
  79. data/lib/berkeley_library/util/ods/xml/style/row_style.rb +37 -0
  80. data/lib/berkeley_library/util/ods/xml/style/style.rb +44 -0
  81. data/lib/berkeley_library/util/ods/xml/style/table_cell_properties.rb +40 -0
  82. data/lib/berkeley_library/util/ods/xml/style/table_column_properties.rb +30 -0
  83. data/lib/berkeley_library/util/ods/xml/style/table_properties.rb +25 -0
  84. data/lib/berkeley_library/util/ods/xml/style/table_row_properties.rb +28 -0
  85. data/lib/berkeley_library/util/ods/xml/style/table_style.rb +27 -0
  86. data/lib/berkeley_library/util/ods/xml/style/text_properties.rb +52 -0
  87. data/lib/berkeley_library/util/ods/xml/styles_doc.rb +26 -0
  88. data/lib/berkeley_library/util/ods/xml/table/named_expressions.rb +17 -0
  89. data/lib/berkeley_library/util/ods/xml/table/repeatable.rb +38 -0
  90. data/lib/berkeley_library/util/ods/xml/table/table.rb +193 -0
  91. data/lib/berkeley_library/util/ods/xml/table/table_cell.rb +46 -0
  92. data/lib/berkeley_library/util/ods/xml/table/table_column.rb +43 -0
  93. data/lib/berkeley_library/util/ods/xml/table/table_row.rb +136 -0
  94. data/lib/berkeley_library/util/ods/xml/text/p.rb +118 -0
  95. data/lib/berkeley_library/util/paths.rb +111 -0
  96. data/lib/berkeley_library/util/stringios.rb +30 -0
  97. data/lib/berkeley_library/util/strings.rb +42 -0
  98. data/lib/berkeley_library/util/sys_exits.rb +15 -0
  99. data/lib/berkeley_library/util/times.rb +22 -0
  100. data/lib/berkeley_library/util/uris.rb +44 -0
  101. data/lib/berkeley_library/util/uris/appender.rb +162 -0
  102. data/lib/berkeley_library/util/uris/requester.rb +62 -0
  103. data/lib/berkeley_library/util/uris/validator.rb +32 -0
  104. data/rakelib/bundle.rake +8 -0
  105. data/rakelib/coverage.rake +11 -0
  106. data/rakelib/gem.rake +54 -0
  107. data/rakelib/rubocop.rake +18 -0
  108. data/rakelib/spec.rake +2 -0
  109. data/spec/.rubocop.yml +40 -0
  110. data/spec/berkeley_library/tind/api/api_exception_spec.rb +91 -0
  111. data/spec/berkeley_library/tind/api/api_spec.rb +143 -0
  112. data/spec/berkeley_library/tind/api/collection_spec.rb +74 -0
  113. data/spec/berkeley_library/tind/api/date_range_spec.rb +110 -0
  114. data/spec/berkeley_library/tind/api/format_spec.rb +54 -0
  115. data/spec/berkeley_library/tind/api/search_spec.rb +364 -0
  116. data/spec/berkeley_library/tind/config_spec.rb +86 -0
  117. data/spec/berkeley_library/tind/export/column_group_spec.rb +29 -0
  118. data/spec/berkeley_library/tind/export/column_spec.rb +43 -0
  119. data/spec/berkeley_library/tind/export/config_spec.rb +206 -0
  120. data/spec/berkeley_library/tind/export/export_command_spec.rb +169 -0
  121. data/spec/berkeley_library/tind/export/export_format_spec.rb +59 -0
  122. data/spec/berkeley_library/tind/export/export_matcher.rb +112 -0
  123. data/spec/berkeley_library/tind/export/export_spec.rb +150 -0
  124. data/spec/berkeley_library/tind/export/exporter_spec.rb +125 -0
  125. data/spec/berkeley_library/tind/export/row_spec.rb +118 -0
  126. data/spec/berkeley_library/tind/export/table_spec.rb +322 -0
  127. data/spec/berkeley_library/tind/marc/xml_reader_spec.rb +93 -0
  128. data/spec/berkeley_library/util/arrays_spec.rb +340 -0
  129. data/spec/berkeley_library/util/ods/spreadsheet_spec.rb +124 -0
  130. data/spec/berkeley_library/util/ods/xml/content_doc_spec.rb +121 -0
  131. data/spec/berkeley_library/util/ods/xml/manifest/file_entry_spec.rb +27 -0
  132. data/spec/berkeley_library/util/ods/xml/manifest/manifest_spec.rb +33 -0
  133. data/spec/berkeley_library/util/ods/xml/office/document_content_spec.rb +60 -0
  134. data/spec/berkeley_library/util/ods/xml/style/automatic_styles_spec.rb +37 -0
  135. data/spec/berkeley_library/util/ods/xml/style/family_spec.rb +57 -0
  136. data/spec/berkeley_library/util/ods/xml/table/table_row_spec.rb +179 -0
  137. data/spec/berkeley_library/util/ods/xml/table/table_spec.rb +218 -0
  138. data/spec/berkeley_library/util/paths_spec.rb +90 -0
  139. data/spec/berkeley_library/util/stringios_spec.rb +34 -0
  140. data/spec/berkeley_library/util/strings_spec.rb +27 -0
  141. data/spec/berkeley_library/util/times_spec.rb +39 -0
  142. data/spec/berkeley_library/util/uris_spec.rb +118 -0
  143. data/spec/data/collection-names.txt +438 -0
  144. data/spec/data/collections.json +4827 -0
  145. data/spec/data/disjoint-records.xml +187 -0
  146. data/spec/data/record-184453.xml +58 -0
  147. data/spec/data/record-184458.xml +63 -0
  148. data/spec/data/record-187888.xml +78 -0
  149. data/spec/data/records-api-search-cjk-p1.xml +6381 -0
  150. data/spec/data/records-api-search-cjk-p2.xml +5 -0
  151. data/spec/data/records-api-search-p1.xml +4506 -0
  152. data/spec/data/records-api-search-p2.xml +4509 -0
  153. data/spec/data/records-api-search-p3.xml +4506 -0
  154. data/spec/data/records-api-search-p4.xml +4509 -0
  155. data/spec/data/records-api-search-p5.xml +4506 -0
  156. data/spec/data/records-api-search-p6.xml +2436 -0
  157. data/spec/data/records-api-search-p7.xml +5 -0
  158. data/spec/data/records-api-search.xml +234 -0
  159. data/spec/data/records-manual-search.xml +547 -0
  160. data/spec/spec_helper.rb +30 -0
  161. data/test/profile/table_from_records_profile.rb +46 -0
  162. metadata +585 -0
@@ -0,0 +1,29 @@
1
+ require 'csv'
2
+ require 'berkeley_library/tind/export/exporter'
3
+
4
+ module BerkeleyLibrary
5
+ module TIND
6
+ module Export
7
# Exporter that renders a collection as CSV (comma-separated value) text
class CSVExporter < Exporter
  # Exports {Exporter#collection} as CSV.
  #
  # The export table is built first (so a failure to build it surfaces
  # before anything is written); only then is the "Writing CSV" message
  # logged and the table serialized.
  #
  # @overload export
  #   Exports to a new string.
  #   @return [String] the CSV string
  # @overload export(out)
  #   Exports to the specified output stream.
  #   @param out [IO] the output stream
  #   @return [void]
  # @overload export(path)
  #   Exports to the specified file.
  #   @param path [String, Pathname] the path to the output file
  #   @return [void]
  def export(out = nil)
    table = export_table
    logger.info('Writing CSV')

    # noinspection RubyYardReturnMatch
    table.to_csv(out)
  end
end
27
+ end
28
+ end
29
+ end
@@ -0,0 +1,47 @@
1
+ require 'berkeley_library/tind/export/export_format'
2
+
3
+ module BerkeleyLibrary
4
+ module TIND
5
+ module Export
6
class << self
  # Writes a spreadsheet in the specified format by delegating to the
  # exporter returned by {exporter_for}.
  #
  # @overload export(collection, format = ExportFormat::CSV)
  #   Returns the spreadsheet as a string.
  #   @param collection [String] The collection name
  #   @param format [ExportFormat, String, Symbol] the export format
  #   @param exportable_only [Boolean] whether to include only exportable fields
  # @overload export(collection, format = ExportFormat::CSV, out)
  #   Writes the spreadsheet to the specified output stream.
  #   @param collection [String] The collection name
  #   @param format [ExportFormat, String, Symbol] the export format
  #   @param out [IO] the output stream
  #   @param exportable_only [Boolean] whether to include only exportable fields
  # @overload export(collection, format = ExportFormat::CSV, path)
  #   Writes the spreadsheet to the specified output file.
  #   @param collection [String] The collection name
  #   @param format [ExportFormat, String, Symbol] the export format
  #   @param path [String, Pathname] the path to the output file
  #   @param exportable_only [Boolean] whether to include only exportable fields
  # @raise [ArgumentError] if `format` is not a recognized export format
  # @raise [ExportFailed] if the collection does not exist, or cannot be exported. Note
  #   that this error is guaranteed to be raised before anything is written to `out`.
  #   NOTE(review): no `ExportFailed` class is visible alongside this file; confirm
  #   which exception class is actually raised.
  def export(collection, format = ExportFormat::CSV, out = nil, exportable_only: true)
    # noinspection RubyYardParamTypeMatch
    exporter_for(collection, format, exportable_only: exportable_only).export(out)
  end

  # Returns an exporter for the specified collection in the specified format.
  #
  # @param collection [String] The collection name
  # @param format [ExportFormat, String, Symbol] the export format
  # @param exportable_only [Boolean] whether to include only exportable fields
  # @return [Exporter] the exporter
  def exporter_for(collection, format, exportable_only: true)
    # noinspection RubyNilAnalysis
    ExportFormat
      .ensure_format(format)
      .exporter_for(collection, exportable_only: exportable_only)
  end
end
45
+ end
46
+ end
47
+ end
@@ -0,0 +1,168 @@
1
+ require 'optparse'
2
+
3
+ require 'berkeley_library/tind/api'
4
+ require 'berkeley_library/tind/config'
5
+ require 'berkeley_library/tind/export/export'
6
+ require 'berkeley_library/tind/export/export_format'
7
+ require 'berkeley_library/logging'
8
+ require 'berkeley_library/util/sys_exits'
9
+
10
+ module BerkeleyLibrary
11
+ module TIND
12
+ module Export
13
+ # rubocop:disable Metrics/ClassLength
14
# Command-line front end for exporting a TIND collection, or listing the
# available collections.
class ExportCommand
  include BerkeleyLibrary::Util::SysExits

  # @return [Hash<Symbol, Object>] the parsed command-line options
  attr_reader :options

  # @return [IO] the stream that listings (and exports without an output file) are written to
  attr_reader :out

  # Parses the arguments and applies any configuration they specify.
  # Invalid arguments cause usage to be printed and the process to exit
  # (see {ExportCommand.parse_options}).
  #
  # @param args [Array<String>] the command-line arguments
  # @param out [IO] the default output stream
  def initialize(*args, out: $stdout)
    @options = ExportCommand.parse_options(args)
    @out = out
  end

  # Lists collections (when `--list-collections` was given) or exports
  # the requested collection. Any StandardError is written to standard
  # error -- with a backtrace in verbose mode -- and the process exits
  # with `EX_SOFTWARE`.
  def execute!
    return list_collections if options[:list]

    export_collection
  rescue StandardError => e
    warn(e)
    warn(e.backtrace.join("\n")) if e.backtrace && options[:verbose]

    exit(EX_SOFTWARE)
  end

  private

  # Writes one tab-separated "record count, name" line per collection to {#out}.
  def list_collections
    BerkeleyLibrary::TIND::API::Collection.each_collection { |c| out.puts "#{c.nb_rec}\t#{c.name}" }
  end

  # Exports the selected collection to the output file if one was given,
  # otherwise to {#out}.
  def export_collection
    BerkeleyLibrary::TIND::Export.export(
      options[:collection],
      options[:format],
      options[:outfile] || out
    )
  end

  class << self
    include BerkeleyLibrary::Util::SysExits

    DEFAULT_FORMAT = ExportFormat::CSV
    FORMATS = ExportFormat.to_a.map(&:value).join(', ')
    OPTS = {
      f: ['--format FORMAT', "Format (#{FORMATS}; defaults to output file extension, or else to #{DEFAULT_FORMAT})"],
      o: ['--output-file FILE', 'Output file or directory'],
      l: ['--list-collections', 'List collection sizes and names'],
      u: ['--tind-base-url URL', "TIND base URL (default $#{BerkeleyLibrary::TIND::Config::ENV_TIND_BASE_URL})"],
      k: ['--api-key KEY', "TIND API key (default $#{BerkeleyLibrary::TIND::Config::ENV_TIND_API_KEY})"],
      e: ['--env-file [ENV]', 'Read environment variables from <ENV> (default: ./.env)'],
      v: ['--verbose', 'Verbose error logging'],
      h: ['--help', 'Show help and exit']
    }.freeze

    # Parses the command line into an options hash, validates it, and
    # applies the resulting configuration (environment file, TIND base
    # URL / API key, logger).
    #
    # Option switches are removed from `argv`; the last remaining
    # argument is taken as the collection name. On any StandardError the
    # message and usage are printed to standard error and the process
    # exits with `EX_USAGE`.
    #
    # @param argv [Array<String>] the command-line arguments (modified in place)
    # @return [Hash<Symbol, Object>] the parsed options
    def parse_options(argv)
      {}.tap do |opts|
        option_parser(opts).parse!(argv)
        opts[:collection] = argv.pop
        opts[:format] = ensure_format(opts)
        validate!(opts)
        configure!(opts)
      end
    rescue StandardError => e
      print_usage_and_exit!($stderr, EX_USAGE, e.message)
    end

    private

    # Checks option combinations that the parser itself cannot enforce.
    # Nothing is required when only listing collections.
    #
    # @raise [ArgumentError] if no collection was given, or if ODS output
    #   was requested without an output file
    def validate!(opts)
      return if opts[:list]
      raise ArgumentError, 'Collection not specified' unless opts[:collection]
      raise ArgumentError, 'OpenOffice/LibreOffice export requires a filename' if opts[:format] == ExportFormat::ODS && !opts[:outfile]
    end

    # Applies the parsed options to the global TIND and logging configuration.
    # The environment file is loaded first, before the explicit URL / key
    # options are applied.
    def configure!(opts)
      configure_env(opts)
      BerkeleyLibrary::TIND::Config.base_uri = opts[:tind_base_url] if opts[:tind_base_url]
      BerkeleyLibrary::TIND::Config.api_key = opts[:api_key] if opts[:api_key]
      BerkeleyLibrary::Logging.logger = configure_logger(opts)
    end

    # @return [Logger] a DEBUG-level logger on standard error in verbose
    #   mode, otherwise a logger that discards everything
    def configure_logger(opts)
      return Logger.new(File::NULL) unless opts[:verbose]

      # TODO: simpler log format? different log levels?
      BerkeleyLibrary::Logging::Loggers.new_readable_logger($stderr).tap { |logger| logger.level = Logger::DEBUG }
    end

    # Loads environment variables from the `--env-file` path, if one was given.
    def configure_env(opts)
      return unless (env_file = opts[:env_file])

      warn "Reading environment from #{env_file}" if opts[:verbose]

      # Loaded lazily so that dotenv is only needed when --env-file is used
      require 'dotenv'
      Dotenv.load(env_file)
    end

    # Determines the export format: the explicit `--format` if given,
    # otherwise the extension of the output file, otherwise
    # {DEFAULT_FORMAT}.
    #
    # @return [ExportFormat] the format
    # @raise [ArgumentError] if the format name or file extension is not
    #   a known export format
    def ensure_format(opts)
      fmt = opts[:format] || outfile_extension(opts[:outfile])
      return DEFAULT_FORMAT unless fmt

      ExportFormat.ensure_format(fmt)
    end

    # Returns the extension of the output file without its leading dot,
    # or nil if there is no output file or it has no extension.
    # `File.extname` returns an empty string (which is truthy) for names
    # such as `lincoln-papers`; mapping that to nil lets the caller fall
    # back to the default format instead of rejecting `""` as an unknown
    # format.
    def outfile_extension(outfile)
      return unless outfile

      ext = File.extname(outfile).sub(/\A\./, '')
      ext unless ext.empty?
    end

    # Builds the option parser; each switch stores its value in `opts`.
    # rubocop:disable Metrics/AbcSize, Metrics/MethodLength
    def option_parser(opts = {})
      OptionParser.new do |p|
        p.summary_indent = ' '
        p.on('-f', *OPTS[:f]) { |fmt| opts[:format] = fmt }
        p.on('-o', *OPTS[:o]) { |out| opts[:outfile] = out }
        p.on('-l', *OPTS[:l]) { opts[:list] = true }
        p.on('-u', *OPTS[:u]) { |url| opts[:tind_base_url] = url }
        p.on('-k', *OPTS[:k]) { |k| opts[:api_key] = k }
        p.on('-e', *OPTS[:e]) { |e| opts[:env_file] = env_file_path(e) }
        p.on('-v', *OPTS[:v]) { opts[:verbose] = true }
        p.on('-h', *OPTS[:h]) { print_usage_and_exit! }
      end
    end
    # rubocop:enable Metrics/AbcSize, Metrics/MethodLength

    # Resolves the `--env-file` argument, defaulting to `.env` in the
    # current directory when the switch is given without a value.
    # `File.realpath` raises if the file does not exist.
    def env_file_path(env_file_opt)
      File.realpath(env_file_opt || File.join(Dir.pwd, '.env'))
    end

    # Prints an optional message followed by the usage text, then exits.
    #
    # @param out [IO] the stream to print to
    # @param exit_code [Integer] the process exit code
    # @param msg [String, nil] a message to print before the usage text
    def print_usage_and_exit!(out = $stdout, exit_code = EX_OK, msg = nil)
      out.puts("#{msg}\n\n") if msg
      out.puts(usage)
      exit(exit_code)
    end

    # @return [String] the full usage text, including the option summary
    def usage
      <<~USAGE
        Usage: tind-export [options] COLLECTION

        Options:
        #{summarize_options}

        Examples:
        1. list collections
        tind-export --list-collections
        2. export a collection as an OpenOffice/LibreOffice spreadsheet
        tind-export -o lincoln-papers.ods 'Abraham Lincoln Papers'
        3. export a collection as an OpenOffice/LibreOffice spreadsheet in exploded XML format,
        where `lincoln-papers` is a directory
        tind-export -v -f ODS -o lincoln-papers 'Abraham Lincoln Papers'
      USAGE
    end

    # @return [String] the option summary lines produced by a fresh option parser
    def summarize_options
      option_parser.summarize.join(' ')
    end
  end
end
165
+ # rubocop:enable Metrics/ClassLength
166
+ end
167
+ end
168
+ end
@@ -0,0 +1,8 @@
1
+ module BerkeleyLibrary
2
+ module TIND
3
+ module Export
4
# Base class for export-specific exceptions, so that callers can rescue
# export failures separately from other StandardErrors.
class ExportException < StandardError
end
6
+ end
7
+ end
8
+ end
@@ -0,0 +1,67 @@
1
+ require 'typesafe_enum'
2
+ require 'berkeley_library/tind/export/csv_exporter'
3
+ require 'berkeley_library/tind/export/ods_exporter'
4
+
5
+ module BerkeleyLibrary
6
+ module TIND
7
+ module Export
8
# Enumeration of the supported export formats.
class ExportFormat < TypesafeEnum::Base
  new :CSV
  new :ODS

  DEFAULT = ODS

  # Creates an exporter that writes this format.
  #
  # @param collection [String] The collection name
  # @param exportable_only [Boolean] whether to include only exportable fields
  # @return [Exporter, nil] a {CSVExporter} or {ODSExporter}; nil for any other format
  def exporter_for(collection, exportable_only: true)
    if self == ExportFormat::CSV
      CSVExporter.new(collection, exportable_only: exportable_only)
    elsif self == ExportFormat::ODS
      ODSExporter.new(collection, exportable_only: exportable_only)
    end
  end

  # @return [String, nil] a human-readable name for this format
  def description
    if self == ExportFormat::CSV
      'CSV (comma-separated text)'
    elsif self == ExportFormat::ODS
      'LibreOffice/OpenOffice spreadsheet'
    end
  end

  # @return [String, nil] the MIME type of files in this format
  def mime_type
    if self == ExportFormat::CSV
      'text/csv'
    elsif self == ExportFormat::ODS
      'application/vnd.oasis.opendocument.spreadsheet'
    end
  end

  # @return [String] the enum value
  def to_s
    # noinspection RubyYardReturnMatch
    value
  end

  # Implicit string conversion: also the enum value, so a format can be
  # used wherever a String is expected.
  alias to_str to_s

  # @return [String] the class name and enum key, separated by `::`
  def inspect
    "#{ExportFormat}::#{key}"
  end

  # @return [Boolean] whether this is the {DEFAULT} format
  def default?
    self == DEFAULT
  end

  # noinspection RubyYardReturnMatch
  class << self
    # Converts a string or symbol to an {ExportFormat}, or returns
    # the {ExportFormat} unchanged if passed one. The lookup is by
    # enum value and ignores case.
    #
    # @param format [String, Symbol, ExportFormat, nil] the format
    # @return [ExportFormat, nil] the format, or nil if `format` is nil or false
    # @raise [ArgumentError] if no format has the given value
    def ensure_format(format)
      return unless format
      return format if format.is_a?(ExportFormat)

      ExportFormat.find_by_value(format.to_s.downcase) ||
        raise(ArgumentError, "Unknown #{ExportFormat}: #{format.inspect}")
    end
  end
end
64
+
65
+ end
66
+ end
67
+ end
@@ -0,0 +1,105 @@
1
+ require 'berkeley_library/util/logging'
2
+ require 'berkeley_library/tind/api/search'
3
+ require 'berkeley_library/tind/export/table'
4
+
5
+ module BerkeleyLibrary
6
+ module TIND
7
+ module Export
8
+
9
# Superclass of exporters for different formats. Subclasses implement
# {#export}; this class reads the collection and builds (and caches)
# the export table.
class Exporter
  include BerkeleyLibrary::Logging

  # ------------------------------------------------------------
  # Accessors

  # @return [String] the collection name
  attr_reader :collection

  # @return [Boolean] whether to include only exportable fields
  attr_reader :exportable_only

  # ------------------------------------------------------------
  # Initializer

  # Initializes a new exporter
  #
  # @param collection [String] The collection name
  # @param exportable_only [Boolean] whether to include only exportable fields
  def initialize(collection, exportable_only: true)
    @collection = collection
    @exportable_only = exportable_only
  end

  # ------------------------------------------------------------
  # Abstract methods

  # Exports the collection. This implementation is a documentation
  # stub and always raises; subclasses override it.
  #
  # @param out [IO, String, Pathname, nil] the IO or file path to write the
  #   exported data to, or nil to return a string
  # @raise [NoMethodError] always
  # rubocop:disable Lint/UnusedMethodArgument
  def export(out = nil)
    raise NoMethodError, "#{self.class} does not implement `export`"
  end
  # rubocop:enable Lint/UnusedMethodArgument

  # ------------------------------------------------------------
  # Queries

  # Returns true if the collection can be exported, false otherwise.
  # Note that this requires reading the collection data from the TIND
  # server; failures will be fast but success may be slow. (On the other
  # hand, the retrieved collection data is cached, so the subsequent
  # export will not need to retrieve it again.)
  #
  # @return [Boolean] whether the export table has any content
  def any_results?
    !_export_table.empty?
  end

  # ------------------------------------------------------------
  # Object overrides

  # Reports `export` as unsupported on a bare {Exporter} (where it is
  # only a stub); everything else is answered by the default
  # implementation.
  def respond_to?(*args)
    asking_about_export = args.first.to_s == 'export'
    return false if asking_about_export && instance_of?(Exporter)

    super
  end

  # ------------------------------------------------------------
  # Protected methods

  protected

  # Returns a table of all records in the specified
  # collection
  #
  # @return [Export::Table] the table
  # @raise NoResultsError if no search results were returned for the collection
  def export_table
    # TODO: something more clever. Search.has_results?
    table = _export_table
    raise no_results_error if table.empty?

    table
  end

  private

  # @return [NoResultsError] an error naming the collection that had no records
  def no_results_error
    NoResultsError.new("No records returned for collection #{collection.inspect}")
  end

  # Searches the collection and builds the export table on first use;
  # later calls return the cached table.
  def _export_table
    return @_export_table if @_export_table

    logger.info("Reading collection #{collection.inspect}")
    results = API::Search.new(collection: collection).each_result(freeze: true)

    logger.info('Creating export table')
    # noinspection RubyYardParamTypeMatch
    @_export_table = Export::Table.from_records(results, freeze: true, exportable_only: exportable_only)
  end
end
103
+ end
104
+ end
105
+ end
@@ -0,0 +1,52 @@
1
+ module BerkeleyLibrary
2
+ module TIND
3
+ module Export
4
# Rules for which MARC fields and subfields may be exported or edited.
#
# Each rule is a pattern string in one of three layouts:
#
# - 3 characters: a tag; matches every field with that tag
# - 5 characters: tag + indicator 1 + indicator 2; matches the whole field
# - 6 characters: tag + indicator 1 + indicator 2 + subfield code;
#   matches only that subfield
#
# A blank indicator is written as a space, so a field with two blank
# indicators has two spaces after the tag.
# NOTE(review): the double spaces below were reconstructed from the
# positions the matcher indexes (3 and 4 for indicators, 5 for the
# subfield code) -- confirm against the upstream file.
module Filter
  DO_NOT_EXPORT_FIELDS = ['005', '8564 ', '902  ', '903  ', '991', '998'].map(&:freeze).freeze
  DO_NOT_EDIT_FIELDS = (['001'.freeze] + DO_NOT_EXPORT_FIELDS).freeze

  DO_NOT_EXPORT_SUBFIELDS = ['336  a', '852  c', '901  a', '901  f', '901  g', '980  a', '982  a', '982  b', '982  p'].map(&:freeze).freeze
  DO_NOT_EDIT_SUBFIELDS = (['035  a'.freeze] + DO_NOT_EXPORT_SUBFIELDS).freeze

  DO_NOT_EDIT = (DO_NOT_EDIT_FIELDS + DO_NOT_EDIT_SUBFIELDS).freeze

  class << self
    # @param tag [String] a MARC tag
    # @return [Boolean] false if the tag is literally listed in
    #   {DO_NOT_EXPORT_FIELDS} (i.e. it matches a tag-only pattern), true otherwise
    def can_export_tag?(tag)
      !DO_NOT_EXPORT_FIELDS.include?(tag)
    end

    # @param df [#tag, #indicator1, #indicator2, #subfield_codes] a data field
    # @return [Boolean] whether the field has at least one exportable subfield
    def can_export_data_field?(df)
      !exportable_subfield_codes(df).empty?
    end

    # Returns the subfield codes of the data field that may be exported.
    #
    # @param df [#tag, #indicator1, #indicator2, #subfield_codes] a data field
    # @return [Array<String>] the exportable codes; empty if the whole
    #   field is excluded
    def exportable_subfield_codes(df)
      tag, ind1, ind2 = decompose_data_field(df)
      return [] if DO_NOT_EXPORT_FIELDS.any? { |f| excludes?(f, tag, ind1, ind2) }

      df.subfield_codes.reject do |code|
        DO_NOT_EXPORT_SUBFIELDS.any? { |f| excludes?(f, tag, ind1, ind2, code) }
      end
    end

    # @param tag [String] the MARC tag
    # @param ind1 [String, nil] the first indicator
    # @param ind2 [String, nil] the second indicator
    # @param code [String, nil] the subfield code
    # @return [Boolean] whether no rule in {DO_NOT_EDIT} covers the
    #   given field / subfield
    def can_edit?(tag, ind1, ind2, code)
      DO_NOT_EDIT.none? { |f| excludes?(f, tag, ind1, ind2, code) }
    end

    private

    def decompose_data_field(df)
      [df.tag, df.indicator1, df.indicator2]
    end

    # Tests one pattern against a tag, indicators and optional subfield code.
    #
    # @param f [String] the pattern (see the module documentation for layouts)
    # @return [Boolean] whether the pattern excludes the field / subfield
    def excludes?(f, tag, ind1, ind2, code = nil)
      # Tag-only pattern: indicators and code are irrelevant
      return f == tag if f.size == 3
      return false unless f.start_with?(tag) && f[3] == ind1 && f[4] == ind2

      pattern_code = f[5]
      # A field-level pattern covers every subfield; and when the caller
      # gives no code, the tag/indicator match alone decides.
      return true if pattern_code.nil? || code.nil?

      # Subfield-level pattern: only the named subfield is excluded
      pattern_code == code
    end
  end
end
50
+ end
51
+ end
52
+ end