berkeley_library-tind 0.4.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.github/workflows/build.yml +18 -0
- data/.gitignore +388 -0
- data/.idea/inspectionProfiles/Project_Default.xml +20 -0
- data/.idea/misc.xml +4 -0
- data/.idea/modules.xml +8 -0
- data/.idea/tind.iml +138 -0
- data/.idea/vcs.xml +6 -0
- data/.rubocop.yml +334 -0
- data/.ruby-version +1 -0
- data/.simplecov +8 -0
- data/.yardopts +1 -0
- data/CHANGES.md +58 -0
- data/Dockerfile +57 -0
- data/Gemfile +3 -0
- data/Jenkinsfile +18 -0
- data/LICENSE.md +21 -0
- data/README.md +73 -0
- data/Rakefile +20 -0
- data/berkeley_library-tind.gemspec +50 -0
- data/bin/tind-export +14 -0
- data/docker-compose.yml +15 -0
- data/lib/berkeley_library/tind.rb +3 -0
- data/lib/berkeley_library/tind/api.rb +1 -0
- data/lib/berkeley_library/tind/api/api.rb +132 -0
- data/lib/berkeley_library/tind/api/api_exception.rb +131 -0
- data/lib/berkeley_library/tind/api/collection.rb +82 -0
- data/lib/berkeley_library/tind/api/date_range.rb +67 -0
- data/lib/berkeley_library/tind/api/format.rb +32 -0
- data/lib/berkeley_library/tind/api/search.rb +100 -0
- data/lib/berkeley_library/tind/config.rb +103 -0
- data/lib/berkeley_library/tind/export.rb +1 -0
- data/lib/berkeley_library/tind/export/column.rb +54 -0
- data/lib/berkeley_library/tind/export/column_group.rb +144 -0
- data/lib/berkeley_library/tind/export/column_group_list.rb +131 -0
- data/lib/berkeley_library/tind/export/column_width_calculator.rb +76 -0
- data/lib/berkeley_library/tind/export/config.rb +154 -0
- data/lib/berkeley_library/tind/export/csv_exporter.rb +29 -0
- data/lib/berkeley_library/tind/export/export.rb +47 -0
- data/lib/berkeley_library/tind/export/export_command.rb +168 -0
- data/lib/berkeley_library/tind/export/export_exception.rb +8 -0
- data/lib/berkeley_library/tind/export/export_format.rb +67 -0
- data/lib/berkeley_library/tind/export/exporter.rb +105 -0
- data/lib/berkeley_library/tind/export/filter.rb +52 -0
- data/lib/berkeley_library/tind/export/no_results_error.rb +7 -0
- data/lib/berkeley_library/tind/export/ods_exporter.rb +138 -0
- data/lib/berkeley_library/tind/export/row.rb +24 -0
- data/lib/berkeley_library/tind/export/row_metrics.rb +18 -0
- data/lib/berkeley_library/tind/export/table.rb +175 -0
- data/lib/berkeley_library/tind/export/table_metrics.rb +116 -0
- data/lib/berkeley_library/tind/marc.rb +1 -0
- data/lib/berkeley_library/tind/marc/xml_reader.rb +144 -0
- data/lib/berkeley_library/tind/module_info.rb +14 -0
- data/lib/berkeley_library/util/arrays.rb +178 -0
- data/lib/berkeley_library/util/logging.rb +1 -0
- data/lib/berkeley_library/util/ods/spreadsheet.rb +170 -0
- data/lib/berkeley_library/util/ods/xml/content_doc.rb +26 -0
- data/lib/berkeley_library/util/ods/xml/document_node.rb +57 -0
- data/lib/berkeley_library/util/ods/xml/element_node.rb +106 -0
- data/lib/berkeley_library/util/ods/xml/loext/table_protection.rb +26 -0
- data/lib/berkeley_library/util/ods/xml/manifest/file_entry.rb +42 -0
- data/lib/berkeley_library/util/ods/xml/manifest/manifest.rb +73 -0
- data/lib/berkeley_library/util/ods/xml/manifest_doc.rb +26 -0
- data/lib/berkeley_library/util/ods/xml/namespace.rb +46 -0
- data/lib/berkeley_library/util/ods/xml/office/automatic_styles.rb +181 -0
- data/lib/berkeley_library/util/ods/xml/office/body.rb +17 -0
- data/lib/berkeley_library/util/ods/xml/office/document_content.rb +98 -0
- data/lib/berkeley_library/util/ods/xml/office/document_styles.rb +39 -0
- data/lib/berkeley_library/util/ods/xml/office/font_face_decls.rb +30 -0
- data/lib/berkeley_library/util/ods/xml/office/scripts.rb +17 -0
- data/lib/berkeley_library/util/ods/xml/office/spreadsheet.rb +37 -0
- data/lib/berkeley_library/util/ods/xml/office/styles.rb +39 -0
- data/lib/berkeley_library/util/ods/xml/style/cell_style.rb +58 -0
- data/lib/berkeley_library/util/ods/xml/style/column_style.rb +36 -0
- data/lib/berkeley_library/util/ods/xml/style/default_style.rb +31 -0
- data/lib/berkeley_library/util/ods/xml/style/family.rb +85 -0
- data/lib/berkeley_library/util/ods/xml/style/font_face.rb +46 -0
- data/lib/berkeley_library/util/ods/xml/style/paragraph_properties.rb +30 -0
- data/lib/berkeley_library/util/ods/xml/style/row_style.rb +37 -0
- data/lib/berkeley_library/util/ods/xml/style/style.rb +44 -0
- data/lib/berkeley_library/util/ods/xml/style/table_cell_properties.rb +40 -0
- data/lib/berkeley_library/util/ods/xml/style/table_column_properties.rb +30 -0
- data/lib/berkeley_library/util/ods/xml/style/table_properties.rb +25 -0
- data/lib/berkeley_library/util/ods/xml/style/table_row_properties.rb +28 -0
- data/lib/berkeley_library/util/ods/xml/style/table_style.rb +27 -0
- data/lib/berkeley_library/util/ods/xml/style/text_properties.rb +52 -0
- data/lib/berkeley_library/util/ods/xml/styles_doc.rb +26 -0
- data/lib/berkeley_library/util/ods/xml/table/named_expressions.rb +17 -0
- data/lib/berkeley_library/util/ods/xml/table/repeatable.rb +38 -0
- data/lib/berkeley_library/util/ods/xml/table/table.rb +193 -0
- data/lib/berkeley_library/util/ods/xml/table/table_cell.rb +46 -0
- data/lib/berkeley_library/util/ods/xml/table/table_column.rb +43 -0
- data/lib/berkeley_library/util/ods/xml/table/table_row.rb +136 -0
- data/lib/berkeley_library/util/ods/xml/text/p.rb +118 -0
- data/lib/berkeley_library/util/paths.rb +111 -0
- data/lib/berkeley_library/util/stringios.rb +30 -0
- data/lib/berkeley_library/util/strings.rb +42 -0
- data/lib/berkeley_library/util/sys_exits.rb +15 -0
- data/lib/berkeley_library/util/times.rb +22 -0
- data/lib/berkeley_library/util/uris.rb +44 -0
- data/lib/berkeley_library/util/uris/appender.rb +162 -0
- data/lib/berkeley_library/util/uris/requester.rb +62 -0
- data/lib/berkeley_library/util/uris/validator.rb +32 -0
- data/rakelib/bundle.rake +8 -0
- data/rakelib/coverage.rake +11 -0
- data/rakelib/gem.rake +54 -0
- data/rakelib/rubocop.rake +18 -0
- data/rakelib/spec.rake +2 -0
- data/spec/.rubocop.yml +40 -0
- data/spec/berkeley_library/tind/api/api_exception_spec.rb +91 -0
- data/spec/berkeley_library/tind/api/api_spec.rb +143 -0
- data/spec/berkeley_library/tind/api/collection_spec.rb +74 -0
- data/spec/berkeley_library/tind/api/date_range_spec.rb +110 -0
- data/spec/berkeley_library/tind/api/format_spec.rb +54 -0
- data/spec/berkeley_library/tind/api/search_spec.rb +364 -0
- data/spec/berkeley_library/tind/config_spec.rb +86 -0
- data/spec/berkeley_library/tind/export/column_group_spec.rb +29 -0
- data/spec/berkeley_library/tind/export/column_spec.rb +43 -0
- data/spec/berkeley_library/tind/export/config_spec.rb +206 -0
- data/spec/berkeley_library/tind/export/export_command_spec.rb +169 -0
- data/spec/berkeley_library/tind/export/export_format_spec.rb +59 -0
- data/spec/berkeley_library/tind/export/export_matcher.rb +112 -0
- data/spec/berkeley_library/tind/export/export_spec.rb +150 -0
- data/spec/berkeley_library/tind/export/exporter_spec.rb +125 -0
- data/spec/berkeley_library/tind/export/row_spec.rb +118 -0
- data/spec/berkeley_library/tind/export/table_spec.rb +322 -0
- data/spec/berkeley_library/tind/marc/xml_reader_spec.rb +93 -0
- data/spec/berkeley_library/util/arrays_spec.rb +340 -0
- data/spec/berkeley_library/util/ods/spreadsheet_spec.rb +124 -0
- data/spec/berkeley_library/util/ods/xml/content_doc_spec.rb +121 -0
- data/spec/berkeley_library/util/ods/xml/manifest/file_entry_spec.rb +27 -0
- data/spec/berkeley_library/util/ods/xml/manifest/manifest_spec.rb +33 -0
- data/spec/berkeley_library/util/ods/xml/office/document_content_spec.rb +60 -0
- data/spec/berkeley_library/util/ods/xml/style/automatic_styles_spec.rb +37 -0
- data/spec/berkeley_library/util/ods/xml/style/family_spec.rb +57 -0
- data/spec/berkeley_library/util/ods/xml/table/table_row_spec.rb +179 -0
- data/spec/berkeley_library/util/ods/xml/table/table_spec.rb +218 -0
- data/spec/berkeley_library/util/paths_spec.rb +90 -0
- data/spec/berkeley_library/util/stringios_spec.rb +34 -0
- data/spec/berkeley_library/util/strings_spec.rb +27 -0
- data/spec/berkeley_library/util/times_spec.rb +39 -0
- data/spec/berkeley_library/util/uris_spec.rb +118 -0
- data/spec/data/collection-names.txt +438 -0
- data/spec/data/collections.json +4827 -0
- data/spec/data/disjoint-records.xml +187 -0
- data/spec/data/record-184453.xml +58 -0
- data/spec/data/record-184458.xml +63 -0
- data/spec/data/record-187888.xml +78 -0
- data/spec/data/records-api-search-cjk-p1.xml +6381 -0
- data/spec/data/records-api-search-cjk-p2.xml +5 -0
- data/spec/data/records-api-search-p1.xml +4506 -0
- data/spec/data/records-api-search-p2.xml +4509 -0
- data/spec/data/records-api-search-p3.xml +4506 -0
- data/spec/data/records-api-search-p4.xml +4509 -0
- data/spec/data/records-api-search-p5.xml +4506 -0
- data/spec/data/records-api-search-p6.xml +2436 -0
- data/spec/data/records-api-search-p7.xml +5 -0
- data/spec/data/records-api-search.xml +234 -0
- data/spec/data/records-manual-search.xml +547 -0
- data/spec/spec_helper.rb +30 -0
- data/test/profile/table_from_records_profile.rb +46 -0
- metadata +585 -0
@@ -0,0 +1,29 @@
|
|
1
|
+
require 'csv'
|
2
|
+
require 'berkeley_library/tind/export/exporter'
|
3
|
+
|
4
|
+
module BerkeleyLibrary
  module TIND
    module Export
      # Exporter that writes a collection as CSV (comma-separated value) text.
      class CSVExporter < Exporter
        # Exports {Exporter#collection} as CSV. The export table is built
        # first, so a failure to build it is raised before anything is logged
        # or written.
        #
        # @overload export
        #   Exports to a new string.
        #   @return [String] the CSV string
        # @overload export(out)
        #   Exports to the specified output stream.
        #   @param out [IO] the output stream
        #   @return [void]
        # @overload export(path)
        #   Exports to the specified file.
        #   @param path [String, Pathname] the path to the output file
        #   @return [void]
        def export(out = nil)
          table = export_table
          logger.info('Writing CSV')
          # noinspection RubyYardReturnMatch
          table.to_csv(out)
        end
      end
    end
  end
end
|
@@ -0,0 +1,47 @@
|
|
1
|
+
require 'berkeley_library/tind/export/export_format'
|
2
|
+
|
3
|
+
module BerkeleyLibrary
  module TIND
    module Export
      class << self
        # Writes a spreadsheet for a collection in the specified format, by
        # obtaining an exporter from {exporter_for} and calling its `export`
        # method with `out`.
        #
        # @overload export(collection, format = ExportFormat::CSV)
        #   Returns the spreadsheet as a string.
        #   @param collection [String] The collection name
        #   @param format [ExportFormat, String, Symbol] the export format
        #   @param exportable_only [Boolean] whether to include only exportable fields
        # @overload export(collection, format = ExportFormat::CSV, out)
        #   Writes the spreadsheet to the specified output stream.
        #   @param collection [String] The collection name
        #   @param format [ExportFormat, String, Symbol] the export format
        #   @param out [IO] the output stream
        #   @param exportable_only [Boolean] whether to include only exportable fields
        # @overload export(collection, format = ExportFormat::CSV, path)
        #   Writes the spreadsheet to the specified output file.
        #   @param collection [String] The collection name
        #   @param format [ExportFormat, String, Symbol] the export format
        #   @param path [String, Pathname] the path to the output file
        #   @param exportable_only [Boolean] whether to include only exportable fields
        # @raise [ExportFailed] if the collection does not exist, or cannot be exported. Note
        #   that this error is guaranteed to be raised before anything is written to `out`.
        def export(collection, format = ExportFormat::CSV, out = nil, exportable_only: true)
          # noinspection RubyYardParamTypeMatch
          exporter_for(collection, format, exportable_only: exportable_only).export(out)
        end

        # Returns an exporter for the specified collection in the specified format.
        #
        # @param collection [String] The collection name
        # @param format [ExportFormat, String, Symbol] the export format; converted
        #   with `ExportFormat.ensure_format`
        # @param exportable_only [Boolean] whether to include only exportable fields
        # @return [Exporter] the exporter
        def exporter_for(collection, format, exportable_only: true)
          # noinspection RubyNilAnalysis
          ExportFormat.ensure_format(format).exporter_for(collection, exportable_only: exportable_only)
        end
      end
    end
  end
end
|
@@ -0,0 +1,168 @@
|
|
1
|
+
require 'optparse'
|
2
|
+
|
3
|
+
require 'berkeley_library/tind/api'
|
4
|
+
require 'berkeley_library/tind/config'
|
5
|
+
require 'berkeley_library/tind/export/export'
|
6
|
+
require 'berkeley_library/tind/export/export_format'
|
7
|
+
require 'berkeley_library/logging'
|
8
|
+
require 'berkeley_library/util/sys_exits'
|
9
|
+
|
10
|
+
module BerkeleyLibrary
  module TIND
    module Export
      # Command-line front end for exporting a TIND collection, or listing
      # the available collections.
      # rubocop:disable Metrics/ClassLength
      class ExportCommand
        include BerkeleyLibrary::Util::SysExits

        # @return [Hash] the options parsed from the command-line arguments
        attr_reader :options

        # @return [IO] the stream that collection listings, and exports with
        #   no output file, are written to
        attr_reader :out

        # Parses the command-line arguments. Note that parsing also applies
        # the resulting configuration (see {ExportCommand.parse_options}), and
        # that invalid arguments print the usage message and exit.
        #
        # @param args [Array<String>] the command-line arguments
        # @param out [IO] the default output stream
        def initialize(*args, out: $stdout)
          @options = ExportCommand.parse_options(args)
          @out = out
        end

        # Runs the command: lists collections if `--list-collections` was
        # given, otherwise exports the requested collection. Any StandardError
        # is reported on standard error (with a backtrace in verbose mode) and
        # the process exits with `EX_SOFTWARE`.
        def execute!
          return list_collections if options[:list]

          export_collection
        rescue StandardError => e
          warn(e)
          warn(e.backtrace.join("\n")) if e.backtrace && options[:verbose]

          exit(EX_SOFTWARE)
        end

        private

        # Writes one tab-separated "record count, name" line per collection to {#out}.
        def list_collections
          BerkeleyLibrary::TIND::API::Collection.each_collection { |c| out.puts "#{c.nb_rec}\t#{c.name}" }
        end

        # Exports the requested collection to the output file if one was
        # given, otherwise to {#out}.
        def export_collection
          BerkeleyLibrary::TIND::Export.export(
            options[:collection],
            options[:format],
            options[:outfile] || out
          )
        end

        class << self
          include BerkeleyLibrary::Util::SysExits

          DEFAULT_FORMAT = ExportFormat::CSV
          FORMATS = ExportFormat.to_a.map(&:value).join(', ')
          OPTS = {
            f: ['--format FORMAT', "Format (#{FORMATS}; defaults to output file extension, or else to #{DEFAULT_FORMAT})"],
            o: ['--output-file FILE', 'Output file or directory'],
            l: ['--list-collections', 'List collection sizes and names'],
            u: ['--tind-base-url URL', "TIND base URL (default $#{BerkeleyLibrary::TIND::Config::ENV_TIND_BASE_URL})"],
            k: ['--api-key KEY', "TIND API key (default $#{BerkeleyLibrary::TIND::Config::ENV_TIND_API_KEY})"],
            e: ['--env-file [ENV]', 'Read environment variables from <ENV> (default: ./.env)'],
            v: ['--verbose', 'Verbose error logging'],
            h: ['--help', 'Show help and exit']
          }.freeze

          # Parses the arguments into an options hash, validates it, and
          # applies the resulting configuration (environment file, TIND base
          # URL and API key, logger). The last argument remaining after option
          # parsing is taken as the collection name.
          #
          # If any step raises a StandardError, the error message and usage
          # are printed to standard error and the process exits with `EX_USAGE`.
          #
          # @param argv [Array<String>] the command-line arguments; modified in place
          # @return [Hash] the parsed options
          def parse_options(argv)
            {}.tap do |opts|
              option_parser(opts).parse!(argv)
              opts[:collection] = argv.pop
              opts[:format] = ensure_format(opts)
              validate!(opts)
              configure!(opts)
            end
          rescue StandardError => e
            print_usage_and_exit!($stderr, EX_USAGE, e.message)
          end

          private

          # Checks that a collection was given and, for ODS output, that an
          # output file was given. Skipped entirely when listing collections.
          #
          # @raise [ArgumentError] if a required option is missing
          def validate!(opts)
            return if opts[:list]
            raise ArgumentError, 'Collection not specified' unless opts[:collection]
            raise ArgumentError, 'OpenOffice/LibreOffice export requires a filename' if opts[:format] == ExportFormat::ODS && !opts[:outfile]
          end

          # Applies the parsed options to the global TIND and logging configuration.
          def configure!(opts)
            configure_env(opts)
            BerkeleyLibrary::TIND::Config.base_uri = opts[:tind_base_url] if opts[:tind_base_url]
            BerkeleyLibrary::TIND::Config.api_key = opts[:api_key] if opts[:api_key]
            BerkeleyLibrary::Logging.logger = configure_logger(opts)
          end

          # @return [Logger] a logger that discards everything, or in verbose
          #   mode a DEBUG-level logger writing to standard error
          def configure_logger(opts)
            return Logger.new(File::NULL) unless opts[:verbose]

            # TODO: simpler log format? different log levels?
            BerkeleyLibrary::Logging::Loggers.new_readable_logger($stderr).tap { |logger| logger.level = Logger::DEBUG }
          end

          # Loads environment variables from the env file, if one was requested.
          def configure_env(opts)
            return unless (env_file = opts[:env_file])

            warn "Reading environment from #{env_file}" if opts[:verbose]

            # Deliberately lazy: dotenv is only needed when --env-file is used
            require 'dotenv'
            Dotenv.load(env_file)
          end

          # Determines the export format: the explicit `--format` option if
          # given, otherwise the output file's extension, otherwise
          # `DEFAULT_FORMAT`.
          #
          # @return [ExportFormat] the format
          # @raise [ArgumentError] if the format or extension is not a known format
          def ensure_format(opts)
            fmt = opts[:format] || outfile_extension(opts[:outfile])
            return DEFAULT_FORMAT unless fmt

            ExportFormat.ensure_format(fmt)
          end

          # Returns the extension of the output file without its leading dot,
          # or nil if there is no output file or it has no extension (e.g. an
          # output directory). An empty string is truthy in Ruby, so returning
          # nil here is what allows the fall-back to `DEFAULT_FORMAT`.
          def outfile_extension(outfile)
            return unless outfile

            ext = File.extname(outfile).sub(/^\./, '')
            ext unless ext.empty?
          end

          # Builds the option parser; each option handler records its value in `opts`.
          #
          # rubocop:disable Metrics/AbcSize, Metrics/MethodLength
          def option_parser(opts = {})
            OptionParser.new do |p|
              p.summary_indent = ' '
              p.on('-f', *OPTS[:f]) { |fmt| opts[:format] = fmt }
              p.on('-o', *OPTS[:o]) { |out| opts[:outfile] = out }
              p.on('-l', *OPTS[:l]) { opts[:list] = true }
              p.on('-u', *OPTS[:u]) { |url| opts[:tind_base_url] = url }
              p.on('-k', *OPTS[:k]) { |k| opts[:api_key] = k }
              p.on('-e', *OPTS[:e]) { |e| opts[:env_file] = env_file_path(e) }
              p.on('-v', *OPTS[:v]) { opts[:verbose] = true }
              p.on('-h', *OPTS[:h]) { print_usage_and_exit! }
            end
          end
          # rubocop:enable Metrics/AbcSize, Metrics/MethodLength

          # Resolves the env file path, defaulting to `.env` in the current
          # directory. `File.realpath` raises if the file does not exist.
          def env_file_path(env_file_opt)
            File.realpath(env_file_opt || File.join(Dir.pwd, '.env'))
          end

          # Prints an optional message followed by the usage text, then exits.
          #
          # @param out [IO] the stream to print to
          # @param exit_code [Integer] the process exit code
          # @param msg [String, nil] a message to print before the usage text
          def print_usage_and_exit!(out = $stdout, exit_code = EX_OK, msg = nil)
            out.puts("#{msg}\n\n") if msg
            out.puts(usage)
            exit(exit_code)
          end

          # @return [String] the usage message
          def usage
            <<~USAGE
              Usage: tind-export [options] COLLECTION

              Options:
              #{summarize_options}

              Examples:
              1. list collections
              tind-export --list-collections
              2. export a collection as an OpenOffice/LibreOffice spreadsheet
              tind-export -o lincoln-papers.ods 'Abraham Lincoln Papers'
              3. export a collection as an OpenOffice/LibreOffice spreadsheet in exploded XML format,
              where `lincoln-papers` is a directory
              tind-export -v -f ODS -o lincoln-papers 'Abraham Lincoln Papers'
            USAGE
          end

          # @return [String] the option summaries from a fresh option parser
          def summarize_options
            option_parser.summarize.join(' ')
          end
        end
      end
      # rubocop:enable Metrics/ClassLength
    end
  end
end
|
@@ -0,0 +1,67 @@
|
|
1
|
+
require 'typesafe_enum'
|
2
|
+
require 'berkeley_library/tind/export/csv_exporter'
|
3
|
+
require 'berkeley_library/tind/export/ods_exporter'
|
4
|
+
|
5
|
+
module BerkeleyLibrary
  module TIND
    module Export
      # Enumeration of the supported export formats.
      class ExportFormat < TypesafeEnum::Base
        new :CSV
        new :ODS

        DEFAULT = ODS

        # Creates an exporter for this format.
        #
        # @param collection [String] The collection name
        # @param exportable_only [Boolean] whether to include only exportable fields
        # @return [CSVExporter, ODSExporter, nil] the exporter, or nil if this
        #   format is neither CSV nor ODS
        def exporter_for(collection, exportable_only: true)
          exporter_class =
            if self == ExportFormat::CSV
              CSVExporter
            elsif self == ExportFormat::ODS
              ODSExporter
            end
          return unless exporter_class

          exporter_class.new(collection, exportable_only: exportable_only)
        end

        # @return [String, nil] a human-readable description of this format
        def description
          if self == ExportFormat::CSV
            'CSV (comma-separated text)'
          elsif self == ExportFormat::ODS
            'LibreOffice/OpenOffice spreadsheet'
          end
        end

        # @return [String, nil] the MIME type for this format
        def mime_type
          if self == ExportFormat::CSV
            'text/csv'
          elsif self == ExportFormat::ODS
            'application/vnd.oasis.opendocument.spreadsheet'
          end
        end

        # @return the value of this enum instance
        def to_s
          # noinspection RubyYardReturnMatch
          value
        end

        # Defined so that a format can be used where Ruby expects an
        # implicit string conversion.
        #
        # @return the value of this enum instance
        def to_str
          value
        end

        # @return [String] the class name and key of this instance
        def inspect
          "#{ExportFormat}::#{key}"
        end

        # @return [Boolean] true if this is the {DEFAULT} format, false otherwise
        def default?
          self == DEFAULT
        end

        # noinspection RubyYardReturnMatch
        class << self
          # Converts a string or symbol to an {ExportFormat}, or returns
          # an {ExportFormat} unchanged if passed one. The string form of the
          # argument is matched case-insensitively (it is downcased before
          # lookup by value).
          #
          # @param format [String, Symbol, ExportFormat, nil] the format
          # @return [ExportFormat, nil] the format, or nil if `format` is nil or false
          # @raise [ArgumentError] if no format has the specified value
          def ensure_format(format)
            return unless format
            return format if format.is_a?(ExportFormat)

            ExportFormat.find_by_value(format.to_s.downcase) ||
              raise(ArgumentError, "Unknown #{ExportFormat}: #{format.inspect}")
          end
        end
      end
    end
  end
end
|
@@ -0,0 +1,105 @@
|
|
1
|
+
require 'berkeley_library/util/logging'
|
2
|
+
require 'berkeley_library/tind/api/search'
|
3
|
+
require 'berkeley_library/tind/export/table'
|
4
|
+
|
5
|
+
module BerkeleyLibrary
  module TIND
    module Export
      # Superclass of exporters for different formats. Subclasses implement
      # {#export}; this class supplies the (memoized) table of records to export.
      class Exporter
        include BerkeleyLibrary::Logging

        # ------------------------------------------------------------
        # Accessors

        # @return [String] the collection name
        attr_reader :collection

        # @return [Boolean] whether to include only exportable fields
        attr_reader :exportable_only

        # ------------------------------------------------------------
        # Initializer

        # Initializes a new exporter
        #
        # @param collection [String] The collection name
        # @param exportable_only [Boolean] whether to include only exportable fields
        def initialize(collection, exportable_only: true)
          @collection = collection
          @exportable_only = exportable_only
        end

        # ------------------------------------------------------------
        # Abstract methods

        # Exports the collection. This implementation is a documentation stub
        # that always raises; subclasses override it.
        #
        # @param out [IO, String, Pathname, nil] the IO or file path to write the
        #   exported data to, or nil to return a string
        # @raise [NoMethodError] always, in this base implementation
        # rubocop:disable Lint/UnusedMethodArgument
        def export(out = nil)
          raise NoMethodError, "#{self.class} does not implement `export`"
        end
        # rubocop:enable Lint/UnusedMethodArgument

        # ------------------------------------------------------------
        # Predicates

        # Returns true if the collection can be exported, false otherwise.
        # Note that this requires reading the collection data from the TIND
        # server; failures will be fast but success may be slow. (On the other
        # hand, the retrieved collection data is cached, so the subsequent
        # export will not need to retrieve it again.)
        #
        # @return [Boolean] whether the export table has any content
        def any_results?
          table = _export_table
          !table.empty?
        end

        # ------------------------------------------------------------
        # Object overrides

        # Reports that a bare {Exporter} (as opposed to an instance of a
        # subclass) does not respond to `export`, since its implementation is
        # only a stub. Everything else is answered by the default behavior.
        def respond_to?(*args)
          stub_export = instance_of?(Exporter) && args.first.to_s == 'export'
          return false if stub_export

          super
        end

        # ------------------------------------------------------------
        # Protected methods

        protected

        # Returns a table of all records in the specified
        # collection
        #
        # @return [Export::Table] the table
        # @raise NoResultsError if no search results were returned for the collection
        def export_table
          # TODO: something more clever. Search.has_results?
          table = _export_table
          raise no_results_error if table.empty?

          table
        end

        private

        # @return [NoResultsError] an error naming the collection
        def no_results_error
          NoResultsError.new("No records returned for collection #{collection.inspect}")
        end

        # Returns the export table, reading the collection and building the
        # table on first use and reusing the result afterwards.
        def _export_table
          return @_export_table if @_export_table

          logger.info("Reading collection #{collection.inspect}")
          results = API::Search.new(collection: collection).each_result(freeze: true)

          logger.info('Creating export table')
          # noinspection RubyYardParamTypeMatch
          @_export_table = Export::Table.from_records(results, freeze: true, exportable_only: exportable_only)
        end
      end
    end
  end
end
|
@@ -0,0 +1,52 @@
|
|
1
|
+
module BerkeleyLibrary
  module TIND
    module Export
      # Decides which MARC fields and subfields may be exported or edited.
      #
      # Each exclusion spec is a string in one of three forms:
      #
      # - a 3-character tag (`'005'`), matching any field with that tag;
      # - a tag followed by two indicator characters (`'8564 '`), matching
      #   fields with that tag and exactly those indicators;
      # - a tag, two indicator characters, and a subfield code (`'982  a'`),
      #   matching only that subfield of such fields.
      #
      # Indicators are compared as-is with the characters in the spec, so a
      # space in a spec matches an indicator whose value is `' '`.
      module Filter
        DO_NOT_EXPORT_FIELDS = ['005', '8564 ', '902  ', '903  ', '991', '998'].map(&:freeze).freeze
        DO_NOT_EDIT_FIELDS = (['001'.freeze] + DO_NOT_EXPORT_FIELDS).freeze

        DO_NOT_EXPORT_SUBFIELDS = ['336  a', '852  c', '901  a', '901  f', '901  g', '980  a', '982  a', '982  b', '982  p'].map(&:freeze).freeze
        DO_NOT_EDIT_SUBFIELDS = (['035  a'.freeze] + DO_NOT_EXPORT_SUBFIELDS).freeze

        DO_NOT_EDIT = (DO_NOT_EDIT_FIELDS + DO_NOT_EDIT_SUBFIELDS).freeze

        class << self
          # @param tag [String] a MARC tag
          # @return [Boolean] false if the bare tag is listed in
          #   {DO_NOT_EXPORT_FIELDS}, true otherwise. Indicator-specific specs
          #   are not considered here.
          def can_export_tag?(tag)
            !DO_NOT_EXPORT_FIELDS.include?(tag)
          end

          # @param df the data field; must respond to `tag`, `indicator1`,
          #   `indicator2` and `subfield_codes`
          # @return [Boolean] true if at least one subfield of the field is exportable
          def can_export_data_field?(df)
            !exportable_subfield_codes(df).empty?
          end

          # Returns the subfield codes of the data field that may be exported.
          #
          # @param df the data field; must respond to `tag`, `indicator1`,
          #   `indicator2` and `subfield_codes`
          # @return [Array] the exportable codes; empty if the whole field is
          #   excluded by {DO_NOT_EXPORT_FIELDS}
          def exportable_subfield_codes(df)
            tag, ind1, ind2 = decompose_data_field(df)
            return [] if DO_NOT_EXPORT_FIELDS.any? { |spec| excludes?(spec, tag, ind1, ind2) }

            df.subfield_codes.reject do |code|
              DO_NOT_EXPORT_SUBFIELDS.any? { |spec| excludes?(spec, tag, ind1, ind2, code) }
            end
          end

          # @param tag [String] the MARC tag
          # @param ind1 [String] the first indicator
          # @param ind2 [String] the second indicator
          # @param code [String] the subfield code
          # @return [Boolean] true unless the subfield is covered by a spec in {DO_NOT_EDIT}
          def can_edit?(tag, ind1, ind2, code)
            DO_NOT_EDIT.none? { |spec| excludes?(spec, tag, ind1, ind2, code) }
          end

          private

          # @return [Array] the tag and both indicators of the data field
          def decompose_data_field(df)
            [df.tag, df.indicator1, df.indicator2]
          end

          # Returns true if the exclusion spec `f` covers the given tag,
          # indicators and (optionally) subfield code.
          #
          # @param f [String] the exclusion spec
          # @param tag [String] the MARC tag
          # @param ind1 [String] the first indicator
          # @param ind2 [String] the second indicator
          # @param code [String, nil] the subfield code, or nil to ask about
          #   the field as a whole
          # @return [Boolean] whether the spec applies
          def excludes?(f, tag, ind1, ind2, code = nil)
            # A bare tag matches regardless of indicators or subfield code
            return f == tag if f.size == 3
            return false unless f.start_with?(tag) && f[3] == ind1 && f[4] == ind2

            # A spec without a subfield code covers every subfield of the
            # field. A spec with one covers only that subfield; when no code
            # is asked about, a tag and indicator match is sufficient.
            spec_code = f[5]
            spec_code.nil? || code.nil? || spec_code == code
          end
        end
      end
    end
  end
end
|