berkeley_library-tind 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.github/workflows/build.yml +18 -0
- data/.gitignore +388 -0
- data/.idea/inspectionProfiles/Project_Default.xml +20 -0
- data/.idea/misc.xml +4 -0
- data/.idea/modules.xml +8 -0
- data/.idea/tind.iml +138 -0
- data/.idea/vcs.xml +6 -0
- data/.rubocop.yml +334 -0
- data/.ruby-version +1 -0
- data/.simplecov +8 -0
- data/.yardopts +1 -0
- data/CHANGES.md +58 -0
- data/Dockerfile +57 -0
- data/Gemfile +3 -0
- data/Jenkinsfile +18 -0
- data/LICENSE.md +21 -0
- data/README.md +73 -0
- data/Rakefile +20 -0
- data/berkeley_library-tind.gemspec +50 -0
- data/bin/tind-export +14 -0
- data/docker-compose.yml +15 -0
- data/lib/berkeley_library/tind.rb +3 -0
- data/lib/berkeley_library/tind/api.rb +1 -0
- data/lib/berkeley_library/tind/api/api.rb +132 -0
- data/lib/berkeley_library/tind/api/api_exception.rb +131 -0
- data/lib/berkeley_library/tind/api/collection.rb +82 -0
- data/lib/berkeley_library/tind/api/date_range.rb +67 -0
- data/lib/berkeley_library/tind/api/format.rb +32 -0
- data/lib/berkeley_library/tind/api/search.rb +100 -0
- data/lib/berkeley_library/tind/config.rb +103 -0
- data/lib/berkeley_library/tind/export.rb +1 -0
- data/lib/berkeley_library/tind/export/column.rb +54 -0
- data/lib/berkeley_library/tind/export/column_group.rb +144 -0
- data/lib/berkeley_library/tind/export/column_group_list.rb +131 -0
- data/lib/berkeley_library/tind/export/column_width_calculator.rb +76 -0
- data/lib/berkeley_library/tind/export/config.rb +154 -0
- data/lib/berkeley_library/tind/export/csv_exporter.rb +29 -0
- data/lib/berkeley_library/tind/export/export.rb +47 -0
- data/lib/berkeley_library/tind/export/export_command.rb +168 -0
- data/lib/berkeley_library/tind/export/export_exception.rb +8 -0
- data/lib/berkeley_library/tind/export/export_format.rb +67 -0
- data/lib/berkeley_library/tind/export/exporter.rb +105 -0
- data/lib/berkeley_library/tind/export/filter.rb +52 -0
- data/lib/berkeley_library/tind/export/no_results_error.rb +7 -0
- data/lib/berkeley_library/tind/export/ods_exporter.rb +138 -0
- data/lib/berkeley_library/tind/export/row.rb +24 -0
- data/lib/berkeley_library/tind/export/row_metrics.rb +18 -0
- data/lib/berkeley_library/tind/export/table.rb +175 -0
- data/lib/berkeley_library/tind/export/table_metrics.rb +116 -0
- data/lib/berkeley_library/tind/marc.rb +1 -0
- data/lib/berkeley_library/tind/marc/xml_reader.rb +144 -0
- data/lib/berkeley_library/tind/module_info.rb +14 -0
- data/lib/berkeley_library/util/arrays.rb +178 -0
- data/lib/berkeley_library/util/logging.rb +1 -0
- data/lib/berkeley_library/util/ods/spreadsheet.rb +170 -0
- data/lib/berkeley_library/util/ods/xml/content_doc.rb +26 -0
- data/lib/berkeley_library/util/ods/xml/document_node.rb +57 -0
- data/lib/berkeley_library/util/ods/xml/element_node.rb +106 -0
- data/lib/berkeley_library/util/ods/xml/loext/table_protection.rb +26 -0
- data/lib/berkeley_library/util/ods/xml/manifest/file_entry.rb +42 -0
- data/lib/berkeley_library/util/ods/xml/manifest/manifest.rb +73 -0
- data/lib/berkeley_library/util/ods/xml/manifest_doc.rb +26 -0
- data/lib/berkeley_library/util/ods/xml/namespace.rb +46 -0
- data/lib/berkeley_library/util/ods/xml/office/automatic_styles.rb +181 -0
- data/lib/berkeley_library/util/ods/xml/office/body.rb +17 -0
- data/lib/berkeley_library/util/ods/xml/office/document_content.rb +98 -0
- data/lib/berkeley_library/util/ods/xml/office/document_styles.rb +39 -0
- data/lib/berkeley_library/util/ods/xml/office/font_face_decls.rb +30 -0
- data/lib/berkeley_library/util/ods/xml/office/scripts.rb +17 -0
- data/lib/berkeley_library/util/ods/xml/office/spreadsheet.rb +37 -0
- data/lib/berkeley_library/util/ods/xml/office/styles.rb +39 -0
- data/lib/berkeley_library/util/ods/xml/style/cell_style.rb +58 -0
- data/lib/berkeley_library/util/ods/xml/style/column_style.rb +36 -0
- data/lib/berkeley_library/util/ods/xml/style/default_style.rb +31 -0
- data/lib/berkeley_library/util/ods/xml/style/family.rb +85 -0
- data/lib/berkeley_library/util/ods/xml/style/font_face.rb +46 -0
- data/lib/berkeley_library/util/ods/xml/style/paragraph_properties.rb +30 -0
- data/lib/berkeley_library/util/ods/xml/style/row_style.rb +37 -0
- data/lib/berkeley_library/util/ods/xml/style/style.rb +44 -0
- data/lib/berkeley_library/util/ods/xml/style/table_cell_properties.rb +40 -0
- data/lib/berkeley_library/util/ods/xml/style/table_column_properties.rb +30 -0
- data/lib/berkeley_library/util/ods/xml/style/table_properties.rb +25 -0
- data/lib/berkeley_library/util/ods/xml/style/table_row_properties.rb +28 -0
- data/lib/berkeley_library/util/ods/xml/style/table_style.rb +27 -0
- data/lib/berkeley_library/util/ods/xml/style/text_properties.rb +52 -0
- data/lib/berkeley_library/util/ods/xml/styles_doc.rb +26 -0
- data/lib/berkeley_library/util/ods/xml/table/named_expressions.rb +17 -0
- data/lib/berkeley_library/util/ods/xml/table/repeatable.rb +38 -0
- data/lib/berkeley_library/util/ods/xml/table/table.rb +193 -0
- data/lib/berkeley_library/util/ods/xml/table/table_cell.rb +46 -0
- data/lib/berkeley_library/util/ods/xml/table/table_column.rb +43 -0
- data/lib/berkeley_library/util/ods/xml/table/table_row.rb +136 -0
- data/lib/berkeley_library/util/ods/xml/text/p.rb +118 -0
- data/lib/berkeley_library/util/paths.rb +111 -0
- data/lib/berkeley_library/util/stringios.rb +30 -0
- data/lib/berkeley_library/util/strings.rb +42 -0
- data/lib/berkeley_library/util/sys_exits.rb +15 -0
- data/lib/berkeley_library/util/times.rb +22 -0
- data/lib/berkeley_library/util/uris.rb +44 -0
- data/lib/berkeley_library/util/uris/appender.rb +162 -0
- data/lib/berkeley_library/util/uris/requester.rb +62 -0
- data/lib/berkeley_library/util/uris/validator.rb +32 -0
- data/rakelib/bundle.rake +8 -0
- data/rakelib/coverage.rake +11 -0
- data/rakelib/gem.rake +54 -0
- data/rakelib/rubocop.rake +18 -0
- data/rakelib/spec.rake +2 -0
- data/spec/.rubocop.yml +40 -0
- data/spec/berkeley_library/tind/api/api_exception_spec.rb +91 -0
- data/spec/berkeley_library/tind/api/api_spec.rb +143 -0
- data/spec/berkeley_library/tind/api/collection_spec.rb +74 -0
- data/spec/berkeley_library/tind/api/date_range_spec.rb +110 -0
- data/spec/berkeley_library/tind/api/format_spec.rb +54 -0
- data/spec/berkeley_library/tind/api/search_spec.rb +364 -0
- data/spec/berkeley_library/tind/config_spec.rb +86 -0
- data/spec/berkeley_library/tind/export/column_group_spec.rb +29 -0
- data/spec/berkeley_library/tind/export/column_spec.rb +43 -0
- data/spec/berkeley_library/tind/export/config_spec.rb +206 -0
- data/spec/berkeley_library/tind/export/export_command_spec.rb +169 -0
- data/spec/berkeley_library/tind/export/export_format_spec.rb +59 -0
- data/spec/berkeley_library/tind/export/export_matcher.rb +112 -0
- data/spec/berkeley_library/tind/export/export_spec.rb +150 -0
- data/spec/berkeley_library/tind/export/exporter_spec.rb +125 -0
- data/spec/berkeley_library/tind/export/row_spec.rb +118 -0
- data/spec/berkeley_library/tind/export/table_spec.rb +322 -0
- data/spec/berkeley_library/tind/marc/xml_reader_spec.rb +93 -0
- data/spec/berkeley_library/util/arrays_spec.rb +340 -0
- data/spec/berkeley_library/util/ods/spreadsheet_spec.rb +124 -0
- data/spec/berkeley_library/util/ods/xml/content_doc_spec.rb +121 -0
- data/spec/berkeley_library/util/ods/xml/manifest/file_entry_spec.rb +27 -0
- data/spec/berkeley_library/util/ods/xml/manifest/manifest_spec.rb +33 -0
- data/spec/berkeley_library/util/ods/xml/office/document_content_spec.rb +60 -0
- data/spec/berkeley_library/util/ods/xml/style/automatic_styles_spec.rb +37 -0
- data/spec/berkeley_library/util/ods/xml/style/family_spec.rb +57 -0
- data/spec/berkeley_library/util/ods/xml/table/table_row_spec.rb +179 -0
- data/spec/berkeley_library/util/ods/xml/table/table_spec.rb +218 -0
- data/spec/berkeley_library/util/paths_spec.rb +90 -0
- data/spec/berkeley_library/util/stringios_spec.rb +34 -0
- data/spec/berkeley_library/util/strings_spec.rb +27 -0
- data/spec/berkeley_library/util/times_spec.rb +39 -0
- data/spec/berkeley_library/util/uris_spec.rb +118 -0
- data/spec/data/collection-names.txt +438 -0
- data/spec/data/collections.json +4827 -0
- data/spec/data/disjoint-records.xml +187 -0
- data/spec/data/record-184453.xml +58 -0
- data/spec/data/record-184458.xml +63 -0
- data/spec/data/record-187888.xml +78 -0
- data/spec/data/records-api-search-cjk-p1.xml +6381 -0
- data/spec/data/records-api-search-cjk-p2.xml +5 -0
- data/spec/data/records-api-search-p1.xml +4506 -0
- data/spec/data/records-api-search-p2.xml +4509 -0
- data/spec/data/records-api-search-p3.xml +4506 -0
- data/spec/data/records-api-search-p4.xml +4509 -0
- data/spec/data/records-api-search-p5.xml +4506 -0
- data/spec/data/records-api-search-p6.xml +2436 -0
- data/spec/data/records-api-search-p7.xml +5 -0
- data/spec/data/records-api-search.xml +234 -0
- data/spec/data/records-manual-search.xml +547 -0
- data/spec/spec_helper.rb +30 -0
- data/test/profile/table_from_records_profile.rb +46 -0
- metadata +585 -0
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
require 'csv'
|
|
2
|
+
require 'berkeley_library/tind/export/exporter'
|
|
3
|
+
|
|
4
|
+
module BerkeleyLibrary
|
|
5
|
+
module TIND
|
|
6
|
+
module Export
|
|
7
|
+
# Exporter that renders a collection as CSV (comma-separated value) text
class CSVExporter < Exporter
  # Writes the export table for {Exporter#collection} as CSV, logging a
  # "Writing CSV" message once the table is available.
  #
  # @overload export
  #   Renders the CSV into a new string.
  #   @return [String] the CSV text
  # @overload export(out)
  #   Writes the CSV to the given output stream.
  #   @param out [IO] the stream to write to
  #   @return [void]
  # @overload export(path)
  #   Writes the CSV to the given file.
  #   @param path [String, Pathname] the path of the file to write
  #   @return [void]
  def export(out = nil)
    # Build (or fail to build) the table first, so nothing is logged or
    # written when the collection has no results
    table = export_table
    logger.info('Writing CSV')
    # noinspection RubyYardReturnMatch
    table.to_csv(out)
  end
end
|
|
27
|
+
end
|
|
28
|
+
end
|
|
29
|
+
end
|
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
require 'berkeley_library/tind/export/export_format'
|
|
2
|
+
|
|
3
|
+
module BerkeleyLibrary
|
|
4
|
+
module TIND
|
|
5
|
+
module Export
|
|
6
|
+
class << self
|
|
7
|
+
# Exports a collection as a spreadsheet in the requested format. The
# optional third argument selects where the spreadsheet goes.
#
# @overload export(collection, format = ExportFormat::CSV, exportable_only: true)
#   Returns the spreadsheet as a string.
#   @param collection [String] the collection name
#   @param format [ExportFormat, String, Symbol] the export format
#   @param exportable_only [Boolean] whether to include only exportable fields
# @overload export(collection, format = ExportFormat::CSV, out, exportable_only: true)
#   Writes the spreadsheet to the specified output stream.
#   @param collection [String] the collection name
#   @param format [ExportFormat, String, Symbol] the export format
#   @param out [IO] the output stream
#   @param exportable_only [Boolean] whether to include only exportable fields
# @overload export(collection, format = ExportFormat::CSV, path, exportable_only: true)
#   Writes the spreadsheet to the specified output file.
#   @param collection [String] the collection name
#   @param format [ExportFormat, String, Symbol] the export format
#   @param path [String, Pathname] the path to the output file
#   @param exportable_only [Boolean] whether to include only exportable fields
# @raise [ExportFailed] if the collection does not exist, or cannot be exported. Note
#   that this error is guaranteed to be raised before anything is written to `out`.
def export(collection, format = ExportFormat::CSV, out = nil, exportable_only: true)
  # noinspection RubyYardParamTypeMatch
  exporter_for(collection, format, exportable_only: exportable_only).export(out)
end
|
|
32
|
+
|
|
33
|
+
# Builds an exporter that will export the given collection in the given format.
#
# @param collection [String] the collection name
# @param format [ExportFormat, String, Symbol] the export format; converted
#   with {ExportFormat.ensure_format}
# @param exportable_only [Boolean] whether to include only exportable fields
# @return [Exporter] the exporter
def exporter_for(collection, format, exportable_only: true)
  # noinspection RubyNilAnalysis
  ExportFormat
    .ensure_format(format)
    .exporter_for(collection, exportable_only: exportable_only)
end
|
|
43
|
+
|
|
44
|
+
end
|
|
45
|
+
end
|
|
46
|
+
end
|
|
47
|
+
end
|
|
@@ -0,0 +1,168 @@
|
|
|
1
|
+
require 'optparse'
|
|
2
|
+
|
|
3
|
+
require 'berkeley_library/tind/api'
|
|
4
|
+
require 'berkeley_library/tind/config'
|
|
5
|
+
require 'berkeley_library/tind/export/export'
|
|
6
|
+
require 'berkeley_library/tind/export/export_format'
|
|
7
|
+
require 'berkeley_library/logging'
|
|
8
|
+
require 'berkeley_library/util/sys_exits'
|
|
9
|
+
|
|
10
|
+
module BerkeleyLibrary
|
|
11
|
+
module TIND
|
|
12
|
+
module Export
|
|
13
|
+
# rubocop:disable Metrics/ClassLength
|
|
14
|
+
class ExportCommand
|
|
15
|
+
include BerkeleyLibrary::Util::SysExits
|
|
16
|
+
|
|
17
|
+
# options: the hash produced by ExportCommand.parse_options
# out: the stream passed to the initializer (standard output by default)
attr_reader :options, :out

# Parses the command-line arguments and remembers the output stream.
#
# @param args [Array<String>] the command-line arguments
# @param out [IO] the stream to write to when no output file is given
def initialize(*args, out: $stdout)
  @options = ExportCommand.parse_options(args)
  @out = out
end
|
|
24
|
+
|
|
25
|
+
# Runs the command: lists collections when the `:list` option is set,
# otherwise exports the requested collection. Any StandardError is
# reported with `warn` (plus its backtrace when `:verbose` is set), and
# the process then exits with status EX_SOFTWARE.
def execute!
  options[:list] ? list_collections : export_collection
rescue StandardError => e
  warn(e)
  trace = e.backtrace
  warn(trace.join("\n")) if trace && options[:verbose]

  exit(EX_SOFTWARE)
end
|
|
35
|
+
|
|
36
|
+
private
|
|
37
|
+
|
|
38
|
+
# Prints one line per collection to {#out}: the collection's `nb_rec`
# value, a tab, and the collection name.
def list_collections
  BerkeleyLibrary::TIND::API::Collection.each_collection do |coll|
    out.puts "#{coll.nb_rec}\t#{coll.name}"
  end
end
|
|
41
|
+
|
|
42
|
+
# Exports the collection named in the options, in the format from the
# options, to the `:outfile` path when one was given and otherwise to {#out}.
def export_collection
  destination = options[:outfile] || out
  BerkeleyLibrary::TIND::Export.export(options[:collection], options[:format], destination)
end
|
|
49
|
+
|
|
50
|
+
class << self
|
|
51
|
+
include BerkeleyLibrary::Util::SysExits
|
|
52
|
+
|
|
53
|
+
# Format used by ensure_format when neither --format nor the output file
# name selects one
DEFAULT_FORMAT = ExportFormat::CSV
# Comma-separated list of the supported format values, shown in the help text
FORMATS = ExportFormat.to_a.map(&:value).join(', ')
# Long-option spec and description for each command-line option, keyed by
# the option's short letter; option_parser splats each entry into
# OptionParser#on after the short switch
OPTS = {
  f: ['--format FORMAT', "Format (#{FORMATS}; defaults to output file extension, or else to #{DEFAULT_FORMAT})"],
  o: ['--output-file FILE', 'Output file or directory'],
  l: ['--list-collections', 'List collection sizes and names'],
  u: ['--tind-base-url URL', "TIND base URL (default $#{BerkeleyLibrary::TIND::Config::ENV_TIND_BASE_URL})"],
  k: ['--api-key KEY', "TIND API key (default $#{BerkeleyLibrary::TIND::Config::ENV_TIND_API_KEY})"],
  e: ['--env-file [ENV]', 'Read environment variables from <ENV> (default: ./.env)'],
  v: ['--verbose', 'Verbose error logging'],
  h: ['--help', 'Show help and exit']
}.freeze
|
|
65
|
+
|
|
66
|
+
# Parses command-line arguments into an options hash, then validates the
# options and applies them to the global configuration. If anything raises
# a StandardError, the error message and usage are printed to standard
# error and the process exits with EX_USAGE.
#
# @param argv [Array<String>] the arguments; recognized switches and the
#   trailing collection name are removed from this array
# @return [Hash] the parsed options
def parse_options(argv)
  parsed = {}
  option_parser(parsed).parse!(argv)
  # Whatever is left last on the command line is the collection name
  parsed[:collection] = argv.pop
  parsed[:format] = ensure_format(parsed)
  validate!(parsed)
  configure!(parsed)
  parsed
rescue StandardError => e
  print_usage_and_exit!($stderr, EX_USAGE, e.message)
end
|
|
77
|
+
|
|
78
|
+
private
|
|
79
|
+
|
|
80
|
+
# Checks that the parsed options are usable. Nothing is checked when
# `:list` is set.
#
# @param opts [Hash] the parsed options
# @raise [ArgumentError] if no collection was given, or if the format is
#   ODS and no output file was given
def validate!(opts)
  return if opts[:list]
  raise ArgumentError, 'Collection not specified' unless opts[:collection]
  return unless opts[:format] == ExportFormat::ODS
  return if opts[:outfile]

  raise ArgumentError, 'OpenOffice/LibreOffice export requires a filename'
end
|
|
85
|
+
|
|
86
|
+
# Applies the parsed options to global state: loads the environment file
# (via configure_env), overrides the TIND base URI and API key when given,
# and installs the logger built by configure_logger.
#
# @param opts [Hash] the parsed options
def configure!(opts)
  configure_env(opts)

  base_url = opts[:tind_base_url]
  BerkeleyLibrary::TIND::Config.base_uri = base_url if base_url

  key = opts[:api_key]
  BerkeleyLibrary::TIND::Config.api_key = key if key

  BerkeleyLibrary::Logging.logger = configure_logger(opts)
end
|
|
92
|
+
|
|
93
|
+
# Builds the logger for this run: a DEBUG-level readable logger on
# standard error when `:verbose` is set, otherwise a logger that writes
# to the null device.
#
# @param opts [Hash] the parsed options
# @return [Logger] the logger
def configure_logger(opts)
  if opts[:verbose]
    # TODO: simpler log format? different log levels?
    verbose_logger = BerkeleyLibrary::Logging::Loggers.new_readable_logger($stderr)
    verbose_logger.level = Logger::DEBUG
    verbose_logger
  else
    Logger.new(File::NULL)
  end
end
|
|
99
|
+
|
|
100
|
+
# Loads environment variables from the `:env_file` option with Dotenv.
# Does nothing when no environment file was requested.
#
# @param opts [Hash] the parsed options
def configure_env(opts)
  env_file = opts[:env_file]
  return unless env_file

  warn "Reading environment from #{env_file}" if opts[:verbose]

  # Loaded lazily so dotenv is only needed when --env-file is used
  require 'dotenv'
  Dotenv.load(env_file)
end
|
|
108
|
+
|
|
109
|
+
# Determines the export format: the explicit `:format` option if given,
# otherwise the extension of the `:outfile` path, otherwise DEFAULT_FORMAT.
#
# An output path without an extension (for instance a directory name)
# yields an empty extension; that is treated as "no format given" so the
# default applies, rather than being passed on as an unknown format.
#
# @param opts [Hash] the parsed options
# @return [ExportFormat] the format
# @raise [ArgumentError] if the explicit format or the file extension is
#   not a known format
def ensure_format(opts)
  fmt = opts[:format]
  if !fmt && opts[:outfile]
    extension = File.extname(opts[:outfile]).sub(/\A\./, '')
    fmt = extension unless extension.empty?
  end
  return DEFAULT_FORMAT unless fmt

  ExportFormat.ensure_format(fmt)
end
|
|
115
|
+
|
|
116
|
+
# rubocop:disable Metrics/AbcSize, Metrics/MethodLength

# Builds the OptionParser for the command line. Each switch stores its
# value in `opts`; `--help` prints the usage and exits.
#
# @param opts [Hash] the hash that parsed option values are written to
# @return [OptionParser] the parser
def option_parser(opts = {})
  OptionParser.new do |parser|
    parser.summary_indent = ' '
    parser.on('-f', *OPTS[:f]) { |value| opts[:format] = value }
    parser.on('-o', *OPTS[:o]) { |value| opts[:outfile] = value }
    parser.on('-l', *OPTS[:l]) { opts[:list] = true }
    parser.on('-u', *OPTS[:u]) { |value| opts[:tind_base_url] = value }
    parser.on('-k', *OPTS[:k]) { |value| opts[:api_key] = value }
    parser.on('-e', *OPTS[:e]) { |value| opts[:env_file] = env_file_path(value) }
    parser.on('-v', *OPTS[:v]) { opts[:verbose] = true }
    parser.on('-h', *OPTS[:h]) { print_usage_and_exit! }
  end
end
# rubocop:enable Metrics/AbcSize, Metrics/MethodLength
|
|
131
|
+
|
|
132
|
+
# Resolves the environment file to a real path, defaulting to `.env` in
# the current working directory when no path was given.
#
# @param env_file_opt [String, nil] the value of the --env-file option
# @return [String] the real path of the environment file
def env_file_path(env_file_opt)
  candidate = env_file_opt || File.join(Dir.pwd, '.env')
  File.realpath(candidate)
end
|
|
135
|
+
|
|
136
|
+
# Prints an optional message followed by the usage text, then exits.
#
# @param out [IO] the stream to print to
# @param exit_code [Integer] the process exit status
# @param msg [String, nil] a message to print (followed by a blank line)
#   before the usage text
def print_usage_and_exit!(out = $stdout, exit_code = EX_OK, msg = nil)
  if msg
    out.puts("#{msg}\n\n")
  end
  out.puts(usage)
  exit(exit_code)
end
|
|
141
|
+
|
|
142
|
+
# Builds the help text: the command synopsis, the option summary from
# summarize_options, and usage examples.
#
# @return [String] the usage text
def usage
  options_summary = summarize_options
  <<~TEXT
    Usage: tind-export [options] COLLECTION

    Options:
    #{options_summary}

    Examples:
    1. list collections
    tind-export --list-collections
    2. export a collection as an OpenOffice/LibreOffice spreadsheet
    tind-export -o lincoln-papers.ods 'Abraham Lincoln Papers'
    3. export a collection as an OpenOffice/LibreOffice spreadsheet in exploded XML format,
    where `lincoln-papers` is a directory
    tind-export -v -f ODS -o lincoln-papers 'Abraham Lincoln Papers'
  TEXT
end
|
|
159
|
+
|
|
160
|
+
# Joins the summary lines of a fresh option parser into one string for
# the usage text.
#
# @return [String] the option summary
def summarize_options
  summary_lines = option_parser.summarize
  summary_lines.join(' ')
end
|
|
163
|
+
end
|
|
164
|
+
end
|
|
165
|
+
# rubocop:enable Metrics/ClassLength
|
|
166
|
+
end
|
|
167
|
+
end
|
|
168
|
+
end
|
|
@@ -0,0 +1,67 @@
|
|
|
1
|
+
require 'typesafe_enum'
|
|
2
|
+
require 'berkeley_library/tind/export/csv_exporter'
|
|
3
|
+
require 'berkeley_library/tind/export/ods_exporter'
|
|
4
|
+
|
|
5
|
+
module BerkeleyLibrary
|
|
6
|
+
module TIND
|
|
7
|
+
module Export
|
|
8
|
+
class ExportFormat < TypesafeEnum::Base
|
|
9
|
+
# Comma-separated text (see #description, #mime_type)
new :CSV
# LibreOffice/OpenOffice spreadsheet (see #description, #mime_type)
new :ODS

# The format that #default? compares against
DEFAULT = ODS
|
|
13
|
+
|
|
14
|
+
# Creates the exporter that writes this format.
#
# @param collection [String] the collection name
# @param exportable_only [Boolean] whether to include only exportable fields
# @return [Exporter, nil] a CSVExporter for CSV, an ODSExporter for ODS,
#   or nil for any other receiver
def exporter_for(collection, exportable_only: true)
  if self == ExportFormat::CSV
    CSVExporter.new(collection, exportable_only: exportable_only)
  elsif self == ExportFormat::ODS
    ODSExporter.new(collection, exportable_only: exportable_only)
  end
end
|
|
18
|
+
|
|
19
|
+
# @return [String, nil] a human-readable name for this format, or nil if
#   the receiver is neither CSV nor ODS
def description
  if self == ExportFormat::CSV
    'CSV (comma-separated text)'
  elsif self == ExportFormat::ODS
    'LibreOffice/OpenOffice spreadsheet'
  end
end
|
|
23
|
+
|
|
24
|
+
# @return [String, nil] the MIME type for this format, or nil if the
#   receiver is neither CSV nor ODS
def mime_type
  if self == ExportFormat::CSV
    'text/csv'
  elsif self == ExportFormat::ODS
    'application/vnd.oasis.opendocument.spreadsheet'
  end
end
|
|
28
|
+
|
|
29
|
+
# Returns the enum value as this format's string form.
def to_s
  # noinspection RubyYardReturnMatch
  value
end
|
|
33
|
+
|
|
34
|
+
# Returns the enum value. Defining #to_str lets Ruby convert a format
# implicitly wherever a String is expected.
def to_str
  value
end
|
|
37
|
+
|
|
38
|
+
# @return [String] the enum class name and this instance's key, joined
#   with `::`
def inspect
  enum_class = ExportFormat
  "#{enum_class}::#{key}"
end
|
|
41
|
+
|
|
42
|
+
# @return [Boolean] whether this format equals DEFAULT
def default?
  self == DEFAULT
end
|
|
45
|
+
|
|
46
|
+
# noinspection RubyYardReturnMatch
|
|
47
|
+
class << self
|
|
48
|
+
# Converts a string or symbol to an {ExportFormat}. An {ExportFormat}
# is returned unchanged, and a nil/false argument yields nil. Strings
# and symbols are matched case-insensitively against the enum values.
#
# @param format [String, Symbol, ExportFormat, nil] the format
# @return [ExportFormat, nil] the format, or nil if none was given
# @raise [ArgumentError] if the value does not name a known format
def ensure_format(format)
  return nil unless format
  return format if format.is_a?(ExportFormat)

  wanted = format.to_s.downcase
  ExportFormat.find_by_value(wanted) || raise(ArgumentError, "Unknown #{ExportFormat}: #{format.inspect}")
end
|
|
62
|
+
end
|
|
63
|
+
end
|
|
64
|
+
|
|
65
|
+
end
|
|
66
|
+
end
|
|
67
|
+
end
|
|
@@ -0,0 +1,105 @@
|
|
|
1
|
+
require 'berkeley_library/util/logging'
|
|
2
|
+
require 'berkeley_library/tind/api/search'
|
|
3
|
+
require 'berkeley_library/tind/export/table'
|
|
4
|
+
|
|
5
|
+
module BerkeleyLibrary
|
|
6
|
+
module TIND
|
|
7
|
+
module Export
|
|
8
|
+
|
|
9
|
+
# Superclass of exporters for different formats
|
|
10
|
+
class Exporter
|
|
11
|
+
include BerkeleyLibrary::Logging
|
|
12
|
+
|
|
13
|
+
# ------------------------------------------------------------
|
|
14
|
+
# Accessors
|
|
15
|
+
|
|
16
|
+
# @return [String] the collection name, as passed to the initializer
attr_reader :collection

# @return [Boolean] whether to include only exportable fields, as passed
#   to the initializer
attr_reader :exportable_only
|
|
21
|
+
|
|
22
|
+
# ------------------------------------------------------------
|
|
23
|
+
# Initializer
|
|
24
|
+
|
|
25
|
+
# Creates an exporter for one collection. No data is read here; the
# collection is fetched the first time the export table is needed.
#
# @param collection [String] the collection name
# @param exportable_only [Boolean] whether to include only exportable fields
def initialize(collection, exportable_only: true)
  @exportable_only = exportable_only
  @collection = collection
end
|
|
33
|
+
|
|
34
|
+
# ------------------------------------------------------------
|
|
35
|
+
# Abstract methods
|
|
36
|
+
|
|
37
|
+
# Exports the collection. This base implementation always raises.
#
# @param out [IO, String, Pathname, nil] the IO or file path to write the
#   exported data to, or nil to return a string
# @raise [NoMethodError] always, in this base implementation
# rubocop:disable Lint/UnusedMethodArgument
def export(out = nil)
  # This is a stub, used for documentation
  message = "#{self.class} does not implement `export`"
  raise NoMethodError, message
end
# rubocop:enable Lint/UnusedMethodArgument
|
|
46
|
+
|
|
47
|
+
# ------------------------------------------------------------
|
|
48
|
+
# Accessors
|
|
49
|
+
|
|
50
|
+
# Reports whether the collection can be exported, i.e. whether its export
# table has any content. Answering this requires reading the collection
# data from the TIND server: failures will be fast, but success may be
# slow. The table is cached, so a subsequent export does not read the
# data again.
#
# @return [Boolean] true if there is anything to export, false otherwise
def any_results?
  return false if _export_table.empty?

  true
end
|
|
58
|
+
|
|
59
|
+
# ------------------------------------------------------------
|
|
60
|
+
# Object overrides
|
|
61
|
+
|
|
62
|
+
# Hides the stub `export` method: a direct instance of Exporter reports
# that it does not respond to `export`. Everything else is answered by
# the default implementation.
def respond_to?(*args)
  base_instance = instance_of?(Exporter)
  return false if base_instance && args.first.to_s == 'export'

  super
end
|
|
67
|
+
|
|
68
|
+
# ------------------------------------------------------------
|
|
69
|
+
# Protected methods
|
|
70
|
+
|
|
71
|
+
protected
|
|
72
|
+
|
|
73
|
+
# Returns the table of all records in the collection, refusing to hand
# back an empty one.
#
# @return [Export::Table] the table
# @raise [NoResultsError] if no search results were returned for the collection
def export_table
  # TODO: something more clever. Search.has_results?
  table = _export_table
  raise no_results_error if table.empty?

  table
end
|
|
84
|
+
|
|
85
|
+
private
|
|
86
|
+
|
|
87
|
+
# Builds (but does not raise) the error reported when the collection
# yields no records.
#
# @return [NoResultsError] the error
def no_results_error
  message = "No records returned for collection #{collection.inspect}"
  NoResultsError.new(message)
end
|
|
90
|
+
|
|
91
|
+
# Returns the export table for the collection, building it on first use
# from the search results and caching it in an instance variable.
def _export_table
  return @_export_table if @_export_table

  logger.info("Reading collection #{collection.inspect}")
  search = API::Search.new(collection: collection)
  results = search.each_result(freeze: true)

  logger.info('Creating export table')
  # noinspection RubyYardParamTypeMatch
  @_export_table = Export::Table.from_records(results, freeze: true, exportable_only: exportable_only)
end
|
|
101
|
+
|
|
102
|
+
end
|
|
103
|
+
end
|
|
104
|
+
end
|
|
105
|
+
end
|
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
module BerkeleyLibrary
|
|
2
|
+
module TIND
|
|
3
|
+
module Export
|
|
4
|
+
module Filter
|
|
5
|
+
# Specs of whole fields that are never exported. A 3-character spec is a
# bare tag; longer specs are matched by excludes? as tag + indicator 1 +
# indicator 2.
# NOTE(review): excludes? reads the indicators from positions 3 and 4 of a
# spec and the subfield code from position 5, so specs are expected to be
# 3, 5 or 6 characters long. Some literals in these lists appear shorter
# than that here -- confirm that runs of spaces were not collapsed.
DO_NOT_EXPORT_FIELDS = ['005', '8564 ', '902 ', '903 ', '991', '998'].map(&:freeze).freeze
# Fields that may not be edited: 001 plus every non-exported field
DO_NOT_EDIT_FIELDS = (['001'.freeze] + DO_NOT_EXPORT_FIELDS).freeze

# Specs of individual subfields that are never exported
DO_NOT_EXPORT_SUBFIELDS = ['336 a', '852 c', '901 a', '901 f', '901 g', '980 a', '982 a', '982 b', '982 p'].map(&:freeze).freeze
# Subfields that may not be edited: 035 a plus every non-exported subfield
DO_NOT_EDIT_SUBFIELDS = (['035 a'.freeze] + DO_NOT_EXPORT_SUBFIELDS).freeze

# Every field and subfield spec checked by can_edit?
DO_NOT_EDIT = (DO_NOT_EDIT_FIELDS + DO_NOT_EDIT_SUBFIELDS).freeze
|
|
12
|
+
|
|
13
|
+
class << self
|
|
14
|
+
# @param tag [String] a field tag
# @return [Boolean] false if the tag appears verbatim in
#   DO_NOT_EXPORT_FIELDS, true otherwise
def can_export_tag?(tag)
  DO_NOT_EXPORT_FIELDS.none? { |spec| spec == tag }
end
|
|
17
|
+
|
|
18
|
+
# @param df [Object] a data field (anything exportable_subfield_codes accepts)
# @return [Boolean] true if the field has at least one exportable
#   subfield code, false otherwise
def can_export_data_field?(df)
  codes = exportable_subfield_codes(df)
  codes.empty? ? false : true
end
|
|
21
|
+
|
|
22
|
+
# Lists the subfield codes of a data field that may be exported.
#
# @param df [Object] a data field responding to `tag`, `indicator1`,
#   `indicator2` and `subfield_codes`
# @return [Array] an empty array if the whole field matches a spec in
#   DO_NOT_EXPORT_FIELDS; otherwise the field's subfield codes minus
#   those matching a spec in DO_NOT_EXPORT_SUBFIELDS
def exportable_subfield_codes(df)
  tag, ind1, ind2 = decompose_data_field(df)
  return [] if DO_NOT_EXPORT_FIELDS.any? { |spec| excludes?(spec, tag, ind1, ind2) }

  df.subfield_codes.select do |code|
    DO_NOT_EXPORT_SUBFIELDS.none? { |spec| excludes?(spec, tag, ind1, ind2, code) }
  end
end
|
|
30
|
+
|
|
31
|
+
# @param tag [String] the field tag
# @param ind1 [String, nil] the first indicator
# @param ind2 [String, nil] the second indicator
# @param code [String, nil] the subfield code
# @return [Boolean] true unless some spec in DO_NOT_EDIT excludes the
#   given tag, indicators and subfield code
def can_edit?(tag, ind1, ind2, code)
  !DO_NOT_EDIT.any? { |spec| excludes?(spec, tag, ind1, ind2, code) }
end
|
|
34
|
+
|
|
35
|
+
private
|
|
36
|
+
|
|
37
|
+
# @param df [Object] a data field responding to `tag`, `indicator1` and
#   `indicator2`
# @return [Array] the field's tag, first indicator and second indicator,
#   in that order
def decompose_data_field(df)
  %i[tag indicator1 indicator2].map { |attribute| df.public_send(attribute) }
end
|
|
40
|
+
|
|
41
|
+
# Tests whether an exclusion spec covers the given tag, indicators and
# (optionally) subfield code.
#
# A spec is read positionally: characters 0-2 are the tag, character 3 is
# indicator 1, character 4 is indicator 2, and character 5, if present, is
# a subfield code. A 3-character spec is a bare tag and matches on the
# tag alone.
#
# TODO: test this more carefully
#
# @param f [String] the exclusion spec
# @param tag [String] the field tag
# @param ind1 [String, nil] the first indicator
# @param ind2 [String, nil] the second indicator
# @param code [String, nil] the subfield code, or nil to test the field as a whole
# @return [Boolean] whether the spec excludes the field or subfield
def excludes?(f, tag, ind1, ind2, code = nil)
  return f == tag if f.size == 3
  return false unless f.start_with?(tag) && f[3] == ind1 && f[4] == ind2

  spec_code = f[5]
  # A spec without a subfield code covers every subfield of the field, and
  # a query without a code is decided by tag and indicators alone
  return true if spec_code.nil? || code.nil?

  # Only the subfield named by the spec is excluded, not every subfield
  # of the matching field
  spec_code == code
end
|
|
48
|
+
end
|
|
49
|
+
end
|
|
50
|
+
end
|
|
51
|
+
end
|
|
52
|
+
end
|