berkeley_library-tind 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.github/workflows/build.yml +18 -0
- data/.gitignore +388 -0
- data/.idea/inspectionProfiles/Project_Default.xml +20 -0
- data/.idea/misc.xml +4 -0
- data/.idea/modules.xml +8 -0
- data/.idea/tind.iml +138 -0
- data/.idea/vcs.xml +6 -0
- data/.rubocop.yml +334 -0
- data/.ruby-version +1 -0
- data/.simplecov +8 -0
- data/.yardopts +1 -0
- data/CHANGES.md +58 -0
- data/Dockerfile +57 -0
- data/Gemfile +3 -0
- data/Jenkinsfile +18 -0
- data/LICENSE.md +21 -0
- data/README.md +73 -0
- data/Rakefile +20 -0
- data/berkeley_library-tind.gemspec +50 -0
- data/bin/tind-export +14 -0
- data/docker-compose.yml +15 -0
- data/lib/berkeley_library/tind.rb +3 -0
- data/lib/berkeley_library/tind/api.rb +1 -0
- data/lib/berkeley_library/tind/api/api.rb +132 -0
- data/lib/berkeley_library/tind/api/api_exception.rb +131 -0
- data/lib/berkeley_library/tind/api/collection.rb +82 -0
- data/lib/berkeley_library/tind/api/date_range.rb +67 -0
- data/lib/berkeley_library/tind/api/format.rb +32 -0
- data/lib/berkeley_library/tind/api/search.rb +100 -0
- data/lib/berkeley_library/tind/config.rb +103 -0
- data/lib/berkeley_library/tind/export.rb +1 -0
- data/lib/berkeley_library/tind/export/column.rb +54 -0
- data/lib/berkeley_library/tind/export/column_group.rb +144 -0
- data/lib/berkeley_library/tind/export/column_group_list.rb +131 -0
- data/lib/berkeley_library/tind/export/column_width_calculator.rb +76 -0
- data/lib/berkeley_library/tind/export/config.rb +154 -0
- data/lib/berkeley_library/tind/export/csv_exporter.rb +29 -0
- data/lib/berkeley_library/tind/export/export.rb +47 -0
- data/lib/berkeley_library/tind/export/export_command.rb +168 -0
- data/lib/berkeley_library/tind/export/export_exception.rb +8 -0
- data/lib/berkeley_library/tind/export/export_format.rb +67 -0
- data/lib/berkeley_library/tind/export/exporter.rb +105 -0
- data/lib/berkeley_library/tind/export/filter.rb +52 -0
- data/lib/berkeley_library/tind/export/no_results_error.rb +7 -0
- data/lib/berkeley_library/tind/export/ods_exporter.rb +138 -0
- data/lib/berkeley_library/tind/export/row.rb +24 -0
- data/lib/berkeley_library/tind/export/row_metrics.rb +18 -0
- data/lib/berkeley_library/tind/export/table.rb +175 -0
- data/lib/berkeley_library/tind/export/table_metrics.rb +116 -0
- data/lib/berkeley_library/tind/marc.rb +1 -0
- data/lib/berkeley_library/tind/marc/xml_reader.rb +144 -0
- data/lib/berkeley_library/tind/module_info.rb +14 -0
- data/lib/berkeley_library/util/arrays.rb +178 -0
- data/lib/berkeley_library/util/logging.rb +1 -0
- data/lib/berkeley_library/util/ods/spreadsheet.rb +170 -0
- data/lib/berkeley_library/util/ods/xml/content_doc.rb +26 -0
- data/lib/berkeley_library/util/ods/xml/document_node.rb +57 -0
- data/lib/berkeley_library/util/ods/xml/element_node.rb +106 -0
- data/lib/berkeley_library/util/ods/xml/loext/table_protection.rb +26 -0
- data/lib/berkeley_library/util/ods/xml/manifest/file_entry.rb +42 -0
- data/lib/berkeley_library/util/ods/xml/manifest/manifest.rb +73 -0
- data/lib/berkeley_library/util/ods/xml/manifest_doc.rb +26 -0
- data/lib/berkeley_library/util/ods/xml/namespace.rb +46 -0
- data/lib/berkeley_library/util/ods/xml/office/automatic_styles.rb +181 -0
- data/lib/berkeley_library/util/ods/xml/office/body.rb +17 -0
- data/lib/berkeley_library/util/ods/xml/office/document_content.rb +98 -0
- data/lib/berkeley_library/util/ods/xml/office/document_styles.rb +39 -0
- data/lib/berkeley_library/util/ods/xml/office/font_face_decls.rb +30 -0
- data/lib/berkeley_library/util/ods/xml/office/scripts.rb +17 -0
- data/lib/berkeley_library/util/ods/xml/office/spreadsheet.rb +37 -0
- data/lib/berkeley_library/util/ods/xml/office/styles.rb +39 -0
- data/lib/berkeley_library/util/ods/xml/style/cell_style.rb +58 -0
- data/lib/berkeley_library/util/ods/xml/style/column_style.rb +36 -0
- data/lib/berkeley_library/util/ods/xml/style/default_style.rb +31 -0
- data/lib/berkeley_library/util/ods/xml/style/family.rb +85 -0
- data/lib/berkeley_library/util/ods/xml/style/font_face.rb +46 -0
- data/lib/berkeley_library/util/ods/xml/style/paragraph_properties.rb +30 -0
- data/lib/berkeley_library/util/ods/xml/style/row_style.rb +37 -0
- data/lib/berkeley_library/util/ods/xml/style/style.rb +44 -0
- data/lib/berkeley_library/util/ods/xml/style/table_cell_properties.rb +40 -0
- data/lib/berkeley_library/util/ods/xml/style/table_column_properties.rb +30 -0
- data/lib/berkeley_library/util/ods/xml/style/table_properties.rb +25 -0
- data/lib/berkeley_library/util/ods/xml/style/table_row_properties.rb +28 -0
- data/lib/berkeley_library/util/ods/xml/style/table_style.rb +27 -0
- data/lib/berkeley_library/util/ods/xml/style/text_properties.rb +52 -0
- data/lib/berkeley_library/util/ods/xml/styles_doc.rb +26 -0
- data/lib/berkeley_library/util/ods/xml/table/named_expressions.rb +17 -0
- data/lib/berkeley_library/util/ods/xml/table/repeatable.rb +38 -0
- data/lib/berkeley_library/util/ods/xml/table/table.rb +193 -0
- data/lib/berkeley_library/util/ods/xml/table/table_cell.rb +46 -0
- data/lib/berkeley_library/util/ods/xml/table/table_column.rb +43 -0
- data/lib/berkeley_library/util/ods/xml/table/table_row.rb +136 -0
- data/lib/berkeley_library/util/ods/xml/text/p.rb +118 -0
- data/lib/berkeley_library/util/paths.rb +111 -0
- data/lib/berkeley_library/util/stringios.rb +30 -0
- data/lib/berkeley_library/util/strings.rb +42 -0
- data/lib/berkeley_library/util/sys_exits.rb +15 -0
- data/lib/berkeley_library/util/times.rb +22 -0
- data/lib/berkeley_library/util/uris.rb +44 -0
- data/lib/berkeley_library/util/uris/appender.rb +162 -0
- data/lib/berkeley_library/util/uris/requester.rb +62 -0
- data/lib/berkeley_library/util/uris/validator.rb +32 -0
- data/rakelib/bundle.rake +8 -0
- data/rakelib/coverage.rake +11 -0
- data/rakelib/gem.rake +54 -0
- data/rakelib/rubocop.rake +18 -0
- data/rakelib/spec.rake +2 -0
- data/spec/.rubocop.yml +40 -0
- data/spec/berkeley_library/tind/api/api_exception_spec.rb +91 -0
- data/spec/berkeley_library/tind/api/api_spec.rb +143 -0
- data/spec/berkeley_library/tind/api/collection_spec.rb +74 -0
- data/spec/berkeley_library/tind/api/date_range_spec.rb +110 -0
- data/spec/berkeley_library/tind/api/format_spec.rb +54 -0
- data/spec/berkeley_library/tind/api/search_spec.rb +364 -0
- data/spec/berkeley_library/tind/config_spec.rb +86 -0
- data/spec/berkeley_library/tind/export/column_group_spec.rb +29 -0
- data/spec/berkeley_library/tind/export/column_spec.rb +43 -0
- data/spec/berkeley_library/tind/export/config_spec.rb +206 -0
- data/spec/berkeley_library/tind/export/export_command_spec.rb +169 -0
- data/spec/berkeley_library/tind/export/export_format_spec.rb +59 -0
- data/spec/berkeley_library/tind/export/export_matcher.rb +112 -0
- data/spec/berkeley_library/tind/export/export_spec.rb +150 -0
- data/spec/berkeley_library/tind/export/exporter_spec.rb +125 -0
- data/spec/berkeley_library/tind/export/row_spec.rb +118 -0
- data/spec/berkeley_library/tind/export/table_spec.rb +322 -0
- data/spec/berkeley_library/tind/marc/xml_reader_spec.rb +93 -0
- data/spec/berkeley_library/util/arrays_spec.rb +340 -0
- data/spec/berkeley_library/util/ods/spreadsheet_spec.rb +124 -0
- data/spec/berkeley_library/util/ods/xml/content_doc_spec.rb +121 -0
- data/spec/berkeley_library/util/ods/xml/manifest/file_entry_spec.rb +27 -0
- data/spec/berkeley_library/util/ods/xml/manifest/manifest_spec.rb +33 -0
- data/spec/berkeley_library/util/ods/xml/office/document_content_spec.rb +60 -0
- data/spec/berkeley_library/util/ods/xml/style/automatic_styles_spec.rb +37 -0
- data/spec/berkeley_library/util/ods/xml/style/family_spec.rb +57 -0
- data/spec/berkeley_library/util/ods/xml/table/table_row_spec.rb +179 -0
- data/spec/berkeley_library/util/ods/xml/table/table_spec.rb +218 -0
- data/spec/berkeley_library/util/paths_spec.rb +90 -0
- data/spec/berkeley_library/util/stringios_spec.rb +34 -0
- data/spec/berkeley_library/util/strings_spec.rb +27 -0
- data/spec/berkeley_library/util/times_spec.rb +39 -0
- data/spec/berkeley_library/util/uris_spec.rb +118 -0
- data/spec/data/collection-names.txt +438 -0
- data/spec/data/collections.json +4827 -0
- data/spec/data/disjoint-records.xml +187 -0
- data/spec/data/record-184453.xml +58 -0
- data/spec/data/record-184458.xml +63 -0
- data/spec/data/record-187888.xml +78 -0
- data/spec/data/records-api-search-cjk-p1.xml +6381 -0
- data/spec/data/records-api-search-cjk-p2.xml +5 -0
- data/spec/data/records-api-search-p1.xml +4506 -0
- data/spec/data/records-api-search-p2.xml +4509 -0
- data/spec/data/records-api-search-p3.xml +4506 -0
- data/spec/data/records-api-search-p4.xml +4509 -0
- data/spec/data/records-api-search-p5.xml +4506 -0
- data/spec/data/records-api-search-p6.xml +2436 -0
- data/spec/data/records-api-search-p7.xml +5 -0
- data/spec/data/records-api-search.xml +234 -0
- data/spec/data/records-manual-search.xml +547 -0
- data/spec/spec_helper.rb +30 -0
- data/test/profile/table_from_records_profile.rb +46 -0
- metadata +585 -0
|
@@ -0,0 +1 @@
|
|
|
1
|
+
Dir.glob(File.expand_path('export/*.rb', __dir__)).sort.each(&method(:require))
|
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
require 'marc_extensions'
|
|
2
|
+
|
|
3
|
+
module BerkeleyLibrary
|
|
4
|
+
module TIND
|
|
5
|
+
module Export
|
|
6
|
+
class Column
|
|
7
|
+
|
|
8
|
+
# @return [ColumnGroup] the group containing this column
|
|
9
|
+
attr_reader :column_group
|
|
10
|
+
|
|
11
|
+
# @return [Integer] the index of this column in the group
|
|
12
|
+
attr_reader :col_in_group
|
|
13
|
+
|
|
14
|
+
# Initializes a new column
|
|
15
|
+
#
|
|
16
|
+
# @param column_group [ColumnGroup] the group containing this column
|
|
17
|
+
# @param col_in_group [Integer] the index of this column in the group
|
|
18
|
+
def initialize(column_group, col_in_group)
|
|
19
|
+
@column_group = column_group
|
|
20
|
+
@col_in_group = col_in_group
|
|
21
|
+
end
|
|
22
|
+
|
|
23
|
+
def header
|
|
24
|
+
# NOTE: that TIND "-#" suffixes must be unique by tag, not tag + ind1 + ind2
|
|
25
|
+
@header ||= "#{column_group.prefix}#{subfield_code}-#{1 + column_group.index_in_tag}"
|
|
26
|
+
end
|
|
27
|
+
|
|
28
|
+
def subfield_code
|
|
29
|
+
@subfield_code ||= column_group.subfield_codes[col_in_group]
|
|
30
|
+
end
|
|
31
|
+
|
|
32
|
+
def value_at(row)
|
|
33
|
+
column_group.value_at(row, col_in_group)
|
|
34
|
+
end
|
|
35
|
+
|
|
36
|
+
# Reports whether this column is editable, as decided by {Filter.can_edit?}
# for the column group's tag and indicators plus this column's subfield code.
#
# The answer is memoized with `defined?` rather than `||=`, so that a
# falsy answer is cached too instead of being recomputed on every call.
#
# @return the (cached) result of Filter.can_edit? -- presumably a boolean
def can_edit?
  return @can_edit if defined?(@can_edit)

  @can_edit = Filter.can_edit?(column_group.tag, column_group.ind1, column_group.ind2, subfield_code)
end
|
|
44
|
+
|
|
45
|
+
def each_value(include_header: false)
|
|
46
|
+
return to_enum(:each_value, include_header: include_header) unless block_given?
|
|
47
|
+
|
|
48
|
+
yield header if include_header
|
|
49
|
+
column_group.row_count.times { |row| yield value_at(row) }
|
|
50
|
+
end
|
|
51
|
+
end
|
|
52
|
+
end
|
|
53
|
+
end
|
|
54
|
+
end
|
|
@@ -0,0 +1,144 @@
|
|
|
1
|
+
require 'berkeley_library/tind/export/column'
|
|
2
|
+
require 'berkeley_library/util/arrays'
|
|
3
|
+
require 'berkeley_library/util/strings'
|
|
4
|
+
|
|
5
|
+
module BerkeleyLibrary
|
|
6
|
+
module TIND
|
|
7
|
+
module Export
|
|
8
|
+
|
|
9
|
+
# A group of columns representing the subfields of a particular
|
|
10
|
+
# data field.
|
|
11
|
+
class ColumnGroup
|
|
12
|
+
include BerkeleyLibrary::Util::Arrays
|
|
13
|
+
|
|
14
|
+
# ------------------------------------------------------------
|
|
15
|
+
# Constants
|
|
16
|
+
|
|
17
|
+
# Indicators SHOULD NOT be capital letters, but TIND internal fields
|
|
18
|
+
# don't respect that. Thus the /i flag.
|
|
19
|
+
INDICATOR_RE = /\A[0-9a-z ]\z/i.freeze # \A..\z anchor the whole string; ^..$ would accept any matching line of a multi-line value
|
|
20
|
+
|
|
21
|
+
SUBFIELD_CODE_RE = /\A[0-9a-z]\z/.freeze # \A..\z anchor the whole string; ^..$ would accept any matching line of a multi-line value
|
|
22
|
+
|
|
23
|
+
# ------------------------------------------------------------
|
|
24
|
+
# Accessors
|
|
25
|
+
|
|
26
|
+
attr_reader :tag, :index_in_tag, :ind1, :ind2, :subfield_codes
|
|
27
|
+
|
|
28
|
+
# ------------------------------------------------------------
|
|
29
|
+
# Initializer
|
|
30
|
+
|
|
31
|
+
def initialize(tag, index_in_tag, ind1, ind2, subfield_codes)
|
|
32
|
+
@tag, @ind1, @ind2 = valid_tag_and_indicators(tag, ind1, ind2)
|
|
33
|
+
@subfield_codes = valid_subfield_codes(subfield_codes).dup.freeze
|
|
34
|
+
@index_in_tag = index_in_tag
|
|
35
|
+
end
|
|
36
|
+
|
|
37
|
+
# ------------------------------------------------------------
|
|
38
|
+
# Class methods
|
|
39
|
+
|
|
40
|
+
class << self
|
|
41
|
+
|
|
42
|
+
def prefix_for(data_field)
|
|
43
|
+
format_prefix(data_field.tag, data_field.indicator1, data_field.indicator2)
|
|
44
|
+
end
|
|
45
|
+
|
|
46
|
+
def format_indicator(ind)
|
|
47
|
+
ind == ' ' ? '_' : ind
|
|
48
|
+
end
|
|
49
|
+
|
|
50
|
+
def format_prefix(tag, ind1, ind2)
|
|
51
|
+
[tag, format_indicator(ind1), format_indicator(ind2)].join
|
|
52
|
+
end
|
|
53
|
+
end
|
|
54
|
+
|
|
55
|
+
# ------------------------------------------------------------
|
|
56
|
+
# Instance methods
|
|
57
|
+
|
|
58
|
+
def prefix
|
|
59
|
+
ColumnGroup.format_prefix(tag, ind1, ind2)
|
|
60
|
+
end
|
|
61
|
+
|
|
62
|
+
def maybe_add_at(row, data_field)
|
|
63
|
+
warn "Data field at row #{row} is not frozen: #{data_field}" unless data_field.subfields.frozen?
|
|
64
|
+
# set nil explicitly so row_count etc. are correct
|
|
65
|
+
return (data_fields[row] = nil) unless can_add?(data_field)
|
|
66
|
+
|
|
67
|
+
@subfield_codes = merge(subfield_codes, data_field.subfield_codes)
|
|
68
|
+
data_fields[row] = data_field
|
|
69
|
+
end
|
|
70
|
+
|
|
71
|
+
def value_at(row, col)
|
|
72
|
+
return unless (data_field = data_fields[row])
|
|
73
|
+
return unless (subfield_indices = subfield_indices_for(row))
|
|
74
|
+
return unless (subfield_index = subfield_indices[col])
|
|
75
|
+
return unless (subfield = data_field.subfields[subfield_index])
|
|
76
|
+
|
|
77
|
+
subfield.value
|
|
78
|
+
end
|
|
79
|
+
|
|
80
|
+
def columns
|
|
81
|
+
@columns ||= (0...subfield_codes.length).map { |col| Column.new(self, col) }
|
|
82
|
+
end
|
|
83
|
+
|
|
84
|
+
def row_count
|
|
85
|
+
data_fields.size
|
|
86
|
+
end
|
|
87
|
+
|
|
88
|
+
# ------------------------------------------------------------
|
|
89
|
+
# Object overrides
|
|
90
|
+
|
|
91
|
+
def to_s
|
|
92
|
+
"ColumnGroup #{tag}-#{index_in_tag}:" + [prefix, subfield_codes.join].join
|
|
93
|
+
end
|
|
94
|
+
|
|
95
|
+
# ------------------------------------------------------------
|
|
96
|
+
# Private methods
|
|
97
|
+
|
|
98
|
+
private
|
|
99
|
+
|
|
100
|
+
def valid_tag_and_indicators(tag, ind1, ind2)
|
|
101
|
+
raise ArgumentError, "#{tag}#{ind1}#{ind2}: not a valid tag" unless tag.size == 3 && BerkeleyLibrary::Util::Strings.ascii_numeric?(tag)
|
|
102
|
+
raise ArgumentError, "#{tag}#{ind1}#{ind2}: not a valid indicator: #{ind1.inspect}" unless ind1 =~ INDICATOR_RE
|
|
103
|
+
raise ArgumentError, "#{tag}#{ind1}#{ind2}: not a valid indicator: #{ind2.inspect}" unless ind2 =~ INDICATOR_RE
|
|
104
|
+
|
|
105
|
+
[tag, ind1, ind2]
|
|
106
|
+
end
|
|
107
|
+
|
|
108
|
+
def valid_subfield_codes(subfield_codes)
|
|
109
|
+
subfield_codes.tap do |scc|
|
|
110
|
+
raise ArgumentError, "Invalid subfield codes: #{scc.inspect}" unless scc.all? { |c| c =~ SUBFIELD_CODE_RE }
|
|
111
|
+
end
|
|
112
|
+
end
|
|
113
|
+
|
|
114
|
+
def can_add?(data_field)
|
|
115
|
+
data_field.tag == tag &&
|
|
116
|
+
data_field.indicator1 == ind1 &&
|
|
117
|
+
data_field.indicator2 == ind2
|
|
118
|
+
end
|
|
119
|
+
|
|
120
|
+
# Returns the subfield indices for the data field at the given row,
# computing them with find_subfield_indices and caching them on first use.
#
# The cache is only trusted when it holds a non-nil entry: assigning to a
# later row pads the cache array with nils, so a bare `row < size` check
# would wrongly report "no indices" for earlier rows that were simply
# never computed (e.g. when rows are read out of order).
#
# @param row [Integer] the row index
# @return the value of find_subfield_indices for that row's data field,
#   or nil if there is no data field at that row
def subfield_indices_for(row)
  cached = cached_subfield_indices[row]
  return cached if cached
  return unless (data_field = data_fields[row])

  cached_subfield_indices[row] = find_subfield_indices(data_field)
end
|
|
126
|
+
|
|
127
|
+
def cached_subfield_indices
|
|
128
|
+
@cached_subfield_indices ||= []
|
|
129
|
+
end
|
|
130
|
+
|
|
131
|
+
def find_subfield_indices(data_field)
|
|
132
|
+
return unless can_add?(data_field)
|
|
133
|
+
|
|
134
|
+
df_index_to_cg_index = find_indices(in_array: subfield_codes, for_array: data_field.subfield_codes)
|
|
135
|
+
invert(df_index_to_cg_index)
|
|
136
|
+
end
|
|
137
|
+
|
|
138
|
+
def data_fields
|
|
139
|
+
@data_fields ||= []
|
|
140
|
+
end
|
|
141
|
+
end
|
|
142
|
+
end
|
|
143
|
+
end
|
|
144
|
+
end
|
|
@@ -0,0 +1,131 @@
|
|
|
1
|
+
require 'berkeley_library/tind/export/filter'
|
|
2
|
+
require 'berkeley_library/tind/export/column_group'
|
|
3
|
+
require 'berkeley_library/tind/export/column'
|
|
4
|
+
require 'berkeley_library/tind/export/export_exception'
|
|
5
|
+
require 'berkeley_library/tind/export/row'
|
|
6
|
+
require 'berkeley_library/util/arrays'
|
|
7
|
+
|
|
8
|
+
module BerkeleyLibrary
|
|
9
|
+
module TIND
|
|
10
|
+
module Export
|
|
11
|
+
class ColumnGroupList
|
|
12
|
+
include Enumerable
|
|
13
|
+
|
|
14
|
+
# ------------------------------------------------------------
|
|
15
|
+
# Initializer
|
|
16
|
+
|
|
17
|
+
def initialize(exportable_only: false)
|
|
18
|
+
@exportable_only = exportable_only
|
|
19
|
+
end
|
|
20
|
+
|
|
21
|
+
# ------------------------------------------------------------
|
|
22
|
+
# Accessors
|
|
23
|
+
|
|
24
|
+
def exportable_only?
|
|
25
|
+
@exportable_only
|
|
26
|
+
end
|
|
27
|
+
|
|
28
|
+
# ------------------------------------------------------------
|
|
29
|
+
# Misc. instance methods
|
|
30
|
+
|
|
31
|
+
def all_groups
|
|
32
|
+
# NOTE: this isn't ||= because we only cache on #freeze
|
|
33
|
+
@all_groups || begin
|
|
34
|
+
all_tags = column_groups_by_tag.keys.sort
|
|
35
|
+
all_tags.each_with_object([]) do |tag, groups|
|
|
36
|
+
tag_column_groups = column_groups_by_tag[tag]
|
|
37
|
+
groups.concat(tag_column_groups)
|
|
38
|
+
end
|
|
39
|
+
end
|
|
40
|
+
end
|
|
41
|
+
|
|
42
|
+
def add_data_fields(marc_record, row)
|
|
43
|
+
# TODO: what about control fields?
|
|
44
|
+
marc_record.data_fields_by_tag.each do |tag, data_fields|
|
|
45
|
+
next unless can_export_tag(tag)
|
|
46
|
+
next if data_fields.empty?
|
|
47
|
+
|
|
48
|
+
add_fields_at(data_fields, row)
|
|
49
|
+
end
|
|
50
|
+
rescue StandardError => e
|
|
51
|
+
raise Export::ExportException, "Error adding MARC record #{marc_record.record_id} at row #{row}: #{e.message}"
|
|
52
|
+
end
|
|
53
|
+
|
|
54
|
+
# ------------------------------------------------------------
|
|
55
|
+
# Enumerable
|
|
56
|
+
|
|
57
|
+
def each(&block)
|
|
58
|
+
all_groups.each(&block)
|
|
59
|
+
end
|
|
60
|
+
|
|
61
|
+
# ------------------------------------------------------------
|
|
62
|
+
# Object overrides
|
|
63
|
+
|
|
64
|
+
def freeze
|
|
65
|
+
column_groups_by_tag.each_value(&:freeze)
|
|
66
|
+
column_groups_by_tag.freeze
|
|
67
|
+
@all_groups ||= all_groups.freeze
|
|
68
|
+
self
|
|
69
|
+
end
|
|
70
|
+
|
|
71
|
+
def frozen?
|
|
72
|
+
column_groups_by_tag.frozen? &&
|
|
73
|
+
@all_groups && @all_groups.frozen?
|
|
74
|
+
end
|
|
75
|
+
|
|
76
|
+
# ------------------------------------------------------------
|
|
77
|
+
# Private methods
|
|
78
|
+
|
|
79
|
+
private
|
|
80
|
+
|
|
81
|
+
def column_groups_by_tag
|
|
82
|
+
@column_groups_by_tag ||= {}
|
|
83
|
+
end
|
|
84
|
+
|
|
85
|
+
def add_fields_at(data_fields, row)
|
|
86
|
+
tag = data_fields[0].tag.freeze
|
|
87
|
+
tag_column_groups = (column_groups_by_tag[tag] ||= [])
|
|
88
|
+
|
|
89
|
+
data_fields.inject(0) do |offset, df|
|
|
90
|
+
next offset unless can_export_df(df)
|
|
91
|
+
|
|
92
|
+
1 + add_data_field(df, row, tag_column_groups, at_or_after: offset)
|
|
93
|
+
end
|
|
94
|
+
end
|
|
95
|
+
|
|
96
|
+
def add_data_field(df, row, tag_column_groups, at_or_after: 0)
|
|
97
|
+
added_at = added_at_index(df, row, tag_column_groups, at_or_after)
|
|
98
|
+
return added_at if added_at
|
|
99
|
+
|
|
100
|
+
new_group = ColumnGroup.new(df.tag, tag_column_groups.size, df.indicator1, df.indicator2, exportable_subfield_codes(df)).tap do |cg|
|
|
101
|
+
raise Export::ExportException, "Unexpected failure to add #{df} to #{cg}" unless cg.maybe_add_at(row, df)
|
|
102
|
+
end
|
|
103
|
+
tag_column_groups << new_group
|
|
104
|
+
tag_column_groups.size - 1
|
|
105
|
+
end
|
|
106
|
+
|
|
107
|
+
def added_at_index(df, row, tag_column_groups, at_or_after)
|
|
108
|
+
BerkeleyLibrary::Util::Arrays.find_index(in_array: tag_column_groups, start_index: at_or_after) { |cg| cg.maybe_add_at(row, df) }
|
|
109
|
+
end
|
|
110
|
+
|
|
111
|
+
def can_export_tag(tag)
|
|
112
|
+
return true unless exportable_only?
|
|
113
|
+
|
|
114
|
+
Filter.can_export_tag?(tag)
|
|
115
|
+
end
|
|
116
|
+
|
|
117
|
+
def can_export_df(df)
|
|
118
|
+
return true unless exportable_only?
|
|
119
|
+
|
|
120
|
+
Filter.can_export_data_field?(df)
|
|
121
|
+
end
|
|
122
|
+
|
|
123
|
+
def exportable_subfield_codes(df)
|
|
124
|
+
return df.subfield_codes unless exportable_only?
|
|
125
|
+
|
|
126
|
+
Filter.exportable_subfield_codes(df)
|
|
127
|
+
end
|
|
128
|
+
end
|
|
129
|
+
end
|
|
130
|
+
end
|
|
131
|
+
end
|
|
@@ -0,0 +1,76 @@
|
|
|
1
|
+
require 'berkeley_library/tind/export/config'
|
|
2
|
+
|
|
3
|
+
module BerkeleyLibrary
|
|
4
|
+
module TIND
|
|
5
|
+
module Export
|
|
6
|
+
# Calculates approximate column widths for cell values, based on
|
|
7
|
+
# Arial average character widths (in units of 1/1000 point size) per
|
|
8
|
+
# {https://www.math.utah.edu/~beebe/fonts/afm-widths.html this table}.
|
|
9
|
+
# (LibreOffice default is Liberation Sans, which should match Arial.)
|
|
10
|
+
#
|
|
11
|
+
# CJK and fullwidth characters will probably be mapped to another font,
|
|
12
|
+
# but it's probably going to be roughly square.
|
|
13
|
+
#
|
|
14
|
+
# Non-Western, non-CJK characters will *hopefully* not be much wider
|
|
15
|
+
# than their Western counterparts.
|
|
16
|
+
module ColumnWidthCalculator
|
|
17
|
+
include Config
|
|
18
|
+
|
|
19
|
+
WIDTH_UNIT = 1000.0
|
|
20
|
+
|
|
21
|
+
WIDTH_LOWER = 489.46
|
|
22
|
+
|
|
23
|
+
WIDTH_UPPER = 677.42
|
|
24
|
+
|
|
25
|
+
WIDTH_DIGIT = 556.0
|
|
26
|
+
|
|
27
|
+
# Measured empirically in LibreOffice 6.4.7.2
|
|
28
|
+
WIDTH_CJK = 970.0
|
|
29
|
+
|
|
30
|
+
WIDTHS = {
|
|
31
|
+
/[\u4e00-\u9fff]/ => WIDTH_CJK, # CJK (excluding half-width forms)
|
|
32
|
+
/[\uff01-\uff65\uffe0-\uffee]/ => WIDTH_CJK, # Fullwidth forms
|
|
33
|
+
/[[:digit:]]/ => WIDTH_DIGIT,
|
|
34
|
+
/[[:upper:]]/ => WIDTH_UPPER,
|
|
35
|
+
/[[:lower:]]/ => WIDTH_LOWER,
|
|
36
|
+
/[[:space:]]/ => 2 * WIDTH_LOWER / 3 # empirical
|
|
37
|
+
}.freeze
|
|
38
|
+
|
|
39
|
+
# See {WIDTHS}
|
|
40
|
+
WIDTH_DEFAULT = WIDTH_DIGIT # Fallback to digit width for other characters
|
|
41
|
+
|
|
42
|
+
def width_ps_units(str)
|
|
43
|
+
return 0 if str.nil? || str.empty?
|
|
44
|
+
|
|
45
|
+
chars = str.unicode_normalize.chars
|
|
46
|
+
chars.inject(0) { |total, c| total + width_for_char(c) }
|
|
47
|
+
end
|
|
48
|
+
|
|
49
|
+
def width_points(str, font_size_points = font_size_pt)
|
|
50
|
+
width_per_point(str) * font_size_points
|
|
51
|
+
end
|
|
52
|
+
|
|
53
|
+
def width_inches(str, font_size_points = font_size_pt)
|
|
54
|
+
return 0 if str.nil? || str.empty?
|
|
55
|
+
|
|
56
|
+
width_points(str, font_size_points) / 72.0
|
|
57
|
+
end
|
|
58
|
+
|
|
59
|
+
private
|
|
60
|
+
|
|
61
|
+
def width_per_point(str)
|
|
62
|
+
width_ps_units(str) / WIDTH_UNIT
|
|
63
|
+
end
|
|
64
|
+
|
|
65
|
+
def width_for_char(c)
|
|
66
|
+
WIDTHS.each { |re, w| return w if c =~ re }
|
|
67
|
+
WIDTH_DEFAULT
|
|
68
|
+
end
|
|
69
|
+
|
|
70
|
+
class << self
|
|
71
|
+
include ColumnWidthCalculator
|
|
72
|
+
end
|
|
73
|
+
end
|
|
74
|
+
end
|
|
75
|
+
end
|
|
76
|
+
end
|
|
@@ -0,0 +1,154 @@
|
|
|
1
|
+
module BerkeleyLibrary
|
|
2
|
+
module TIND
|
|
3
|
+
module Export
|
|
4
|
+
module Config
|
|
5
|
+
|
|
6
|
+
# Font size in points
|
|
7
|
+
FONT_SIZE_DEFAULT = 10.0
|
|
8
|
+
|
|
9
|
+
# Round column widths up to nearest eighth inch
|
|
10
|
+
WIDTH_INCREMENT_DEFAULT_INCHES = '1/8'.to_r
|
|
11
|
+
|
|
12
|
+
# Round row heights up to nearest 2 points
|
|
13
|
+
HEIGHT_INCREMENT_DEFAULT_POINTS = 2
|
|
14
|
+
|
|
15
|
+
# Max column width before wrapping
|
|
16
|
+
MAX_COLUMN_WIDTH_INCHES = 5.0
|
|
17
|
+
|
|
18
|
+
# Decimal places for formatting
|
|
19
|
+
FORMAT_DIGITS_DEFAULT = 3
|
|
20
|
+
|
|
21
|
+
# Line height as multiple of font size
|
|
22
|
+
LINE_HEIGHT_DEFAULT_EM = '4/3'.to_r
|
|
23
|
+
|
|
24
|
+
# @return [Numeric] the font size in points
|
|
25
|
+
def font_size_pt
|
|
26
|
+
Config.font_size_pt
|
|
27
|
+
end
|
|
28
|
+
|
|
29
|
+
# @return [Numeric] the max column width in inches
|
|
30
|
+
def max_col_width_in
|
|
31
|
+
Config.max_col_width_in
|
|
32
|
+
end
|
|
33
|
+
|
|
34
|
+
# @return [Numeric] the width rounding increment in inches
|
|
35
|
+
def w_incr_in
|
|
36
|
+
Config.w_incr_in
|
|
37
|
+
end
|
|
38
|
+
|
|
39
|
+
# @return [Numeric] the height rounding increment in points
|
|
40
|
+
def h_incr_pt
|
|
41
|
+
Config.h_incr_pt
|
|
42
|
+
end
|
|
43
|
+
|
|
44
|
+
# @return [Numeric] the line height in ems (multiples of the font point size)
|
|
45
|
+
def line_height_em
|
|
46
|
+
Config.line_height_em
|
|
47
|
+
end
|
|
48
|
+
|
|
49
|
+
# @return [Integer] the number of digits to use when formatting values
|
|
50
|
+
def format_digits
|
|
51
|
+
Config.format_digits
|
|
52
|
+
end
|
|
53
|
+
|
|
54
|
+
# noinspection RubyYardReturnMatch
|
|
55
|
+
class << self
|
|
56
|
+
|
|
57
|
+
# @return [Numeric] the font size in points
|
|
58
|
+
def font_size_pt
|
|
59
|
+
@font_size_pt ||= ensure_positive_numeric(ENV['ODS_FONT_SIZE_DEFAULT'] || Config::FONT_SIZE_DEFAULT)
|
|
60
|
+
end
|
|
61
|
+
|
|
62
|
+
def font_size_pt=(value)
|
|
63
|
+
@font_size_pt = ensure_positive_numeric(value)
|
|
64
|
+
end
|
|
65
|
+
|
|
66
|
+
# @return [Numeric] the max column width in inches
|
|
67
|
+
def max_col_width_in
|
|
68
|
+
@max_col_width_in ||= ensure_positive_numeric(ENV['ODS_MAX_COLUMN_WIDTH_INCHES'] || Config::MAX_COLUMN_WIDTH_INCHES)
|
|
69
|
+
end
|
|
70
|
+
|
|
71
|
+
def max_col_width_in=(value)
|
|
72
|
+
@max_col_width_in = ensure_positive_numeric(value)
|
|
73
|
+
end
|
|
74
|
+
|
|
75
|
+
# @return [Numeric] the width rounding increment in inches
|
|
76
|
+
def w_incr_in
|
|
77
|
+
@w_incr_in ||= ensure_positive_numeric(ENV['ODS_WIDTH_INCREMENT_DEFAULT_INCHES'] || Config::WIDTH_INCREMENT_DEFAULT_INCHES)
|
|
78
|
+
end
|
|
79
|
+
|
|
80
|
+
def w_incr_in=(value)
|
|
81
|
+
@w_incr_in = ensure_positive_numeric(value)
|
|
82
|
+
end
|
|
83
|
+
|
|
84
|
+
# @return [Numeric] the height rounding increment in points
|
|
85
|
+
def h_incr_pt
|
|
86
|
+
@h_incr_pt ||= ensure_positive_numeric(ENV['ODS_HEIGHT_INCREMENT_DEFAULT_POINTS'] || Config::HEIGHT_INCREMENT_DEFAULT_POINTS)
|
|
87
|
+
end
|
|
88
|
+
|
|
89
|
+
def h_incr_pt=(value)
|
|
90
|
+
@h_incr_pt = ensure_positive_numeric(value)
|
|
91
|
+
end
|
|
92
|
+
|
|
93
|
+
# @return [Numeric] the line height in ems (multiples of the font point size)
|
|
94
|
+
def line_height_em
|
|
95
|
+
@line_height_em ||= ensure_positive_numeric(ENV['ODS_LINE_HEIGHT_DEFAULT_EM'] || Config::LINE_HEIGHT_DEFAULT_EM)
|
|
96
|
+
end
|
|
97
|
+
|
|
98
|
+
def line_height_em=(value)
|
|
99
|
+
@line_height_em = ensure_positive_numeric(value)
|
|
100
|
+
end
|
|
101
|
+
|
|
102
|
+
# @return [Integer] the number of digits to use when formatting values
|
|
103
|
+
def format_digits
|
|
104
|
+
@format_digits ||= ensure_positive_int(ENV['ODS_FORMAT_DIGITS_DEFAULT'] || Config::FORMAT_DIGITS_DEFAULT)
|
|
105
|
+
end
|
|
106
|
+
|
|
107
|
+
def format_digits=(value)
|
|
108
|
+
@format_digits = ensure_positive_int(value)
|
|
109
|
+
end
|
|
110
|
+
|
|
111
|
+
private
|
|
112
|
+
|
|
113
|
+
# @param v [Object] a value
|
|
114
|
+
# @return [Numeric] the value as a positive number (raises ArgumentError if it is not numeric or not positive)
|
|
115
|
+
def ensure_positive_numeric(v)
|
|
116
|
+
v_n = ensure_numeric(v)
|
|
117
|
+
return v_n if v_n > 0
|
|
118
|
+
|
|
119
|
+
raise ArgumentError, "Value must be positive: #{v_n}"
|
|
120
|
+
end
|
|
121
|
+
|
|
122
|
+
def ensure_numeric(v)
|
|
123
|
+
return v if v.is_a?(Numeric)
|
|
124
|
+
|
|
125
|
+
v_str = v.to_s
|
|
126
|
+
return v_str.to_r if v_str.include?('/')
|
|
127
|
+
return v_str.to_f if v_str.include?('.')
|
|
128
|
+
return Integer(v_str) if v_str =~ /(?:0x\h+|\d+)/
|
|
129
|
+
|
|
130
|
+
raise ArgumentError, "Can't parse #{v.inspect} as a numeric value"
|
|
131
|
+
end
|
|
132
|
+
|
|
133
|
+
# @param v [Object] a value
|
|
134
|
+
# @return [Integer]
|
|
135
|
+
def ensure_positive_int(v)
|
|
136
|
+
v_i = ensure_int(v)
|
|
137
|
+
return v_i if v_i > 0
|
|
138
|
+
|
|
139
|
+
raise ArgumentError, "Value must be positive: #{v_i}"
|
|
140
|
+
end
|
|
141
|
+
|
|
142
|
+
def ensure_int(v)
|
|
143
|
+
return v if v.is_a?(Integer)
|
|
144
|
+
|
|
145
|
+
v_str = v.to_s
|
|
146
|
+
return Integer(v_str) if v_str =~ /(?:0x\h+|\d+)/
|
|
147
|
+
|
|
148
|
+
raise ArgumentError, "Can't parse #{v.inspect} as an integer value"
|
|
149
|
+
end
|
|
150
|
+
end
|
|
151
|
+
end
|
|
152
|
+
end
|
|
153
|
+
end
|
|
154
|
+
end
|