berkeley_library-tind 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (162) hide show
  1. checksums.yaml +7 -0
  2. data/.github/workflows/build.yml +18 -0
  3. data/.gitignore +388 -0
  4. data/.idea/inspectionProfiles/Project_Default.xml +20 -0
  5. data/.idea/misc.xml +4 -0
  6. data/.idea/modules.xml +8 -0
  7. data/.idea/tind.iml +138 -0
  8. data/.idea/vcs.xml +6 -0
  9. data/.rubocop.yml +334 -0
  10. data/.ruby-version +1 -0
  11. data/.simplecov +8 -0
  12. data/.yardopts +1 -0
  13. data/CHANGES.md +58 -0
  14. data/Dockerfile +57 -0
  15. data/Gemfile +3 -0
  16. data/Jenkinsfile +18 -0
  17. data/LICENSE.md +21 -0
  18. data/README.md +73 -0
  19. data/Rakefile +20 -0
  20. data/berkeley_library-tind.gemspec +50 -0
  21. data/bin/tind-export +14 -0
  22. data/docker-compose.yml +15 -0
  23. data/lib/berkeley_library/tind.rb +3 -0
  24. data/lib/berkeley_library/tind/api.rb +1 -0
  25. data/lib/berkeley_library/tind/api/api.rb +132 -0
  26. data/lib/berkeley_library/tind/api/api_exception.rb +131 -0
  27. data/lib/berkeley_library/tind/api/collection.rb +82 -0
  28. data/lib/berkeley_library/tind/api/date_range.rb +67 -0
  29. data/lib/berkeley_library/tind/api/format.rb +32 -0
  30. data/lib/berkeley_library/tind/api/search.rb +100 -0
  31. data/lib/berkeley_library/tind/config.rb +103 -0
  32. data/lib/berkeley_library/tind/export.rb +1 -0
  33. data/lib/berkeley_library/tind/export/column.rb +54 -0
  34. data/lib/berkeley_library/tind/export/column_group.rb +144 -0
  35. data/lib/berkeley_library/tind/export/column_group_list.rb +131 -0
  36. data/lib/berkeley_library/tind/export/column_width_calculator.rb +76 -0
  37. data/lib/berkeley_library/tind/export/config.rb +154 -0
  38. data/lib/berkeley_library/tind/export/csv_exporter.rb +29 -0
  39. data/lib/berkeley_library/tind/export/export.rb +47 -0
  40. data/lib/berkeley_library/tind/export/export_command.rb +168 -0
  41. data/lib/berkeley_library/tind/export/export_exception.rb +8 -0
  42. data/lib/berkeley_library/tind/export/export_format.rb +67 -0
  43. data/lib/berkeley_library/tind/export/exporter.rb +105 -0
  44. data/lib/berkeley_library/tind/export/filter.rb +52 -0
  45. data/lib/berkeley_library/tind/export/no_results_error.rb +7 -0
  46. data/lib/berkeley_library/tind/export/ods_exporter.rb +138 -0
  47. data/lib/berkeley_library/tind/export/row.rb +24 -0
  48. data/lib/berkeley_library/tind/export/row_metrics.rb +18 -0
  49. data/lib/berkeley_library/tind/export/table.rb +175 -0
  50. data/lib/berkeley_library/tind/export/table_metrics.rb +116 -0
  51. data/lib/berkeley_library/tind/marc.rb +1 -0
  52. data/lib/berkeley_library/tind/marc/xml_reader.rb +144 -0
  53. data/lib/berkeley_library/tind/module_info.rb +14 -0
  54. data/lib/berkeley_library/util/arrays.rb +178 -0
  55. data/lib/berkeley_library/util/logging.rb +1 -0
  56. data/lib/berkeley_library/util/ods/spreadsheet.rb +170 -0
  57. data/lib/berkeley_library/util/ods/xml/content_doc.rb +26 -0
  58. data/lib/berkeley_library/util/ods/xml/document_node.rb +57 -0
  59. data/lib/berkeley_library/util/ods/xml/element_node.rb +106 -0
  60. data/lib/berkeley_library/util/ods/xml/loext/table_protection.rb +26 -0
  61. data/lib/berkeley_library/util/ods/xml/manifest/file_entry.rb +42 -0
  62. data/lib/berkeley_library/util/ods/xml/manifest/manifest.rb +73 -0
  63. data/lib/berkeley_library/util/ods/xml/manifest_doc.rb +26 -0
  64. data/lib/berkeley_library/util/ods/xml/namespace.rb +46 -0
  65. data/lib/berkeley_library/util/ods/xml/office/automatic_styles.rb +181 -0
  66. data/lib/berkeley_library/util/ods/xml/office/body.rb +17 -0
  67. data/lib/berkeley_library/util/ods/xml/office/document_content.rb +98 -0
  68. data/lib/berkeley_library/util/ods/xml/office/document_styles.rb +39 -0
  69. data/lib/berkeley_library/util/ods/xml/office/font_face_decls.rb +30 -0
  70. data/lib/berkeley_library/util/ods/xml/office/scripts.rb +17 -0
  71. data/lib/berkeley_library/util/ods/xml/office/spreadsheet.rb +37 -0
  72. data/lib/berkeley_library/util/ods/xml/office/styles.rb +39 -0
  73. data/lib/berkeley_library/util/ods/xml/style/cell_style.rb +58 -0
  74. data/lib/berkeley_library/util/ods/xml/style/column_style.rb +36 -0
  75. data/lib/berkeley_library/util/ods/xml/style/default_style.rb +31 -0
  76. data/lib/berkeley_library/util/ods/xml/style/family.rb +85 -0
  77. data/lib/berkeley_library/util/ods/xml/style/font_face.rb +46 -0
  78. data/lib/berkeley_library/util/ods/xml/style/paragraph_properties.rb +30 -0
  79. data/lib/berkeley_library/util/ods/xml/style/row_style.rb +37 -0
  80. data/lib/berkeley_library/util/ods/xml/style/style.rb +44 -0
  81. data/lib/berkeley_library/util/ods/xml/style/table_cell_properties.rb +40 -0
  82. data/lib/berkeley_library/util/ods/xml/style/table_column_properties.rb +30 -0
  83. data/lib/berkeley_library/util/ods/xml/style/table_properties.rb +25 -0
  84. data/lib/berkeley_library/util/ods/xml/style/table_row_properties.rb +28 -0
  85. data/lib/berkeley_library/util/ods/xml/style/table_style.rb +27 -0
  86. data/lib/berkeley_library/util/ods/xml/style/text_properties.rb +52 -0
  87. data/lib/berkeley_library/util/ods/xml/styles_doc.rb +26 -0
  88. data/lib/berkeley_library/util/ods/xml/table/named_expressions.rb +17 -0
  89. data/lib/berkeley_library/util/ods/xml/table/repeatable.rb +38 -0
  90. data/lib/berkeley_library/util/ods/xml/table/table.rb +193 -0
  91. data/lib/berkeley_library/util/ods/xml/table/table_cell.rb +46 -0
  92. data/lib/berkeley_library/util/ods/xml/table/table_column.rb +43 -0
  93. data/lib/berkeley_library/util/ods/xml/table/table_row.rb +136 -0
  94. data/lib/berkeley_library/util/ods/xml/text/p.rb +118 -0
  95. data/lib/berkeley_library/util/paths.rb +111 -0
  96. data/lib/berkeley_library/util/stringios.rb +30 -0
  97. data/lib/berkeley_library/util/strings.rb +42 -0
  98. data/lib/berkeley_library/util/sys_exits.rb +15 -0
  99. data/lib/berkeley_library/util/times.rb +22 -0
  100. data/lib/berkeley_library/util/uris.rb +44 -0
  101. data/lib/berkeley_library/util/uris/appender.rb +162 -0
  102. data/lib/berkeley_library/util/uris/requester.rb +62 -0
  103. data/lib/berkeley_library/util/uris/validator.rb +32 -0
  104. data/rakelib/bundle.rake +8 -0
  105. data/rakelib/coverage.rake +11 -0
  106. data/rakelib/gem.rake +54 -0
  107. data/rakelib/rubocop.rake +18 -0
  108. data/rakelib/spec.rake +2 -0
  109. data/spec/.rubocop.yml +40 -0
  110. data/spec/berkeley_library/tind/api/api_exception_spec.rb +91 -0
  111. data/spec/berkeley_library/tind/api/api_spec.rb +143 -0
  112. data/spec/berkeley_library/tind/api/collection_spec.rb +74 -0
  113. data/spec/berkeley_library/tind/api/date_range_spec.rb +110 -0
  114. data/spec/berkeley_library/tind/api/format_spec.rb +54 -0
  115. data/spec/berkeley_library/tind/api/search_spec.rb +364 -0
  116. data/spec/berkeley_library/tind/config_spec.rb +86 -0
  117. data/spec/berkeley_library/tind/export/column_group_spec.rb +29 -0
  118. data/spec/berkeley_library/tind/export/column_spec.rb +43 -0
  119. data/spec/berkeley_library/tind/export/config_spec.rb +206 -0
  120. data/spec/berkeley_library/tind/export/export_command_spec.rb +169 -0
  121. data/spec/berkeley_library/tind/export/export_format_spec.rb +59 -0
  122. data/spec/berkeley_library/tind/export/export_matcher.rb +112 -0
  123. data/spec/berkeley_library/tind/export/export_spec.rb +150 -0
  124. data/spec/berkeley_library/tind/export/exporter_spec.rb +125 -0
  125. data/spec/berkeley_library/tind/export/row_spec.rb +118 -0
  126. data/spec/berkeley_library/tind/export/table_spec.rb +322 -0
  127. data/spec/berkeley_library/tind/marc/xml_reader_spec.rb +93 -0
  128. data/spec/berkeley_library/util/arrays_spec.rb +340 -0
  129. data/spec/berkeley_library/util/ods/spreadsheet_spec.rb +124 -0
  130. data/spec/berkeley_library/util/ods/xml/content_doc_spec.rb +121 -0
  131. data/spec/berkeley_library/util/ods/xml/manifest/file_entry_spec.rb +27 -0
  132. data/spec/berkeley_library/util/ods/xml/manifest/manifest_spec.rb +33 -0
  133. data/spec/berkeley_library/util/ods/xml/office/document_content_spec.rb +60 -0
  134. data/spec/berkeley_library/util/ods/xml/style/automatic_styles_spec.rb +37 -0
  135. data/spec/berkeley_library/util/ods/xml/style/family_spec.rb +57 -0
  136. data/spec/berkeley_library/util/ods/xml/table/table_row_spec.rb +179 -0
  137. data/spec/berkeley_library/util/ods/xml/table/table_spec.rb +218 -0
  138. data/spec/berkeley_library/util/paths_spec.rb +90 -0
  139. data/spec/berkeley_library/util/stringios_spec.rb +34 -0
  140. data/spec/berkeley_library/util/strings_spec.rb +27 -0
  141. data/spec/berkeley_library/util/times_spec.rb +39 -0
  142. data/spec/berkeley_library/util/uris_spec.rb +118 -0
  143. data/spec/data/collection-names.txt +438 -0
  144. data/spec/data/collections.json +4827 -0
  145. data/spec/data/disjoint-records.xml +187 -0
  146. data/spec/data/record-184453.xml +58 -0
  147. data/spec/data/record-184458.xml +63 -0
  148. data/spec/data/record-187888.xml +78 -0
  149. data/spec/data/records-api-search-cjk-p1.xml +6381 -0
  150. data/spec/data/records-api-search-cjk-p2.xml +5 -0
  151. data/spec/data/records-api-search-p1.xml +4506 -0
  152. data/spec/data/records-api-search-p2.xml +4509 -0
  153. data/spec/data/records-api-search-p3.xml +4506 -0
  154. data/spec/data/records-api-search-p4.xml +4509 -0
  155. data/spec/data/records-api-search-p5.xml +4506 -0
  156. data/spec/data/records-api-search-p6.xml +2436 -0
  157. data/spec/data/records-api-search-p7.xml +5 -0
  158. data/spec/data/records-api-search.xml +234 -0
  159. data/spec/data/records-manual-search.xml +547 -0
  160. data/spec/spec_helper.rb +30 -0
  161. data/test/profile/table_from_records_profile.rb +46 -0
  162. metadata +585 -0
@@ -0,0 +1 @@
1
+ Dir.glob(File.expand_path('export/*.rb', __dir__)).sort.each(&method(:require))
@@ -0,0 +1,54 @@
1
+ require 'marc_extensions'
2
+
3
module BerkeleyLibrary
  module TIND
    module Export
      # A single export column: one subfield code within a {ColumnGroup}.
      # All data access is delegated to the owning group.
      class Column

        # @return [ColumnGroup] the group containing this column
        attr_reader :column_group

        # @return [Integer] the index of this column in the group
        attr_reader :col_in_group

        # Initializes a new column
        #
        # @param column_group [ColumnGroup] the group containing this column
        # @param col_in_group [Integer] the index of this column in the group
        def initialize(column_group, col_in_group)
          @column_group = column_group
          @col_in_group = col_in_group
        end

        # @return [String] the column header: the group prefix, the subfield
        #   code, and a 1-based "-#" suffix taken from the group's index in its tag
        def header
          # NOTE: TIND "-#" suffixes must be unique by tag, not tag + ind1 + ind2
          @header ||= "#{column_group.prefix}#{subfield_code}-#{1 + column_group.index_in_tag}"
        end

        # @return [String] the subfield code at this column's position in the group
        def subfield_code
          @subfield_code ||= column_group.subfield_codes[col_in_group]
        end

        # @param row [Integer] the row index
        # @return the group's value for this column at the given row
        def value_at(row)
          column_group.value_at(row, col_in_group)
        end

        # Whether {Filter} allows this column to be edited. The answer is
        # computed once and cached.
        #
        # @return [Boolean] the result of `Filter.can_edit?` for this column's
        #   tag, indicators and subfield code
        def can_edit?
          # `||=` would re-query the filter on every call whenever the answer
          # is false, so check for the ivar's existence instead.
          return @can_edit if defined?(@can_edit)

          @can_edit = Filter.can_edit?(
            column_group.tag,
            column_group.ind1,
            column_group.ind2,
            subfield_code
          )
        end

        # Yields each value in this column, one per row of the group.
        #
        # @param include_header [Boolean] whether to yield the header first
        # @return [Enumerator] if no block is given
        def each_value(include_header: false)
          return to_enum(:each_value, include_header: include_header) unless block_given?

          yield header if include_header
          column_group.row_count.times { |row| yield value_at(row) }
        end
      end
    end
  end
end
@@ -0,0 +1,144 @@
1
+ require 'berkeley_library/tind/export/column'
2
+ require 'berkeley_library/util/arrays'
3
+ require 'berkeley_library/util/strings'
4
+
5
module BerkeleyLibrary
  module TIND
    module Export

      # A group of columns representing the subfields of a particular
      # data field.
      class ColumnGroup
        include BerkeleyLibrary::Util::Arrays

        # ------------------------------------------------------------
        # Constants

        # Indicators SHOULD NOT be capital letters, but TIND internal fields
        # don't respect that. Thus the /i flag.
        #
        # Anchored with \A and \z rather than ^ and $, which match per line
        # and would accept multi-line or newline-terminated values.
        INDICATOR_RE = /\A[0-9a-z ]\z/i.freeze

        # A single lowercase ASCII letter or digit (whole string, see above).
        SUBFIELD_CODE_RE = /\A[0-9a-z]\z/.freeze

        # ------------------------------------------------------------
        # Accessors

        attr_reader :tag, :index_in_tag, :ind1, :ind2, :subfield_codes

        # ------------------------------------------------------------
        # Initializer

        # @param tag [String] a three-character ASCII-numeric tag
        # @param index_in_tag [Integer] this group's index among the groups for the tag
        # @param ind1 [String] the first indicator
        # @param ind2 [String] the second indicator
        # @param subfield_codes [Array<String>] the initial subfield codes
        # @raise [ArgumentError] if the tag, an indicator, or a subfield code is invalid
        def initialize(tag, index_in_tag, ind1, ind2, subfield_codes)
          @tag, @ind1, @ind2 = valid_tag_and_indicators(tag, ind1, ind2)
          @subfield_codes = valid_subfield_codes(subfield_codes).dup.freeze
          @index_in_tag = index_in_tag
        end

        # ------------------------------------------------------------
        # Class methods

        class << self

          # @return [String] the column prefix for the given data field
          def prefix_for(data_field)
            format_prefix(data_field.tag, data_field.indicator1, data_field.indicator2)
          end

          # @return [String] the indicator, with a blank rendered as '_'
          def format_indicator(ind)
            ind == ' ' ? '_' : ind
          end

          # @return [String] the tag followed by both formatted indicators
          def format_prefix(tag, ind1, ind2)
            [tag, format_indicator(ind1), format_indicator(ind2)].join
          end
        end

        # ------------------------------------------------------------
        # Instance methods

        # @return [String] the tag followed by both formatted indicators
        def prefix
          ColumnGroup.format_prefix(tag, ind1, ind2)
        end

        # Stores the data field at the given row if its tag and indicators
        # match this group, merging its subfield codes into the group's.
        #
        # NOTE(review): #columns and the per-row subfield index cache are not
        # reset here; this presumably relies on all fields being added before
        # values are read -- confirm against callers.
        #
        # @param row [Integer] the row index
        # @param data_field the data field to add
        # @return the data field if it was added, nil otherwise
        def maybe_add_at(row, data_field)
          warn "Data field at row #{row} is not frozen: #{data_field}" unless data_field.subfields.frozen?
          # set nil explicitly so row_count etc. are correct
          return (data_fields[row] = nil) unless can_add?(data_field)

          @subfield_codes = merge(subfield_codes, data_field.subfield_codes)
          data_fields[row] = data_field
        end

        # @param row [Integer] the row index
        # @param col [Integer] the column index within this group
        # @return the subfield value at that position, or nil if there is none
        def value_at(row, col)
          return unless (data_field = data_fields[row])
          return unless (subfield_indices = subfield_indices_for(row))
          return unless (subfield_index = subfield_indices[col])
          return unless (subfield = data_field.subfields[subfield_index])

          subfield.value
        end

        # @return [Array<Column>] one column per subfield code (memoized)
        def columns
          @columns ||= (0...subfield_codes.length).map { |col| Column.new(self, col) }
        end

        # @return [Integer] the number of rows recorded so far, including nil rows
        def row_count
          data_fields.size
        end

        # ------------------------------------------------------------
        # Object overrides

        def to_s
          "ColumnGroup #{tag}-#{index_in_tag}:" + [prefix, subfield_codes.join].join
        end

        # ------------------------------------------------------------
        # Private methods

        private

        def valid_tag_and_indicators(tag, ind1, ind2)
          raise ArgumentError, "#{tag}#{ind1}#{ind2}: not a valid tag" unless tag.size == 3 && BerkeleyLibrary::Util::Strings.ascii_numeric?(tag)
          raise ArgumentError, "#{tag}#{ind1}#{ind2}: not a valid indicator: #{ind1.inspect}" unless ind1 =~ INDICATOR_RE
          raise ArgumentError, "#{tag}#{ind1}#{ind2}: not a valid indicator: #{ind2.inspect}" unless ind2 =~ INDICATOR_RE

          [tag, ind1, ind2]
        end

        def valid_subfield_codes(subfield_codes)
          subfield_codes.tap do |scc|
            raise ArgumentError, "Invalid subfield codes: #{scc.inspect}" unless scc.all? { |c| c =~ SUBFIELD_CODE_RE }
          end
        end

        def can_add?(data_field)
          data_field.tag == tag &&
            data_field.indicator1 == ind1 &&
            data_field.indicator2 == ind2
        end

        # Returns the (lazily computed) subfield indices for the data field
        # at the given row, or nil if the row has no data field.
        def subfield_indices_for(row)
          return unless (data_field = data_fields[row])

          # Cache per row. Comparing `row` with the cache size is not enough:
          # reading a later row first leaves nil holes at the earlier indices,
          # which would then be returned as if they had been computed.
          cached_subfield_indices[row] ||= find_subfield_indices(data_field)
        end

        def cached_subfield_indices
          @cached_subfield_indices ||= []
        end

        def find_subfield_indices(data_field)
          return unless can_add?(data_field)

          df_index_to_cg_index = find_indices(in_array: subfield_codes, for_array: data_field.subfield_codes)
          invert(df_index_to_cg_index)
        end

        def data_fields
          @data_fields ||= []
        end
      end
    end
  end
end
@@ -0,0 +1,131 @@
1
+ require 'berkeley_library/tind/export/filter'
2
+ require 'berkeley_library/tind/export/column_group'
3
+ require 'berkeley_library/tind/export/column'
4
+ require 'berkeley_library/tind/export/export_exception'
5
+ require 'berkeley_library/tind/export/row'
6
+ require 'berkeley_library/util/arrays'
7
+
8
module BerkeleyLibrary
  module TIND
    module Export
      # An enumerable list of {ColumnGroup}s, kept per tag and enumerated
      # in tag order.
      class ColumnGroupList
        include Enumerable

        # ------------------------------------------------------------
        # Initializer

        # @param exportable_only [Boolean] whether tags, data fields and
        #   subfield codes should be screened through {Filter}
        def initialize(exportable_only: false)
          @exportable_only = exportable_only
        end

        # ------------------------------------------------------------
        # Accessors

        # @return [Boolean] the value passed to the initializer
        def exportable_only?
          @exportable_only
        end

        # ------------------------------------------------------------
        # Misc. instance methods

        # @return [Array<ColumnGroup>] every group, ordered by tag and then
        #   by position within the tag
        def all_groups
          # NOTE: deliberately not memoized here; the result is cached only by #freeze
          return @all_groups if @all_groups

          column_groups_by_tag.keys.sort.flat_map { |tag| column_groups_by_tag[tag] }
        end

        # Adds the data fields of the given MARC record at the given row.
        #
        # @param marc_record the record; must respond to #data_fields_by_tag
        #   and #record_id
        # @param row [Integer] the row index
        # @raise [ExportException] wrapping any StandardError raised while adding
        def add_data_fields(marc_record, row)
          # TODO: what about control fields?
          marc_record.data_fields_by_tag.each do |tag, data_fields|
            add_fields_at(data_fields, row) if can_export_tag(tag) && !data_fields.empty?
          end
        rescue StandardError => e
          raise Export::ExportException, "Error adding MARC record #{marc_record.record_id} at row #{row}: #{e.message}"
        end

        # ------------------------------------------------------------
        # Enumerable

        def each(&block)
          all_groups.each(&block)
        end

        # ------------------------------------------------------------
        # Object overrides

        # Freezes the per-tag group arrays and the tag map, and caches the
        # flattened group list. Returns self.
        def freeze
          column_groups_by_tag.each_value(&:freeze)
          column_groups_by_tag.freeze
          @all_groups ||= all_groups.freeze
          self
        end

        # Truthy once both the tag map and the cached group list are frozen.
        def frozen?
          column_groups_by_tag.frozen? && @all_groups&.frozen?
        end

        # ------------------------------------------------------------
        # Private methods

        private

        def column_groups_by_tag
          @column_groups_by_tag ||= {}
        end

        # Adds each exportable data field (all sharing one tag) at the row.
        # Every field is placed in a group after the one used by the
        # previous field. Returns the final search offset.
        def add_fields_at(data_fields, row)
          tag = data_fields.first.tag.freeze
          groups_for_tag = (column_groups_by_tag[tag] ||= [])

          next_offset = 0
          data_fields.each do |df|
            next unless can_export_df(df)

            next_offset = add_data_field(df, row, groups_for_tag, at_or_after: next_offset) + 1
          end
          next_offset
        end

        # Adds the data field to the first accepting group at or after the
        # given index, creating a new group if none accepts it. Returns the
        # index of the group used.
        def add_data_field(df, row, tag_column_groups, at_or_after: 0)
          existing_index = added_at_index(df, row, tag_column_groups, at_or_after)
          return existing_index if existing_index

          group = ColumnGroup.new(df.tag, tag_column_groups.size, df.indicator1, df.indicator2, exportable_subfield_codes(df))
          raise Export::ExportException, "Unexpected failure to add #{df} to #{group}" unless group.maybe_add_at(row, df)

          tag_column_groups << group
          tag_column_groups.size - 1
        end

        def added_at_index(df, row, tag_column_groups, at_or_after)
          BerkeleyLibrary::Util::Arrays.find_index(in_array: tag_column_groups, start_index: at_or_after) do |group|
            group.maybe_add_at(row, df)
          end
        end

        def can_export_tag(tag)
          exportable_only? ? Filter.can_export_tag?(tag) : true
        end

        def can_export_df(df)
          exportable_only? ? Filter.can_export_data_field?(df) : true
        end

        def exportable_subfield_codes(df)
          exportable_only? ? Filter.exportable_subfield_codes(df) : df.subfield_codes
        end
      end
    end
  end
end
@@ -0,0 +1,76 @@
1
+ require 'berkeley_library/tind/export/config'
2
+
3
module BerkeleyLibrary
  module TIND
    module Export
      # Calculates approximate column widths for cell values, based on
      # Arial average character widths (in units of 1/1000 point size) per
      # {https://www.math.utah.edu/~beebe/fonts/afm-widths.html this table}.
      # (LibreOffice default is Liberation Sans, which should match Arial.)
      #
      # CJK and fullwidth characters will probably be mapped to another font,
      # but it's probably going to be roughly square.
      #
      # Non-Western, non-CJK characters will *hopefully* not be much wider
      # than their Western counterparts.
      module ColumnWidthCalculator
        include Config

        # Number of width units per point of font size
        WIDTH_UNIT = 1000.0

        WIDTH_LOWER = 489.46

        WIDTH_UPPER = 677.42

        WIDTH_DIGIT = 556.0

        # Measured empirically in LibreOffice 6.4.7.2
        WIDTH_CJK = 970.0

        # Character-class patterns mapped to widths; the first pattern that
        # matches a character wins.
        WIDTHS = {
          /[\u4e00-\u9fff]/ => WIDTH_CJK, # CJK (excluding half-width forms)
          /[\uff01-\uff65\uffe0-\uffee]/ => WIDTH_CJK, # Fullwidth forms
          /[[:digit:]]/ => WIDTH_DIGIT,
          /[[:upper:]]/ => WIDTH_UPPER,
          /[[:lower:]]/ => WIDTH_LOWER,
          /[[:space:]]/ => 2 * WIDTH_LOWER / 3 # empirical
        }.freeze

        # See {WIDTHS}
        WIDTH_DEFAULT = WIDTH_DIGIT # Fallback to digit width for other characters

        # @param str [String, nil] the text to measure
        # @return [Numeric] the summed character widths of the
        #   Unicode-normalized string in 1/1000-point units; 0 if nil or empty
        def width_ps_units(str)
          return 0 if str.nil? || str.empty?

          total = 0
          str.unicode_normalize.each_char { |ch| total += width_for_char(ch) }
          total
        end

        # @param str [String, nil] the text to measure
        # @param font_size_points [Numeric] the font size in points
        # @return [Numeric] the approximate width in points
        def width_points(str, font_size_points = font_size_pt)
          width_per_point(str) * font_size_points
        end

        # @param str [String, nil] the text to measure
        # @param font_size_points [Numeric] the font size in points
        # @return [Numeric] the approximate width in inches (72 points per
        #   inch); 0 if nil or empty
        def width_inches(str, font_size_points = font_size_pt)
          return 0 if str.nil? || str.empty?

          points = width_points(str, font_size_points)
          points / 72.0
        end

        private

        # Width of the string per point of font size
        def width_per_point(str)
          width_ps_units(str) / WIDTH_UNIT
        end

        # Width of the first matching entry in WIDTHS, or WIDTH_DEFAULT
        def width_for_char(c)
          matching = WIDTHS.find { |pattern, _width| c =~ pattern }
          matching ? matching.last : WIDTH_DEFAULT
        end

        # Make every method above callable on the module itself as well
        extend self
      end
    end
  end
end
@@ -0,0 +1,154 @@
1
module BerkeleyLibrary
  module TIND
    module Export
      # Formatting settings for exports. Each setting can be set explicitly
      # via the module-level writer, or read lazily from an `ODS_*`
      # environment variable, falling back to the constant default.
      # Including this module gives instance-level readers that delegate
      # to the module-level values.
      module Config

        # Font size in points
        FONT_SIZE_DEFAULT = 10.0

        # Round column widths up to nearest eighth inch
        WIDTH_INCREMENT_DEFAULT_INCHES = '1/8'.to_r

        # Round row heights up to nearest 2 points
        HEIGHT_INCREMENT_DEFAULT_POINTS = 2

        # Max column width before wrapping
        MAX_COLUMN_WIDTH_INCHES = 5.0

        # Decimal places for formatting
        FORMAT_DIGITS_DEFAULT = 3

        # Line height as multiple of font size
        LINE_HEIGHT_DEFAULT_EM = '4/3'.to_r

        # @return [Numeric] the font size in points
        def font_size_pt
          Config.font_size_pt
        end

        # @return [Numeric] the max column width in inches
        def max_col_width_in
          Config.max_col_width_in
        end

        # @return [Numeric] the width rounding increment in inches
        def w_incr_in
          Config.w_incr_in
        end

        # @return [Numeric] the height rounding increment in points
        def h_incr_pt
          Config.h_incr_pt
        end

        # @return [Numeric] the line height in ems (multiples of the font point size)
        def line_height_em
          Config.line_height_em
        end

        # @return [Integer] the number of digits to use when formatting values
        def format_digits
          Config.format_digits
        end

        # noinspection RubyYardReturnMatch
        class << self

          # @return [Numeric] the font size in points
          def font_size_pt
            @font_size_pt ||= ensure_positive_numeric(ENV['ODS_FONT_SIZE_DEFAULT'] || Config::FONT_SIZE_DEFAULT)
          end

          # @param value [Numeric, #to_s] a positive number or its string form
          # @raise [ArgumentError] if the value is not a positive number
          def font_size_pt=(value)
            @font_size_pt = ensure_positive_numeric(value)
          end

          # @return [Numeric] the max column width in inches
          def max_col_width_in
            @max_col_width_in ||= ensure_positive_numeric(ENV['ODS_MAX_COLUMN_WIDTH_INCHES'] || Config::MAX_COLUMN_WIDTH_INCHES)
          end

          # @param value [Numeric, #to_s] a positive number or its string form
          # @raise [ArgumentError] if the value is not a positive number
          def max_col_width_in=(value)
            @max_col_width_in = ensure_positive_numeric(value)
          end

          # @return [Numeric] the width rounding increment in inches
          def w_incr_in
            @w_incr_in ||= ensure_positive_numeric(ENV['ODS_WIDTH_INCREMENT_DEFAULT_INCHES'] || Config::WIDTH_INCREMENT_DEFAULT_INCHES)
          end

          # @param value [Numeric, #to_s] a positive number or its string form
          # @raise [ArgumentError] if the value is not a positive number
          def w_incr_in=(value)
            @w_incr_in = ensure_positive_numeric(value)
          end

          # @return [Numeric] the height rounding increment in points
          def h_incr_pt
            @h_incr_pt ||= ensure_positive_numeric(ENV['ODS_HEIGHT_INCREMENT_DEFAULT_POINTS'] || Config::HEIGHT_INCREMENT_DEFAULT_POINTS)
          end

          # @param value [Numeric, #to_s] a positive number or its string form
          # @raise [ArgumentError] if the value is not a positive number
          def h_incr_pt=(value)
            @h_incr_pt = ensure_positive_numeric(value)
          end

          # @return [Numeric] the line height in ems (multiples of the font point size)
          def line_height_em
            @line_height_em ||= ensure_positive_numeric(ENV['ODS_LINE_HEIGHT_DEFAULT_EM'] || Config::LINE_HEIGHT_DEFAULT_EM)
          end

          # @param value [Numeric, #to_s] a positive number or its string form
          # @raise [ArgumentError] if the value is not a positive number
          def line_height_em=(value)
            @line_height_em = ensure_positive_numeric(value)
          end

          # @return [Integer] the number of digits to use when formatting values
          def format_digits
            @format_digits ||= ensure_positive_int(ENV['ODS_FORMAT_DIGITS_DEFAULT'] || Config::FORMAT_DIGITS_DEFAULT)
          end

          # @param value [Integer, #to_s] a positive integer or its string form
          # @raise [ArgumentError] if the value is not a positive integer
          def format_digits=(value)
            @format_digits = ensure_positive_int(value)
          end

          private

          # @param v [Object] a value
          # @return [Numeric] the value as a positive number
          # @raise [ArgumentError] if the value is not numeric, or not positive
          def ensure_positive_numeric(v)
            v_n = ensure_numeric(v)
            return v_n if v_n > 0

            raise ArgumentError, "Value must be positive: #{v_n}"
          end

          # Converts a value to a number. Numerics are returned as-is.
          # Otherwise the string form is parsed strictly: as a Rational if it
          # contains '/', as a Float if it contains '.', else as an Integer.
          # Strict parsing rejects trailing garbage such as "1.5abc", which
          # String#to_f and String#to_r would silently truncate.
          #
          # @param v [Object] a value
          # @return [Numeric] the numeric value
          # @raise [ArgumentError] if the value cannot be parsed as a number
          def ensure_numeric(v)
            return v if v.is_a?(Numeric)

            v_str = v.to_s
            return Rational(v_str) if v_str.include?('/')
            return Float(v_str) if v_str.include?('.')

            Integer(v_str)
          rescue ArgumentError, ZeroDivisionError
            # ZeroDivisionError covers a zero denominator, e.g. "1/0"
            raise ArgumentError, "Can't parse #{v.inspect} as a numeric value"
          end

          # @param v [Object] a value
          # @return [Integer] the value as a positive integer
          # @raise [ArgumentError] if the value is not an integer, or not positive
          def ensure_positive_int(v)
            v_i = ensure_int(v)
            return v_i if v_i > 0

            raise ArgumentError, "Value must be positive: #{v_i}"
          end

          # Converts a value to an Integer. Integers are returned as-is;
          # anything else is parsed strictly from its string form.
          #
          # @param v [Object] a value
          # @return [Integer] the integer value
          # @raise [ArgumentError] if the value cannot be parsed as an integer
          def ensure_int(v)
            return v if v.is_a?(Integer)

            Integer(v.to_s)
          rescue ArgumentError
            raise ArgumentError, "Can't parse #{v.inspect} as an integer value"
          end
        end
      end
    end
  end
end