libis-format 1.3.4 → 2.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.coveralls.yml +2 -0
- data/.gitignore +20 -0
- data/.travis.yml +70 -0
- data/Gemfile +0 -12
- data/README.md +2 -2
- data/Rakefile +8 -0
- data/base/Dockerfile +35 -0
- data/base/Dockerfile.alpine +20 -0
- data/base/Dockerfile.rvm +56 -0
- data/base/rework_path +20 -0
- data/bin/{pdf_tool → pdf_copy} +2 -3
- data/data/PDFA_def.ps +3 -3
- data/data/eciRGB_v2.icc +0 -0
- data/data/types.yml +4 -17
- data/docker_cfg.yml +1 -0
- data/lib/libis/format/cli/convert.rb +4 -4
- data/lib/libis/format/cli/prompt_helper.rb +24 -32
- data/lib/libis/format/command_line.rb +3 -2
- data/lib/libis/format/config.rb +23 -19
- data/lib/libis/format/converter/audio_converter.rb +31 -56
- data/lib/libis/format/converter/base.rb +36 -16
- data/lib/libis/format/converter/chain.rb +32 -52
- data/lib/libis/format/converter/fop_pdf_converter.rb +8 -4
- data/lib/libis/format/converter/image_assembler.rb +82 -0
- data/lib/libis/format/converter/image_converter.rb +45 -250
- data/lib/libis/format/converter/image_splitter.rb +80 -0
- data/lib/libis/format/converter/image_watermarker.rb +261 -0
- data/lib/libis/format/converter/jp2_converter.rb +38 -36
- data/lib/libis/format/converter/office_converter.rb +28 -22
- data/lib/libis/format/converter/pdf_assembler.rb +66 -0
- data/lib/libis/format/converter/pdf_converter.rb +52 -200
- data/lib/libis/format/converter/pdf_optimizer.rb +70 -0
- data/lib/libis/format/converter/pdf_splitter.rb +65 -0
- data/lib/libis/format/converter/pdf_watermarker.rb +110 -0
- data/lib/libis/format/converter/repository.rb +13 -7
- data/lib/libis/format/converter/spreadsheet_converter.rb +16 -10
- data/lib/libis/format/converter/video_converter.rb +58 -47
- data/lib/libis/format/converter/xslt_converter.rb +11 -13
- data/lib/libis/format/converter.rb +1 -1
- data/lib/libis/format/identifier.rb +46 -44
- data/lib/libis/format/info.rb +27 -0
- data/lib/libis/format/library.rb +147 -0
- data/lib/libis/format/tool/droid.rb +30 -29
- data/lib/libis/format/tool/extension_identification.rb +26 -24
- data/lib/libis/format/tool/{ff_mpeg.rb → ffmpeg.rb} +10 -17
- data/lib/libis/format/tool/fido.rb +27 -22
- data/lib/libis/format/tool/file_tool.rb +24 -11
- data/lib/libis/format/tool/fop_pdf.rb +14 -25
- data/lib/libis/format/tool/identification_tool.rb +40 -38
- data/lib/libis/format/tool/office_to_pdf.rb +18 -30
- data/lib/libis/format/tool/pdf_copy.rb +47 -0
- data/lib/libis/format/tool/pdf_merge.rb +19 -25
- data/lib/libis/format/tool/pdf_optimizer.rb +19 -22
- data/lib/libis/format/tool/pdf_split.rb +33 -6
- data/lib/libis/format/tool/pdf_to_pdfa.rb +31 -45
- data/lib/libis/format/tool/pdfa_validator.rb +30 -24
- data/lib/libis/format/tool/spreadsheet_to_ods.rb +18 -29
- data/lib/libis/format/tool.rb +3 -4
- data/lib/libis/format/version.rb +1 -3
- data/lib/libis/format/yaml_loader.rb +71 -0
- data/lib/libis/format.rb +7 -5
- data/lib/libis-format.rb +0 -2
- data/libis-format.gemspec +18 -24
- data/tools/PdfTool.jar +0 -0
- data/tools/pdfbox/pdfbox-app-2.0.13.jar +0 -0
- data/tools/pdfbox/{preflight-app-3.0.3.jar → preflight-app-2.0.13.jar} +0 -0
- metadata +86 -128
- data/data/AdobeRGB1998.icc +0 -0
- data/lib/libis/format/converter/email_converter.rb +0 -35
- data/lib/libis/format/tool/msg_to_pdf.rb +0 -270
- data/lib/libis/format/tool/pdf_tool.rb +0 -52
- data/lib/libis/format/type_database.rb +0 -156
- data/lib/libis/format/type_database_impl.rb +0 -153
- data/tools/pdf2pdfa +0 -395
- data/tools/pdfbox/pdfbox-app-3.0.3.jar +0 -0
- /data/bin/{droid_tool → droid} +0 -0
- /data/bin/{fido_tool → fido} +0 -0
@@ -1,270 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
require 'mapi/msg'
|
4
|
-
require 'rfc_2047'
|
5
|
-
require 'cgi'
|
6
|
-
require 'pdfkit'
|
7
|
-
require 'time'
|
8
|
-
require 'fileutils'
|
9
|
-
require 'pathname'
|
10
|
-
require 'libis/format/config'
|
11
|
-
|
12
|
-
module Libis
|
13
|
-
module Format
|
14
|
-
module Tool
|
15
|
-
class MsgToPdf
|
16
|
-
include ::Libis::Tools::Logger
|
17
|
-
|
18
|
-
HEADER_STYLE = '<style>.header-table {margin: 0 0 20 0;padding: 0;font-family: Arial, Helvetica, sans-serif;}.header-name {padding-right: 5px;color: #9E9E9E;text-align: right;vertical-align: top;font-size: 12px;}.header-value {font-size: 12px;}#header_fields {#background: white;#margin: 0;#border: 1px solid #DDD;#border-radius: 3px;#padding: 8px;#width: 100%%;#box-sizing: border-box;#}</style><script type="text/javascript">function timer() {try {parent.postMessage(Math.max(document.body.offsetHeight, document.body.scrollHeight), \'*\');} catch (r) {}setTimeout(timer, 10);};timer();</script>' # rubocop:disable Layout/LineLength
|
19
|
-
HEADER_TABLE_TEMPLATE = '<div class="header-table"><table id="header_fields"><tbody>%s</tbody></table></div>'
|
20
|
-
HEADER_FIELD_TEMPLATE = '<tr><td class="header-name">%s</td><td class="header-value">%s</td></tr>'
|
21
|
-
HTML_WRAPPER_TEMPLATE = '<!DOCTYPE html><html><head><style>body {font-size: 0.5cm;}</style><title>title</title></head><body>%s</body></html>' # rubocop:disable Layout/LineLength
|
22
|
-
|
23
|
-
IMG_CID_PLAIN_REGEX = /\[cid:(.*?)\]/m
|
24
|
-
IMG_CID_HTML_REGEX = /cid:([^"]*)/m
|
25
|
-
|
26
|
-
def self.installed?
|
27
|
-
File.exist?(Libis::Format::Config[:wkhtmltopdf])
|
28
|
-
end
|
29
|
-
|
30
|
-
def self.run(source, target, **options)
|
31
|
-
new.run source, target, **options
|
32
|
-
end
|
33
|
-
|
34
|
-
def run(source, target, **options)
|
35
|
-
# Preliminary checks
|
36
|
-
# ------------------
|
37
|
-
|
38
|
-
@warnings = []
|
39
|
-
|
40
|
-
# Check if source file exists
|
41
|
-
raise "File #{source} does not exist" unless File.exist?(source)
|
42
|
-
|
43
|
-
# Retrieving the message
|
44
|
-
# ----------------------
|
45
|
-
|
46
|
-
# Open the message
|
47
|
-
msg = Mapi::Msg.open(source)
|
48
|
-
|
49
|
-
target_format = options.delete(:to_html) ? :HTML : :PDF
|
50
|
-
result = msg_to_pdf(msg, target, target_format, options)
|
51
|
-
msg.close
|
52
|
-
result
|
53
|
-
end
|
54
|
-
|
55
|
-
def msg_to_pdf(msg, target, target_format, pdf_options, root_msg: true)
|
56
|
-
# Make sure the target directory exists
|
57
|
-
outdir = File.dirname(target)
|
58
|
-
FileUtils.mkdir_p(outdir)
|
59
|
-
|
60
|
-
# Get the body of the message in HTML
|
61
|
-
body = msg.properties.body_html
|
62
|
-
|
63
|
-
# Embed plain body in HTML as a fallback
|
64
|
-
body ||= HTML_WRAPPER_TEMPLATE % msg.properties.body
|
65
|
-
|
66
|
-
# Check and fix the character encoding
|
67
|
-
begin
|
68
|
-
# Try to encode into UTF-8
|
69
|
-
body.encode!('UTF-8', universal_newline: true)
|
70
|
-
rescue Encoding::InvalidByteSequenceError, Encoding::UndefinedConversionError
|
71
|
-
begin
|
72
|
-
# If it fails, the text may be in Windows' Latin1 (ISO-8859-1)
|
73
|
-
body.force_encoding('ISO-8859-1').encode!('UTF-8', universal_newline: true)
|
74
|
-
rescue Encoding::InvalidByteSequenceError, Encoding::UndefinedConversionError => e
|
75
|
-
# If that fails too, log a warning and replace the invalid/unknown with a ? character.
|
76
|
-
@warnings << "#{e.class}: #{e.message}"
|
77
|
-
body.encode!('UTF-8', universal_newline: true, invalid: :replace, undef: :replace)
|
78
|
-
end
|
79
|
-
end
|
80
|
-
|
81
|
-
# Process headers
|
82
|
-
# ---------------
|
83
|
-
headers = {}
|
84
|
-
hdr_html = ''
|
85
|
-
|
86
|
-
%w[From To Cc Subject Date].each do |key|
|
87
|
-
value = find_hdr(msg.headers, key)
|
88
|
-
if value
|
89
|
-
headers[key.downcase.to_sym] = value
|
90
|
-
hdr_html += hdr_html(key, value)
|
91
|
-
end
|
92
|
-
end
|
93
|
-
|
94
|
-
[:date].each do |key|
|
95
|
-
next unless headers[key]
|
96
|
-
|
97
|
-
headers[key] = DateTime.parse(headers[key]).to_time.localtime.iso8601
|
98
|
-
end
|
99
|
-
|
100
|
-
# Add header section to the HTML body
|
101
|
-
unless hdr_html.empty?
|
102
|
-
# Insert header block styles
|
103
|
-
if body =~ %r{</head>}
|
104
|
-
# if head exists, append the style block
|
105
|
-
body.gsub!(%r{</head>}, "#{HEADER_STYLE}</head>")
|
106
|
-
elsif body =~ %r{<head/>}
|
107
|
-
# empty head, replace with the style block
|
108
|
-
body.gsub!(%r{<head/>}, "<head>#{HEADER_STYLE}</head>")
|
109
|
-
else
|
110
|
-
# otherwise insert a head section before the body tag
|
111
|
-
body.gsub!(/<body/, "<head>#{HEADER_STYLE}</head><body")
|
112
|
-
end
|
113
|
-
# Add the headers html table as first element in the body section
|
114
|
-
body.gsub!(/<body[^>]*>/) { |m| "#{m}#{HEADER_TABLE_TEMPLATE % hdr_html}" }
|
115
|
-
end
|
116
|
-
|
117
|
-
# Embed inline images
|
118
|
-
# -------------------
|
119
|
-
attachments = msg.attachments
|
120
|
-
used_files = []
|
121
|
-
|
122
|
-
# First process plaintext cid entries
|
123
|
-
body.gsub!(IMG_CID_PLAIN_REGEX) do |_match|
|
124
|
-
data = get_attachment_data(attachments, ::Regexp.last_match(1))
|
125
|
-
if data
|
126
|
-
used_files << ::Regexp.last_match(1)
|
127
|
-
"<img src=\"data:#{data[:mime_type]};base64,#{data[:base64]}\"/>"
|
128
|
-
else
|
129
|
-
'<img src=""/>'
|
130
|
-
end
|
131
|
-
end
|
132
|
-
|
133
|
-
# Then process HTML img tags with CID entries
|
134
|
-
body.gsub!(IMG_CID_HTML_REGEX) do |_match|
|
135
|
-
data = get_attachment_data(attachments, ::Regexp.last_match(1))
|
136
|
-
if data
|
137
|
-
used_files << ::Regexp.last_match(1)
|
138
|
-
"data:#{data[:mime_type]};base64,#{data[:base64]}"
|
139
|
-
else
|
140
|
-
''
|
141
|
-
end
|
142
|
-
end
|
143
|
-
|
144
|
-
# Create PDF
|
145
|
-
# ----------
|
146
|
-
files = []
|
147
|
-
|
148
|
-
if target_format == :PDF
|
149
|
-
# PDF creation options
|
150
|
-
pdf_options = {
|
151
|
-
page_size: 'A4',
|
152
|
-
margin_top: '10mm',
|
153
|
-
margin_bottom: '10mm',
|
154
|
-
margin_left: '10mm',
|
155
|
-
margin_right: '10mm',
|
156
|
-
# image_quality: 100,
|
157
|
-
# viewport_size: '2480x3508',
|
158
|
-
dpi: 300
|
159
|
-
}.merge pdf_options
|
160
|
-
|
161
|
-
subject = find_hdr(msg.headers, 'Subject')
|
162
|
-
kit = PDFKit.new(body, title: (subject || 'message'), **pdf_options)
|
163
|
-
pdf = kit.to_pdf
|
164
|
-
File.open(target, 'wb') { |f| f.write(pdf) }
|
165
|
-
else
|
166
|
-
File.open(target, 'wb') { |f| f.write(body) }
|
167
|
-
end
|
168
|
-
files << target if File.exist?(target)
|
169
|
-
|
170
|
-
# Save attachments
|
171
|
-
# ----------------
|
172
|
-
outdir = File.join(outdir, "#{File.basename(target)}.attachments")
|
173
|
-
digits = ((attachments.count + 1) / 10) + 1
|
174
|
-
i = 1
|
175
|
-
attachments.delete_if { |a| a.properties.attachment_hidden }.each do |a|
|
176
|
-
prefix = "#{format('%0*d', digits, i)}-"
|
177
|
-
if (sub_msg = a.instance_variable_get(:@embedded_msg))
|
178
|
-
subject = a.properties[:display_name] || sub_msg.subject || ''
|
179
|
-
file = File.join(outdir, "#{prefix}#{subject}.msg.#{target_format.to_s.downcase}")
|
180
|
-
result = msg_to_pdf(sub_msg, file, target_format, pdf_options, root_msg: false)
|
181
|
-
if (e = result[:error])
|
182
|
-
raise e
|
183
|
-
end
|
184
|
-
|
185
|
-
files += result[:files]
|
186
|
-
elsif a.filename
|
187
|
-
next if used_files.include?(a.filename)
|
188
|
-
|
189
|
-
file = File.join(outdir, "#{prefix}#{a.filename}")
|
190
|
-
FileUtils.mkdir_p(File.dirname(file))
|
191
|
-
File.open(file, 'wb') { |f| a.save(f) }
|
192
|
-
files << file
|
193
|
-
else
|
194
|
-
@warnings << "Attachment #{a.properties[:display_name]} cannot be extracted"
|
195
|
-
next
|
196
|
-
end
|
197
|
-
i += 1
|
198
|
-
end
|
199
|
-
|
200
|
-
if root_msg
|
201
|
-
p = Pathname(File.dirname(files.first))
|
202
|
-
files[1..].each do |f|
|
203
|
-
(headers[:attachments] ||= []) << Pathname.new(f).relative_path_from(p).to_s
|
204
|
-
end
|
205
|
-
end
|
206
|
-
|
207
|
-
{
|
208
|
-
command: { status: 0 },
|
209
|
-
files:,
|
210
|
-
headers:,
|
211
|
-
warnings: @warnings
|
212
|
-
}
|
213
|
-
rescue Exception => e
|
214
|
-
raise unless root_msg
|
215
|
-
|
216
|
-
msg.close
|
217
|
-
{
|
218
|
-
command: { status: -1 },
|
219
|
-
files: [],
|
220
|
-
headers: {},
|
221
|
-
errors: [
|
222
|
-
{
|
223
|
-
error: e.message,
|
224
|
-
error_class: e.class.name,
|
225
|
-
error_trace: e.backtrace
|
226
|
-
}
|
227
|
-
],
|
228
|
-
warnings: @warnings
|
229
|
-
}
|
230
|
-
end
|
231
|
-
|
232
|
-
protected
|
233
|
-
|
234
|
-
def eml_to_html; end
|
235
|
-
|
236
|
-
private
|
237
|
-
|
238
|
-
def find_hdr(list, key)
|
239
|
-
keys = list.keys
|
240
|
-
if (k = keys.find { |x| x.to_s =~ /^#{key}$/i })
|
241
|
-
v = list[k]
|
242
|
-
v = v.first if v.is_a? Array
|
243
|
-
v = Rfc2047.decode(v).strip if v.is_a? String
|
244
|
-
return v
|
245
|
-
end
|
246
|
-
nil
|
247
|
-
end
|
248
|
-
|
249
|
-
def hdr_html(key, value)
|
250
|
-
return format(HEADER_FIELD_TEMPLATE, key, CGI.escapeHTML(value)) if key.is_a?(String) && value.is_a?(String) && !value.empty?
|
251
|
-
|
252
|
-
''
|
253
|
-
end
|
254
|
-
|
255
|
-
def get_attachment_data(attachments, cid)
|
256
|
-
attachments.each do |attachment|
|
257
|
-
next unless attachment.properties.attach_content_id == cid
|
258
|
-
|
259
|
-
attachment.data.rewind
|
260
|
-
return {
|
261
|
-
mime_type: attachment.properties.attach_mime_tag,
|
262
|
-
base64: Base64.encode64(attachment.data.read).gsub(/[\r\n]/, '')
|
263
|
-
}
|
264
|
-
end
|
265
|
-
nil
|
266
|
-
end
|
267
|
-
end
|
268
|
-
end
|
269
|
-
end
|
270
|
-
end
|
@@ -1,52 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
require 'os'
|
4
|
-
|
5
|
-
require 'libis/tools/extend/string'
|
6
|
-
require 'libis/tools/logger'
|
7
|
-
require 'libis/tools/command'
|
8
|
-
|
9
|
-
require 'libis/format/config'
|
10
|
-
|
11
|
-
module Libis
|
12
|
-
module Format
|
13
|
-
module Tool
|
14
|
-
class PdfTool
|
15
|
-
include ::Libis::Tools::Logger
|
16
|
-
|
17
|
-
def self.installed?
|
18
|
-
result = Libis::Tools::Command.run(Libis::Format::Config[:java_cmd], '-version')
|
19
|
-
return false unless (result[:status]).zero?
|
20
|
-
|
21
|
-
File.exist?(Libis::Format::Config[:pdf_tool])
|
22
|
-
end
|
23
|
-
|
24
|
-
def self.run(command, source, target, *options)
|
25
|
-
new.run command, source, target, *options
|
26
|
-
end
|
27
|
-
|
28
|
-
def run(command, source, target, *options)
|
29
|
-
if OS.java?
|
30
|
-
# TODO: import library and execute in current VM. For now do exactly as in MRI.
|
31
|
-
end
|
32
|
-
|
33
|
-
timeout = Libis::Format::Config[:timeouts][:pdf_tool]
|
34
|
-
args = [
|
35
|
-
Libis::Format::Config[:java_cmd],
|
36
|
-
'-jar', Libis::Format::Config[:pdf_tool],
|
37
|
-
[command],
|
38
|
-
'-i', source,
|
39
|
-
'-o', target,
|
40
|
-
options,
|
41
|
-
].flatten
|
42
|
-
|
43
|
-
result = Libis::Tools::Command.run(*args, timeout: , kill_after: timeout * 2)
|
44
|
-
|
45
|
-
result[:err] << "#{self.class} took too long (> #{timeout} seconds) to complete" if result[:timeout]
|
46
|
-
|
47
|
-
result
|
48
|
-
end
|
49
|
-
end
|
50
|
-
end
|
51
|
-
end
|
52
|
-
end
|
@@ -1,156 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
require 'yaml'
|
4
|
-
require 'libis/tools/extend/hash'
|
5
|
-
|
6
|
-
module Libis
|
7
|
-
module Format
|
8
|
-
# noinspection RubyClassVariableUsageInspection
|
9
|
-
class TypeDatabase
|
10
|
-
@implementation = Libis::Format::TypeDatabaseImpl.instance
|
11
|
-
|
12
|
-
def self.implementation(impl)
|
13
|
-
@implementation = impl
|
14
|
-
end
|
15
|
-
|
16
|
-
def self.enrich(info, map_keys = {})
|
17
|
-
return {} unless info.is_a? Hash
|
18
|
-
|
19
|
-
mapper = Hash.new { |hash, key| hash[key] = key }
|
20
|
-
mapper.merge! map_keys
|
21
|
-
unless (puid = info[mapper[:PUID]]).blank?
|
22
|
-
info[mapper[:TYPE]] ||= begin
|
23
|
-
puid_infos(puid).first[:TYPE]
|
24
|
-
rescue StandardError
|
25
|
-
nil
|
26
|
-
end
|
27
|
-
end
|
28
|
-
unless (mime = info[mapper[:MIME]]).blank?
|
29
|
-
info[mapper[:TYPE]] ||= begin
|
30
|
-
mime_infos(mime).first[:TYPE]
|
31
|
-
rescue StandardError
|
32
|
-
nil
|
33
|
-
end
|
34
|
-
end
|
35
|
-
unless (type_name = info[mapper[:TYPE]]).nil?
|
36
|
-
mapper.each_key do |key|
|
37
|
-
info[mapper[key]] = get(type_name, key) || info[mapper[key]]
|
38
|
-
end
|
39
|
-
info[mapper[:GROUP]] = type_group(type_name)
|
40
|
-
end
|
41
|
-
info
|
42
|
-
end
|
43
|
-
|
44
|
-
def self.normalize(info, map_keys = {})
|
45
|
-
return {} unless info.is_a? Hash
|
46
|
-
|
47
|
-
mapper = Hash.new { |hash, key| hash[key] = key }
|
48
|
-
mapper.merge! map_keys
|
49
|
-
unless (puid = info[mapper[:PUID]]).blank?
|
50
|
-
info[mapper[:TYPE]] ||= begin
|
51
|
-
puid_infos(puid).first[:TYPE]
|
52
|
-
rescue StandardError
|
53
|
-
nil
|
54
|
-
end
|
55
|
-
end
|
56
|
-
unless (mime = info[mapper[:MIME]]).blank?
|
57
|
-
info[mapper[:TYPE]] ||= begin
|
58
|
-
mime_infos(mime).first[:TYPE]
|
59
|
-
rescue StandardError
|
60
|
-
nil
|
61
|
-
end
|
62
|
-
end
|
63
|
-
unless (type_name = info[mapper[:TYPE]]).nil?
|
64
|
-
info[mapper[:MIME]] = type_mimetypes(type_name).first if type_mimetypes(type_name).first
|
65
|
-
info[mapper[:GROUP]] = type_group(type_name)
|
66
|
-
end
|
67
|
-
info
|
68
|
-
end
|
69
|
-
|
70
|
-
def self.get(type_name, key)
|
71
|
-
case key
|
72
|
-
when :MIME
|
73
|
-
type_mimetypes(type_name).first
|
74
|
-
when :PUID
|
75
|
-
type_puids(type_name).first
|
76
|
-
when :EXTENSION
|
77
|
-
type_extentions(type_name).first
|
78
|
-
else
|
79
|
-
typeinfo(type_name)[key]
|
80
|
-
end
|
81
|
-
end
|
82
|
-
|
83
|
-
def self.type_group(ftype)
|
84
|
-
typeinfo(ftype)[:GROUP]
|
85
|
-
end
|
86
|
-
|
87
|
-
def self.type_mimetypes(ftype)
|
88
|
-
typeinfo(ftype)[:MIME] || []
|
89
|
-
end
|
90
|
-
|
91
|
-
def self.type_puids(ftype)
|
92
|
-
typeinfo(ftype)[:PUID] || []
|
93
|
-
end
|
94
|
-
|
95
|
-
def self.type_extentions(ftype)
|
96
|
-
typeinfo(ftype)[:EXTENSIONS] || []
|
97
|
-
end
|
98
|
-
|
99
|
-
def self.typeinfo(ftype)
|
100
|
-
@implementation.typeinfo(ftype)
|
101
|
-
end
|
102
|
-
|
103
|
-
def self.group_types(group)
|
104
|
-
@implementation.group_types(group)
|
105
|
-
end
|
106
|
-
|
107
|
-
def self.puid_infos(puid)
|
108
|
-
@implementation.puid_infos(puid)
|
109
|
-
end
|
110
|
-
|
111
|
-
def self.puid_types(puid)
|
112
|
-
@implementation.puid_types(puid)
|
113
|
-
end
|
114
|
-
|
115
|
-
def self.puid_groups(puid)
|
116
|
-
puid_types(puid).map(&method(:type_group))
|
117
|
-
end
|
118
|
-
|
119
|
-
def self.mime_infos(mime)
|
120
|
-
@implementation.mime_infos(mime)
|
121
|
-
end
|
122
|
-
|
123
|
-
def self.mime_types(mime)
|
124
|
-
@implementation.mime_types(mime)
|
125
|
-
end
|
126
|
-
|
127
|
-
def self.mime_groups(mime)
|
128
|
-
mime_types(mime).map(&method(:type_group))
|
129
|
-
end
|
130
|
-
|
131
|
-
def self.ext_infos(ext)
|
132
|
-
@implementation.ext_infos(ext)
|
133
|
-
end
|
134
|
-
|
135
|
-
def self.ext_types(ext)
|
136
|
-
@implementation.ext_types(ext)
|
137
|
-
end
|
138
|
-
|
139
|
-
def self.puid_typeinfo(puid)
|
140
|
-
@implementation.puid_typeinfo(puid)
|
141
|
-
end
|
142
|
-
|
143
|
-
def self.known_mime?(mime)
|
144
|
-
@implementation.known_mime?(mime)
|
145
|
-
end
|
146
|
-
|
147
|
-
def self.groups
|
148
|
-
@implementation.groups
|
149
|
-
end
|
150
|
-
|
151
|
-
def self.export_csv(filename, **options)
|
152
|
-
@implementation.export_csv(filename, **options)
|
153
|
-
end
|
154
|
-
end
|
155
|
-
end
|
156
|
-
end
|
@@ -1,153 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
require 'singleton'
|
4
|
-
require 'yaml'
|
5
|
-
require 'csv'
|
6
|
-
|
7
|
-
require 'libis/tools/logger'
|
8
|
-
require 'libis/tools/extend/hash'
|
9
|
-
require 'libis/tools/extend/string'
|
10
|
-
require 'libis/tools/extend/symbol'
|
11
|
-
|
12
|
-
module Libis
|
13
|
-
module Format
|
14
|
-
class TypeDatabaseImpl
|
15
|
-
include Singleton
|
16
|
-
include ::Libis::Tools::Logger
|
17
|
-
|
18
|
-
def typeinfo(ftype)
|
19
|
-
@types[ftype.to_sym] || {}
|
20
|
-
end
|
21
|
-
|
22
|
-
def group_types(group)
|
23
|
-
@types.select do |_, v|
|
24
|
-
v[:GROUP] == group.to_sym
|
25
|
-
end.keys
|
26
|
-
end
|
27
|
-
|
28
|
-
def puid_infos(puid)
|
29
|
-
@types.select do |_, v|
|
30
|
-
v[:PUID].include? puid
|
31
|
-
rescue StandardError
|
32
|
-
false
|
33
|
-
end.values
|
34
|
-
end
|
35
|
-
|
36
|
-
def puid_types(puid)
|
37
|
-
@types.select do |_, v|
|
38
|
-
v[:PUID].include? puid
|
39
|
-
rescue StandardError
|
40
|
-
false
|
41
|
-
end.keys
|
42
|
-
end
|
43
|
-
|
44
|
-
def mime_infos(mime)
|
45
|
-
@types.select do |_, v|
|
46
|
-
v[:MIME].include? mime
|
47
|
-
rescue StandardError
|
48
|
-
false
|
49
|
-
end.values
|
50
|
-
end
|
51
|
-
|
52
|
-
def mime_types(mime)
|
53
|
-
@types.select do |_, v|
|
54
|
-
v[:MIME].include? mime
|
55
|
-
rescue StandardError
|
56
|
-
false
|
57
|
-
end.keys
|
58
|
-
end
|
59
|
-
|
60
|
-
def ext_infos(ext)
|
61
|
-
ext = ext.gsub(/^\./, '')
|
62
|
-
@types.select do |_, v|
|
63
|
-
v[:EXTENSIONS].include?(ext)
|
64
|
-
rescue StandardError
|
65
|
-
false
|
66
|
-
end.values
|
67
|
-
end
|
68
|
-
|
69
|
-
def ext_types(ext)
|
70
|
-
ext = ext.gsub(/^\./, '')
|
71
|
-
@types.select do |_, v|
|
72
|
-
v[:EXTENSIONS].include?(ext)
|
73
|
-
rescue StandardError
|
74
|
-
false
|
75
|
-
end.keys
|
76
|
-
end
|
77
|
-
|
78
|
-
def puid_typeinfo(puid)
|
79
|
-
@types.each do |_, v|
|
80
|
-
return v if v[:PUID]&.include?(puid)
|
81
|
-
end
|
82
|
-
nil
|
83
|
-
end
|
84
|
-
|
85
|
-
def known_mime?(mime)
|
86
|
-
@types.each do |_, v|
|
87
|
-
return true if v[:MIME]&.include? mime
|
88
|
-
end
|
89
|
-
false
|
90
|
-
end
|
91
|
-
|
92
|
-
def groups
|
93
|
-
@types.values.map(&:dig.with(:GROUP)).uniq
|
94
|
-
end
|
95
|
-
|
96
|
-
def export_csv(filename, **options)
|
97
|
-
headers = @types.values.each_with_object(Set.new) { |v, s| v.each_key { |k| s << k.to_s } }
|
98
|
-
options[:headers] = headers.to_a
|
99
|
-
CSV.open(filename, 'w', **options) do |csv|
|
100
|
-
@types.each_value do |v|
|
101
|
-
csv << CSV::Row.new(v.keys, v.values.map { |x| x.is_a?(Array) ? x.join(', ') : x })
|
102
|
-
end
|
103
|
-
end
|
104
|
-
end
|
105
|
-
|
106
|
-
def load_types(file_or_hash = {}, append = true)
|
107
|
-
hash = file_or_hash.is_a?(Hash) ? file_or_hash : YAML.load_file(file_or_hash)
|
108
|
-
# noinspection RubyResolve
|
109
|
-
hash.each do |group, type_info|
|
110
|
-
type_info.each do |type_name, info|
|
111
|
-
type_key = type_name.to_sym
|
112
|
-
info.symbolize_keys!
|
113
|
-
info[:TYPE] = type_key
|
114
|
-
info[:GROUP] = group.to_sym
|
115
|
-
info[:MIME] = begin
|
116
|
-
info[:MIME].strip.split(/[\s,]+/).map(&:strip)
|
117
|
-
rescue StandardError
|
118
|
-
[]
|
119
|
-
end
|
120
|
-
info[:EXTENSIONS] = begin
|
121
|
-
info[:EXTENSIONS].strip.split(/[\s,]+/).map(&:strip)
|
122
|
-
rescue StandardError
|
123
|
-
[]
|
124
|
-
end
|
125
|
-
info[:PUID] = info[:PUID].strip.split(/[\s,]+/).map(&:strip) if info[:PUID]
|
126
|
-
if @types.key?(type_key)
|
127
|
-
warn 'Type %s already defined; merging with info from %s.', type_name, file_or_hash
|
128
|
-
info.merge!(@types[type_key]) do |_, v_new, v_old|
|
129
|
-
case v_old
|
130
|
-
when Array
|
131
|
-
append ? v_old + v_new : v_new + v_old
|
132
|
-
when Hash
|
133
|
-
append ? v_new.merge(v_old) : v_old.merge(v_new)
|
134
|
-
else
|
135
|
-
append ? v_old : v_new
|
136
|
-
end
|
137
|
-
end
|
138
|
-
end
|
139
|
-
@types[type_key] = info
|
140
|
-
end
|
141
|
-
end
|
142
|
-
end
|
143
|
-
|
144
|
-
protected
|
145
|
-
|
146
|
-
def initialize
|
147
|
-
@types = {}
|
148
|
-
type_database = Libis::Format::Config[:type_database]
|
149
|
-
load_types(type_database)
|
150
|
-
end
|
151
|
-
end
|
152
|
-
end
|
153
|
-
end
|