checking-you-out 0.7.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/LICENSE +661 -0
- data/README.md +14 -0
- data/bin/are-we-unallocated-yet +9 -0
- data/bin/benchmark +66 -0
- data/bin/checking-you-out +8 -0
- data/bin/repl +7 -0
- data/bin/test-my-best +4 -0
- data/lib/checking-you-out.rb +40 -0
- data/lib/checking-you-out/auslandsgesprach.rb +253 -0
- data/lib/checking-you-out/ghost_revival.rb +71 -0
- data/lib/checking-you-out/ghost_revival/mr_mime.rb +390 -0
- data/lib/checking-you-out/ghost_revival/xross_infection.rb +146 -0
- data/lib/checking-you-out/inner_spirit.rb +215 -0
- data/lib/checking-you-out/party_starter.rb +202 -0
- data/lib/checking-you-out/party_starter/stick_around.rb +260 -0
- data/lib/checking-you-out/party_starter/weighted_action.rb +41 -0
- data/lib/checking-you-out/sweet_sweet_love_magic.rb +226 -0
- data/mime/packages/distorted-types.xml +68 -0
- data/mime/packages/third-party/shared-mime-info/freedesktop.org.xml.in +7672 -0
- data/mime/packages/third-party/tika-mimetypes/tika-mimetypes.xml +2762 -0
- metadata +232 -0
data/README.md
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
CHECKING::YOU::OUT is DistorteD's file-identification library, originally a
|
2
|
+
`ruby-mime-types` wrapper/monkey-patch — now its own full implementation
|
3
|
+
of a `shared-mime-info` parser and in-memory representation.
|
4
|
+
|
5
|
+
Designed with speed and memory-efficiency in mind. See `bin/benchmark` for more.
|
6
|
+
|
7
|
+
Prior Art:
|
8
|
+
https://github.com/mime-types/ruby-mime-types
|
9
|
+
https://github.com/hanklords/shared-mime-info
|
10
|
+
https://github.com/mimemagicrb/mimemagic
|
11
|
+
https://github.com/discourse/mini_mime
|
12
|
+
https://github.com/rails/marcel/
|
13
|
+
https://github.com/blackwinter/ruby-filemagic/
|
14
|
+
https://github.com/glongman/ffiruby-filemagic/
|
data/bin/benchmark
ADDED
@@ -0,0 +1,66 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require 'benchmark/ips'
|
4
|
+
require 'memory_profiler'
|
5
|
+
|
6
|
+
# Add our development CYO to LOAD_PATH so we can run it without Bundler.
|
7
|
+
$: << File.expand_path('../../lib', __FILE__)
|
8
|
+
|
9
|
+
# Print the allocation/retention totals of a finished `MemoryProfiler` report.
show_totals = lambda do |report|
  puts "Total allocated: #{report.total_allocated_memsize} bytes (#{report.total_allocated} objects)"
  puts "Total retained: #{report.total_retained_memsize} bytes (#{report.total_retained} objects)"
end

# --- ruby-mime-types (columnar) ---------------------------------------------
puts
puts "Memory stats for requiring mime/types/columnar"
mime_types_report = MemoryProfiler.report { require 'mime/types/columnar' }

# Note: Some creative accounting is necessary to accurately count the number of unique `Mime::Type`s
# included in this library's data, since it treats aliases as separate types, e.g.:
# "doc"=>[#<MIME::Type: application/msword>, #<MIME::Type: application/word>, #<MIME::Type: application/x-msword>, #<MIME::Type: application/x-word>],
# "rtf"=>[#<MIME::Type: application/rtf>, #<MIME::Type: application/x-rtf>, #<MIME::Type: text/rtf>, #<MIME::Type: text/x-rtf>],
# "csv"=>[#<MIME::Type: text/comma-separated-values>, #<MIME::Type: text/csv>],
#
# I recognize that there is some overlap between `:preferred_extension`s where the types are *not*
# the same thing, but this is the best way I have to get a count anywhere close to correct.
# You can see for yourself with `MIME::Types.to_a.group_by(&:preferred_extension)` to double-check me.
#
# There are also many many `MIME::Type`s with no recorded extname at all, and I am intentionally
# not counting them because this library doesn't do content-matching, meaning without that filename
# metadata there's no other way we could identify files of that type with this library.
# irb> MIME::Types.to_a.group_by(&:preferred_extension)[nil].size => 1350
puts "Total known file extensions: #{MIME::Types.to_a.group_by(&:preferred_extension).size}"
show_totals.call(mime_types_report)

# --- mini_mime ---------------------------------------------------------------
puts
puts "Memory stats for requiring mini_mime"
mini_mime_report = MemoryProfiler.report { require 'mini_mime' }

puts "Total known file extensions: #{File.readlines(MiniMime::Configuration.ext_db_path).size}"
show_totals.call(mini_mime_report)

# --- CHECKING::YOU::OUT ------------------------------------------------------
puts
puts "Memory stats for requiring CHECKING-YOU-OUT"
cyo_report = MemoryProfiler.report { require 'checking-you-out' }

puts "Total known file extensions: #{CHECKING::YOU::OUT::instance_variable_get(:@after_forever).size}"
show_totals.call(cyo_report)
puts

# Compare iterations-per-second of a single extension lookup in each library.
Benchmark.ips do |bm|
  bm.report('cached content_type lookup MiniMime') do
    MiniMime.lookup_by_filename("a.txt").content_type
  end

  bm.report('content_type lookup MIME::Types') do
    MIME::Types.type_for("a.txt")[0].content_type
  end

  bm.report('content_type lookup CHECKING::YOU::OUT') do
    CHECKING::YOU::OUT::from_postfix("txt")
  end
end
|
data/bin/repl
ADDED
data/bin/test-my-best
ADDED
@@ -0,0 +1,40 @@
|
|
1
|
+
require 'pathname' unless defined? ::Pathname
|
2
|
+
|
3
|
+
# Silence warning for Ractor use in Ruby 3.0.
|
4
|
+
# See https://ruby-doc.org/core/Warning.html#method-c-5B-5D for more.
|
5
|
+
# TODO: Remove this when Ractors are "stable".
|
6
|
+
Warning[:experimental] = false
|
7
|
+
|
8
|
+
|
9
|
+
class CHECKING; end
|
10
|
+
require_relative 'checking-you-out/inner_spirit' unless defined? ::CHECKING::YOU::IN
|
11
|
+
|
12
|
+
|
13
|
+
# I'm not trying to be an exact clone of `shared-mime-info`, but I think its "Recommended checking order"
|
14
|
+
# is pretty sane: https://specifications.freedesktop.org/shared-mime-info-spec/latest/
|
15
|
+
#
|
16
|
+
# In addition to the above, CYO() supports IETF-style Media Type strings like "application/xhtml+xml"
|
17
|
+
# and supports `stat`-less testing of `.extname`-style Strings.
|
18
|
+
class CHECKING::YOU

  # Identify "something" and return whatever the matching lookup helper returns.
  #
  # Dispatch is based on the class (and, for Strings, the shape) of `unknown_identifier`:
  #
  # - `::Pathname`           → handed to `TEST_EXTANT_PATHNAME`.
  # - `::String` containing exactly one `/` → treated as an IETF Media-Type String.
  # - `::String` like `".ext"` (leading dot, only one dot) → treated as an extname.
  # - any other `::String`   → if `so_deep` and the expanded path exists on disk it is
  #                            tested as a file; otherwise it is matched by glob, falling
  #                            back to a postfix match.
  # - `::CHECKING::YOU::IN`  → its `#out` is returned.
  #
  # Anything else falls through the `case` and yields `nil`.
  #
  # @param unknown_identifier [Pathname, String, CHECKING::YOU::IN] the thing to identify.
  # @param so_deep [Boolean] when `false`, String arguments are never checked against the filesystem.
  def self.OUT(unknown_identifier, so_deep: true)
    case unknown_identifier
    when ::Pathname
      TEST_EXTANT_PATHNAME.call(unknown_identifier)
    when ::String
      if unknown_identifier.count(-?/) == 1 # TODO: Additional String validation here.
        ::CHECKING::YOU::OUT::from_ietf_media_type(unknown_identifier)
      elsif unknown_identifier.start_with?(-?.) && unknown_identifier.count(-?.) == 1
        ::CHECKING::YOU::OUT::from_pathname(unknown_identifier)
      else
        # Only expand and `stat` the path when the caller allows a deep check, so that
        # `so_deep: false` never touches the filesystem. The path is expanded once and reused.
        extant_path = so_deep ? File.expand_path(unknown_identifier) : nil
        if extant_path && File.exist?(extant_path)
          TEST_EXTANT_PATHNAME.call(Pathname.new(extant_path))
        else
          LEGENDARY_HEAVY_GLOW.call(::CHECKING::YOU::OUT::from_glob(unknown_identifier), :weight) || ::CHECKING::YOU::OUT::from_postfix(unknown_identifier)
        end
      end
    when ::CHECKING::YOU::IN
      unknown_identifier.out
    end
  end
end
|
@@ -0,0 +1,253 @@
|
|
1
|
+
|
2
|
+
# https://github.com/jarib/ffi-xattr
|
3
|
+
require 'ffi-xattr'
|
4
|
+
|
5
|
+
|
6
|
+
module CHECKING::YOU::IN::AUSLANDSGESPRÄCH

  # Parse IETF Media-Type String → `::CHECKING::YOU::IN`
  #
  # This constant holds a lambda taking one String. The surrounding `proc { … }.call` exists only
  # to give that lambda a private set of reusable scratch objects via closure.
  FROM_IETF_TYPE = proc {
    # Keep these allocated instead of fragmenting our heap, since this will be called very frequently.
    scratch = Array.allocate
    hold = Array.allocate
    my_base = ::CHECKING::YOU::IN::allocate

    # Clear out the contents of the above temporary vars,
    # called to ensure we never leak the contents of one parse into another.
    # NOTE: this must stay a `proc` (not a lambda) because it is invoked through `tap(&the_bomb)`,
    # which yields one argument that we deliberately ignore.
    the_bomb = proc {
      scratch.clear
      hold.clear
      my_base.members.each { |gentleman|
        my_base[gentleman] = nil
      }
    }

    # Take a single codepoint from a reversed-then-NULL-terminated IETF Type String,
    # e.g. "ttub=traf;lmbe+fnb.ppg3.dnv/noitacilppa#{-?\u{0}}".
    #
    #
    # I switched from `String#each_char` to `String#each_codepoint` to avoid allocating single-character Strings
    # before they could be deduplicated with `-zig`. The Integer codepoints, on the other hand,
    # will always be the same `object_id` for the same codepoint:
    #
    # rb(main):162:0> -"あああ".each_char { |c| p c.object_id }
    # 420
    # 440
    # 460
    #
    # rb(main):163:0> -"あああ".each_codepoint { |c| p c.object_id }
    # 4709
    # 4709
    # 4709
    #
    #
    # Putting the codepoints back together with `Array#pack` is the fastest way I've found,
    # but it is mildly annoying that it won't intern the packed Strings in the C `pack_pack` code,
    # which lives here: https://github.com/ruby/ruby/blob/master/pack.c
    #
    # This means that interning them here in Ruby-land forces us to eat the spurious allocation
    # and is generally slower than it needs to be, e.g. `memory_profiler` report without post-allocation interning:
    #
    # Retained String Report
    # -----------------------------------
    # 1038 "application"
    # 1037 <internal:pack>:135
    #
    # 171 "text"
    # 170 <internal:pack>:135
    #
    # 140 "image"
    # 139 <internal:pack>:135
    # […]
    # -----------------------------------
    #
    # …vs `memory_profiler` report *with* post-allocation interning, where the duplicate packed Strings
    # are now "Allocated" instead of "Retained", i.e. they will be GCed:
    #
    # Allocated String Report
    # -----------------------------------
    # 2863 ""
    # 2818 /home/okeeblow/Works/DistorteD/CHECKING-YOU-OUT/lib/checking-you-out/party_starter.rb:90
    #
    # 2271 "application"
    # 2269 <internal:pack>:135
    # […]
    # -----------------------------------
    #
    # This ends up being a difference of ~2000 Objects for us, comparing the same before/after as above:
    #
    # [okeeblow@emi#CHECKING-YOU-OUT] ./bin/are-we-unallocated-yet|grep Total
    # Total allocated: 18178861 bytes (318640 objects)
    # Total retained: 1999675 bytes (26815 objects)
    # [okeeblow@emi#CHECKING-YOU-OUT] ./bin/are-we-unallocated-yet|grep Total
    # Total allocated: 18231779 bytes (319963 objects)
    # Total retained: 1926675 bytes (24996 objects)
    move_zig = proc { |zig|
      case zig
      when 0 then # NULL
        # End of input: whatever is left in `scratch` is the top-level type, e.g. `application`.
        my_base[:phylum] = scratch.reverse!.pack(-'U*').-@
      when 61 then # =
        # TODO: Implement Fragment-based Type variations
        hold.push(*scratch)
        scratch.clear
      when 59 then # ;
        # TODO: Implement Fragment-based Type variations
        scratch.clear
        hold.clear
      when 43 then # +
        #TODO: Implement tagged parent Types e.g. `+xml`
        scratch.clear
      when 47 then # /
        # When this character is encountered in a reversed Type String, `scratch` will contain the facet
        # which lets us determine if this Type belongs to a vendor tree, to the e`x`perimental tree, etc.
        # Because the input is reversed, the *tail* of `scratch` is the *head* of the subtype.
        # NOTE(review): the `vnd`/`prs` checks below only compare the tail of `scratch`, so a first
        # facet that merely starts with those letters (e.g. `vndfoo`) also matches — confirm if intended.
        tree = case
        when scratch[-3..] == (-'dnv').codepoints then
          scratch.pop(3)
          # https://datatracker.ietf.org/doc/html/rfc6838#section-3.2
          # We will be in a vendor tree, but let's additionally inspect `hold` to count its facets.
          # There will be only a single facet for vendor-tree types like `application/vnd.wordperfect`.
          # There will be multiple facets for vendor-tree types like `application/vnd.tcpdump.pcap`.
          #
          # If we have multiple facets, split the (reversed) last facet out and use it as the vendor-tree name,
          # e.g. for `application/vnd.tcpdump.pcap` we will use `tcpdump` as the tree name instead of `vnd`,
          # in fact not even storing the `vnd` at all.
          #
          # This increases the likelihood of `hold`'s remainder fitting inside a single RValue,
          # e.g. for yuge Types like `application/vnd.oasis.opendocument.graphics` we will store `oasis`
          # and `opendocument.graphics` (fits!) instead of `vnd` and `oasis.opendocument.graphics` (doesn't fit!).
          #
          # The dropped `vnd` will be reconstructed by `CYO#to_s` when it detects a non-standard tree name.
          last_dot = hold.rindex(46)
          if last_dot
            # Cut ".<reversed vendor>" off the end of `hold`, un-reverse it, and drop the now-trailing dot.
            -hold.slice!(last_dot..).reverse!.tap(&:pop).pack(-'U*')
          else
            -'vnd'
          end
        when scratch[-3..] == (-'srp').codepoints then
          # https://datatracker.ietf.org/doc/html/rfc6838#section-3.3
          # "Media types created experimentally or as part of products that are not distributed commercially".
          # This is mostly an early-Internet legacy and there are only a few of these in `shared-mime-info`,
          # e.g. `audio/prs.sid` for the C=64 Sound Interface Device audio format,
          # but they can still be registered.
          scratch.pop(3)
          -'prs'
        when scratch[-5..] == (-'-sm-x').codepoints then
          # Microsoft formats like `text/x-ms-regedit`.
          # I'm treating this separately from the IETF `x-` tree just because there are so many of them,
          # and it's nice to keep Winders formats logically-grouped.
          scratch.pop(5)
          -'x-ms'
        when scratch[-2..] == (-'-x').codepoints then
          # Deprecated experimental tree (`x-`): https://datatracker.ietf.org/doc/html/rfc6648
          # I'm giving this deprecated tree the canonical `x` tree in CYO because it has legacy dating back
          # to the mid '70s and has many many many more Types than post-2012 `x.` tree,
          # RE: https://datatracker.ietf.org/doc/html/rfc6648#appendix-A
          scratch.pop(2)
          -?x
        when scratch.one? && scratch.last == 120 then # x
          # Faceted experimental tree (`x.`): https://datatracker.ietf.org/doc/html/rfc6838#section-3.4
          # There are only a few of these since "use of both `x-` and `x.` forms is discouraged",
          # e.g. `model/x.stl-binary`, and there aren't likely to be many more.
          #
          # 120 is the codepoint of `x`. (This previously compared against 100, which is `d`,
          # so `x.` Types fell through to the standards tree and `d.` Types matched here instead.)
          scratch.pop
          -'kayo-dot'
        else
          # Otherwise we are in the "standards" tree: https://datatracker.ietf.org/doc/html/rfc6838#section-3.1
          -'possum'
        end
        my_base[:kingdom] = -tree
        # Everything remaining in `hold` and `scratch` will comprise the most-specific Type component.
        hold.push(*scratch)
        my_base[:genus] = hold.reverse!.pack(-'U*').-@
        scratch.clear
        hold.clear
      when 46 then # .
        # Facet separator: move the finished facet into `hold`, dot-joined with any earlier facets.
        hold << 46 unless hold.empty?
        hold.push(*scratch)
        scratch.clear
      else
        scratch << zig
      end
    }

    # 𝘐𝘛'𝘚 𝘠𝘖𝘜 !!
    # Feed every codepoint of the String through `move_zig` back-to-front, then the terminating NULL,
    # and return a frozen copy of the populated struct while resetting the shared scratch state.
    cats = ->(gentlemen) {
      gentlemen.each_codepoint.reverse_each(&move_zig)
      move_zig.call(0)
      return my_base.dup.freeze.tap(&the_bomb)
    }

    # Public entry point: make sure we are parsing UTF-8 codepoints, transcoding only if necessary.
    -> (gentlemen) {
      return cats.call((gentlemen.encoding == Encoding::UTF_8) ? gentlemen : gentlemen.encode(Encoding::UTF_8))
    }
  }.call

  # Call the above singleton Proc to do the thing.
  #
  # @param ietf_string [String, nil] a Media-Type String such as `"application/xhtml+xml"`.
  # @return the frozen struct produced by `FROM_IETF_TYPE`, or `nil` when given `nil`.
  def from_ietf_media_type(ietf_string)
    return if ietf_string.nil?
    FROM_IETF_TYPE.call(ietf_string)
  end
end
|
179
|
+
|
180
|
+
module CHECKING::YOU::IN::INLANDGESPRÄCH
  # Non-IETF-tree as a CY(I|O)'s `kingdom` signifies the need for a leading `vnd.` facet
  # when reconstructing the Media-Type String.
  IETF_TREES = [
    # Current top-level registries are shown here: https://www.iana.org/assignments/media-types/media-types.xhtml
    # The latest addition reflected here is `font` from RFC 8081: https://datatracker.ietf.org/doc/html/rfc8081
    -'application',
    -'audio',
    -'example',
    -'font',
    -'image',
    -'message',
    -'model',
    -'multipart',
    -'text',
    -'video',
  ]

  # Reconstruct an IETF Media-Type String from a loaded CYI/CYO's `#members`
  #
  # The result is `"<phylum>/<tree prefix><genus>"`, where the tree prefix is derived from `kingdom`.
  #
  # @return [String] an interned (frozen, deduplicated) UTF-8 String.
  def to_s
    # TODO: Fragments (e.g. `;what=ever`), and syntax identifiers (e.g. `+xml`)
    tree_prefix = case
    when self.kingdom == -'kayo-dot' then -'x.'
    when self.kingdom == -?x then -'x-'
    when self.kingdom == -'x-ms' then -'x-ms-'
    when self.kingdom == -'prs' then -'prs.'
    when self.kingdom == -'vnd' then -'vnd.'
    when self.kingdom == -'possum' then nil.to_s
    else
      # Build the prefix as a *new* String. Appending to `self.kingdom.to_s` with `<<` would
      # mutate the struct member itself: a `FrozenError` for interned kingdoms, or an extra
      # trailing dot accumulating on every call for unfrozen ones.
      tree_name = self.kingdom.to_s
      IETF_TREES.include?(tree_name) ? "#{tree_name}." : "vnd.#{tree_name}."
    end
    -(String.new(encoding: Encoding::UTF_8, capacity: 128) << self.phylum.to_s << -'/' << tree_prefix << self.genus.to_s)
  end

  # Pretty-print objects using our custom `#:to_s`
  def inspect
    "#<#{self.class.to_s} #{self.to_s}>"
  end
end
|
218
|
+
|
219
|
+
module CHECKING::YOU::OUT::AUSLANDSGESPRÄCH

  # Wrap the result of the next `from_ietf_media_type` up the ancestor chain
  # in a new instance of the receiver. A `nil` argument yields `nil`.
  def from_ietf_media_type(ietf_string)
    new(super) unless ietf_string.nil?
  end

  # CHECK OUT a filesystem path by way of its extended attributes.
  # The argument might be a String, or might be an instance of the actual stdlib class `Pathname`:
  # https://ruby-doc.org/stdlib/libdoc/pathname/rdoc/Pathname.html
  #
  # Two attribute names are consulted, in this order of preference:
  #
  # - `user.mime_type`, per the freedesktop-dot-org specification:
  #   https://www.freedesktop.org/wiki/CommonExtendedAttributes/
  # - `Content-Type`, just like would be found in an HTTP header, as used by
  #   at least one other application (lighttpd a.k.a. "lighty"):
  #   https://redmine.lighttpd.net/projects/1/wiki/Mimetype_use-xattrDetails
  #
  # Both should contain IETF-style `media/sub`-type Strings, but they are technically
  # freeform and must be assumed to contain anything. It's very very unlikely that anybody
  # will ever use one of these at all, but hey how cool is it that we will support it if they do? :)
  #
  # T0DO: Handle relative paths and all the other corner cases that could be here when given String.
  # T0DO: Figure out if NTFS has anything to offer us since `ffi-xattr` does support Winders.
  #       https://docs.microsoft.com/en-us/openspecs/windows_protocols/ms-fscc/a82e9105-2405-4e37-b2c3-28c773902d85
  def from_xattr(pathname)
    declared_types = Xattr.new(pathname).to_h.slice('user.mime_type', 'Content-Type')
    from_ietf_media_type(declared_types.values.first)
  end

end
|
@@ -0,0 +1,71 @@
|
|
1
|
+
|
2
|
+
require_relative 'ghost_revival/mr_mime' unless defined? ::CHECKING::YOU::MrMIME
|
3
|
+
require_relative 'ghost_revival/xross_infection' unless defined? ::CHECKING::YOU::OUT::XROSS_INFECTION
|
4
|
+
|
5
|
+
module CHECKING::YOU::OUT::GHOST_REVIVAL

  # Filename for the main fdo `shared-mime-info` source XML.
  # We look for this file in the system `XDG_DATA_DIRS` and fall back to our own
  # bundled copy if the system version is missing or outdated.
  FDO_MIMETYPES_FILENAME = -'freedesktop.org.xml'

  # Hook run when this module is `extend`ed: for now, unconditionally load
  # every available `shared-mime-info`-format XML file at startup.
  # TODO: Support partial on-the-fly loading à la `mini_mime`.
  def self.extended(otra)
    # One handler is shared between all of our data files, which are read sequentially.
    handler = ::CHECKING::YOU::MrMIME.new

    # CYO bundles a copy of `freedesktop.org.xml` from `shared-mime-info` but prefers a
    # system-level copy if one is available and not out of date. This flag is switched off
    # once a suitable system copy has been loaded; otherwise the bundled copy gets loaded
    # when we reach it at the end of the search paths given in our environment.
    load_bundled_fdo_xml = true

    # Search `XDG_DATA_DIRS`, plus our Gem-local path appended at the very end
    # (lowest priority), hopefully finding the all-important `freedesktop.org.xml`.
    search_roots = ::CHECKING::YOU::OUT::XROSS_INFECTION::XDG.DATA.push(
      ::CHECKING::YOU::OUT::GEM_ROOT.call
    )

    # The same subdirectory applies when searching *any* root for `shared-mime-info` XML,
    # e.g. '/usr/share' + 'mime/packages'. For consistency our local data under the
    # Gem root uses the same layout.
    package_dirs = search_roots.map { |data_root| data_root.join(-'mime', -'packages') }

    # Find all XML files under all subdirectories of every package directory.
    #
    # `#glob` follows the same conventions as `File::fnmatch?`:
    # https://ruby-doc.org/core-3.0.2/File.html#method-c-fnmatch
    #
    # `EXTGLOB` enables the brace-delimited glob syntax, used here to allow an optional `'.in'`
    # extname as found on the `'freedesktop.org.xml.in'` bundled with our Gem, since I don't want
    # to rename the file from the XDG repo even though that extname means they don't want us
    # to use that file directly.
    xml_paths = package_dirs.flat_map { |package_dir|
      package_dir.glob(File.join(-'**', -'*.xml{.in,}'), File::FNM_EXTGLOB)
    }

    xml_paths.each_with_object(::CHECKING::YOU::OUT::GEM_ROOT.call) do |xml_path, gem_root|
      # Only the fdo database needs the bundled-vs-system decision. `String#include?` is used
      # since the system-level file is `'freedesktop.org.xml'` but the bundled one is
      # `'freedesktop.org.xml.in'`.
      if xml_path.basename.to_s.include?(FDO_MIMETYPES_FILENAME)
        # Walk up one directory level at a time; if *any* ancestor equals `GEM_ROOT`
        # then this is the bundled copy, otherwise it is a system copy.
        bundled = xml_path.ascend.any? { |ancestor| ancestor == gem_root }

        if bundled
          # A new-enough system-level copy will already have disabled this flag
          # to prevent loading outdated bundled data.
          next unless load_bundled_fdo_xml
        else
          # Use the system copy only if it is not older than our Gem, and if so
          # remember not to load the bundled copy later.
          next if ::CHECKING::YOU::OUT::GEM_PACKAGE_TIME.call > xml_path.mtime
          load_bundled_fdo_xml = false
        end
      end

      # If we made it here (no `next` above) then we should load the file.
      handler.open(xml_path)
    end # each_with_object

  end # def self.extended
end # module CHECKING::YOU::OUT::GHOST_REVIVAL
|