checking-you-out 0.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/LICENSE +661 -0
- data/README.md +14 -0
- data/bin/are-we-unallocated-yet +9 -0
- data/bin/benchmark +66 -0
- data/bin/checking-you-out +8 -0
- data/bin/repl +7 -0
- data/bin/test-my-best +4 -0
- data/lib/checking-you-out.rb +40 -0
- data/lib/checking-you-out/auslandsgesprach.rb +253 -0
- data/lib/checking-you-out/ghost_revival.rb +71 -0
- data/lib/checking-you-out/ghost_revival/mr_mime.rb +390 -0
- data/lib/checking-you-out/ghost_revival/xross_infection.rb +146 -0
- data/lib/checking-you-out/inner_spirit.rb +215 -0
- data/lib/checking-you-out/party_starter.rb +202 -0
- data/lib/checking-you-out/party_starter/stick_around.rb +260 -0
- data/lib/checking-you-out/party_starter/weighted_action.rb +41 -0
- data/lib/checking-you-out/sweet_sweet_love_magic.rb +226 -0
- data/mime/packages/distorted-types.xml +68 -0
- data/mime/packages/third-party/shared-mime-info/freedesktop.org.xml.in +7672 -0
- data/mime/packages/third-party/tika-mimetypes/tika-mimetypes.xml +2762 -0
- metadata +232 -0
data/README.md
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
CHECKING::YOU::OUT is DistorteD's file-identification library, originally a
|
2
|
+
`ruby-mime-types` wrapper/monkey-patch — now its own full implementation
|
3
|
+
of a `shared-mime-info` parser and in-memory representation.
|
4
|
+
|
5
|
+
Designed with speed and memory-efficiency in mind. See `bin/benchmark` for more.
|
6
|
+
|
7
|
+
Prior Art:
|
8
|
+
https://github.com/mime-types/ruby-mime-types
|
9
|
+
https://github.com/hanklords/shared-mime-info
|
10
|
+
https://github.com/mimemagicrb/mimemagic
|
11
|
+
https://github.com/discourse/mini_mime
|
12
|
+
https://github.com/rails/marcel/
|
13
|
+
https://github.com/blackwinter/ruby-filemagic/
|
14
|
+
https://github.com/glongman/ffiruby-filemagic/
|
data/bin/benchmark
ADDED
@@ -0,0 +1,66 @@
|
|
1
|
+
#!/usr/bin/env ruby

require 'benchmark/ips'
require 'memory_profiler'

# Put the in-tree development copy of CYO on the load path so this script runs without Bundler.
$LOAD_PATH << File.expand_path('../../lib', __FILE__)

# Print the three summary lines shared by every memory report below:
# the number of file extensions the library knows, then MemoryProfiler's
# allocated and retained totals for the profiled `require`.
summarize = lambda do |report, known_extensions|
  puts "Total known file extensions: #{known_extensions}"
  puts "Total allocated: #{report.total_allocated_memsize} bytes (#{report.total_allocated} objects)"
  puts "Total retained: #{report.total_retained_memsize} bytes (#{report.total_retained} objects)"
end

puts
puts "Memory stats for requiring mime/types/columnar"
report = MemoryProfiler.report do
  require 'mime/types/columnar'
end

# Counting the unique `MIME::Type`s in this library's data takes some creative accounting,
# because it treats every alias as a separate type, e.g.:
#   "doc"=>[#<MIME::Type: application/msword>, #<MIME::Type: application/word>, #<MIME::Type: application/x-msword>, #<MIME::Type: application/x-word>],
#   "rtf"=>[#<MIME::Type: application/rtf>, #<MIME::Type: application/x-rtf>, #<MIME::Type: text/rtf>, #<MIME::Type: text/x-rtf>],
#   "csv"=>[#<MIME::Type: text/comma-separated-values>, #<MIME::Type: text/csv>],
#
# Some `:preferred_extension`s are shared by types that are *not* actually the same thing,
# but grouping on it is the closest available approximation of a correct count.
# Double-check with `MIME::Types.to_a.group_by(&:preferred_extension)`.
#
# Many `MIME::Type`s also have no recorded extname at all. They are intentionally left out,
# because that library doesn't do content-matching: without filename metadata it has
# no other way to identify files of those types.
#   irb> MIME::Types.to_a.group_by(&:preferred_extension)[nil].size => 1350
summarize.call(report, MIME::Types.to_a.group_by(&:preferred_extension).size)

puts
puts "Memory stats for requiring mini_mime"
report = MemoryProfiler.report do
  require 'mini_mime'
end

summarize.call(report, File.readlines(MiniMime::Configuration.ext_db_path).size)

puts
puts "Memory stats for requiring CHECKING-YOU-OUT"
report = MemoryProfiler.report do
  require 'checking-you-out'
end

summarize.call(report, CHECKING::YOU::OUT::instance_variable_get(:@after_forever).size)
puts

# Compare lookup throughput for the same `txt` extension across the three libraries.
Benchmark.ips do |bench|
  bench.report('cached content_type lookup MiniMime') do
    MiniMime.lookup_by_filename("a.txt").content_type
  end

  bench.report('content_type lookup MIME::Types') do
    MIME::Types.type_for("a.txt")[0].content_type
  end

  bench.report('content_type lookup CHECKING::YOU::OUT') do
    CHECKING::YOU::OUT::from_postfix("txt")
  end
end
|
data/bin/repl
ADDED
data/bin/test-my-best
ADDED
@@ -0,0 +1,40 @@
|
|
1
|
+
require 'pathname' unless defined? ::Pathname
|
2
|
+
|
3
|
+
# Silence warning for Ractor use in Ruby 3.0.
|
4
|
+
# See https://ruby-doc.org/core/Warning.html#method-c-5B-5D for more.
|
5
|
+
# TODO: Remove this when Ractors are "stable".
|
6
|
+
Warning[:experimental] = false
|
7
|
+
|
8
|
+
|
9
|
+
class CHECKING; end
|
10
|
+
require_relative 'checking-you-out/inner_spirit' unless defined? ::CHECKING::YOU::IN
|
11
|
+
|
12
|
+
|
13
|
+
# I'm not trying to be an exact clone of `shared-mime-info`, but I think its "Recommended checking order"
|
14
|
+
# is pretty sane: https://specifications.freedesktop.org/shared-mime-info-spec/latest/
|
15
|
+
#
|
16
|
+
# In addition to the above, CYO() supports IETF-style Media Type strings like "application/xhtml+xml"
|
17
|
+
# and supports `stat`-less testing of `.extname`-style Strings.
|
18
|
+
class CHECKING::YOU
  # Identify "something" and hand back whatever the matching lookup returns.
  #
  # Dispatch is based on the class (and, for Strings, the shape) of `unknown_identifier`:
  #
  # - `Pathname`             → handed to `TEST_EXTANT_PATHNAME`.
  # - `String` with exactly one `/` → treated as an IETF Media-Type String.
  # - `String` that starts with `.` and contains exactly one `.` → treated as an extname
  #   and handed to `from_pathname`.
  # - any other `String`     → if `so_deep` is truthy and the expanded path exists on disk it is
  #   handed to `TEST_EXTANT_PATHNAME`; otherwise the String is matched against globs
  #   (reduced by `LEGENDARY_HEAVY_GLOW` with `:weight`), falling back to `from_postfix`.
  # - `::CHECKING::YOU::IN`  → its `#out` is returned.
  #
  # Any other kind of argument falls through the `case` and yields `nil`.
  #
  # @param unknown_identifier [Pathname, String, CHECKING::YOU::IN]
  # @param so_deep [Boolean] when falsy, free-form Strings are never checked against the filesystem.
  def self.OUT(unknown_identifier, so_deep: true)
    case unknown_identifier
    when ::Pathname
      TEST_EXTANT_PATHNAME.call(unknown_identifier)
    when ::String
      if unknown_identifier.count(-?/) == 1 # TODO: Additional String validation here.
        ::CHECKING::YOU::OUT::from_ietf_media_type(unknown_identifier)
      elsif unknown_identifier.start_with?(-?.) && unknown_identifier.count(-?.) == 1
        ::CHECKING::YOU::OUT::from_pathname(unknown_identifier)
      else
        # Only expand the path (once) and touch the filesystem when the caller asked for it;
        # `so_deep: false` must stay a pure in-memory lookup.
        extant_path = so_deep ? File.expand_path(unknown_identifier) : nil
        if extant_path && File.exist?(extant_path)
          TEST_EXTANT_PATHNAME.call(Pathname.new(extant_path))
        else
          LEGENDARY_HEAVY_GLOW.call(::CHECKING::YOU::OUT::from_glob(unknown_identifier), :weight) ||
            ::CHECKING::YOU::OUT::from_postfix(unknown_identifier)
        end
      end
    when ::CHECKING::YOU::IN
      unknown_identifier.out
    end
  end
end
|
@@ -0,0 +1,253 @@
|
|
1
|
+
|
2
|
+
# https://github.com/jarib/ffi-xattr
|
3
|
+
require 'ffi-xattr'
|
4
|
+
|
5
|
+
|
6
|
+
module CHECKING::YOU::IN::AUSLANDSGESPRÄCH

  # Parse IETF Media-Type String → `::CHECKING::YOU::IN`
  #
  # The outer `proc` is invoked immediately (note the trailing `.call`), so this constant
  # holds the innermost lambda, closed over the reusable buffers declared below.
  # NOTE(review): those buffers are shared by every invocation — confirm callers never
  # run this concurrently.
  FROM_IETF_TYPE = proc {
    # Keep these allocated instead of fragmenting our heap, since this will be called very frequently.
    scratch = Array.allocate
    hold = Array.allocate
    my_base = ::CHECKING::YOU::IN::allocate

    # Clear out the contents of the above temporary vars,
    # called to ensure we never leak the contents of one parse into another.
    the_bomb = proc {
      scratch.clear
      hold.clear
      my_base.members.each { |gentleman|
        my_base[gentleman] = nil
      }
    }

    # Take a single codepoint from a reversed-then-NULL-terminated IETF Type String,
    # e.g. "ttub=traf;lmbe+fnb.ppg3.dnv/noitacilppa#{-?\u{0}}".
    #
    # `String#each_codepoint` is used instead of `String#each_char` to avoid allocating
    # single-character Strings before they could be deduplicated with `-zig`.
    # The Integer codepoints, on the other hand, will always be the same `object_id`
    # for the same codepoint:
    #
    #   rb(main):162:0> -"あああ".each_char { |c| p c.object_id }
    #   420
    #   440
    #   460
    #
    #   rb(main):163:0> -"あああ".each_codepoint { |c| p c.object_id }
    #   4709
    #   4709
    #   4709
    #
    # Putting the codepoints back together with `Array#pack` is the fastest way found so far,
    # but the C `pack_pack` code won't intern the packed Strings:
    # https://github.com/ruby/ruby/blob/master/pack.c
    #
    # Interning them here in Ruby-land therefore eats a spurious allocation and is generally
    # slower than it needs to be, e.g. `memory_profiler` report without post-allocation interning:
    #
    #   Retained String Report
    #   -----------------------------------
    #         1038  "application"
    #         1037  <internal:pack>:135
    #
    #          171  "text"
    #          170  <internal:pack>:135
    #
    #          140  "image"
    #          139  <internal:pack>:135
    #   […]
    #   -----------------------------------
    #
    # …vs `memory_profiler` report *with* post-allocation interning, where the duplicate packed
    # Strings are now "Allocated" instead of "Retained", i.e. they will be GCed:
    #
    #   Allocated String Report
    #   -----------------------------------
    #         2863  ""
    #         2818  /home/okeeblow/Works/DistorteD/CHECKING-YOU-OUT/lib/checking-you-out/party_starter.rb:90
    #
    #         2271  "application"
    #         2269  <internal:pack>:135
    #   […]
    #   -----------------------------------
    #
    # This ends up being a difference of ~2000 Objects, comparing the same before/after as above:
    #
    #   [okeeblow@emi#CHECKING-YOU-OUT] ./bin/are-we-unallocated-yet|grep Total
    #   Total allocated: 18178861 bytes (318640 objects)
    #   Total retained:  1999675 bytes (26815 objects)
    #   [okeeblow@emi#CHECKING-YOU-OUT] ./bin/are-we-unallocated-yet|grep Total
    #   Total allocated: 18231779 bytes (319963 objects)
    #   Total retained:  1926675 bytes (24996 objects)
    move_zig = proc { |zig|
      case zig
      when 0 then  # NULL
        # End of input: whatever is left in `scratch` is the top-level type, e.g. `application`.
        my_base[:phylum] = scratch.reverse!.pack(-'U*').-@
      when 61 then  # =
        # TODO: Implement Fragment-based Type variations
        hold.push(*scratch)
        scratch.clear
      when 59 then  # ;
        # TODO: Implement Fragment-based Type variations
        scratch.clear
        hold.clear
      when 43 then  # +
        # TODO: Implement tagged parent Types e.g. `+xml`
        scratch.clear
      when 47 then  # /
        # When this character is encountered in a reversed Type String, `scratch` will contain the facet
        # which lets us determine if this Type belongs to a vendor tree, to the e`x`perimental tree, etc.
        my_base[:kingdom] = case
          when scratch[-3..] == (-'dnv').codepoints then
            scratch.pop(3);
            # https://datatracker.ietf.org/doc/html/rfc6838#section-3.2
            # We will be in a vendor tree, but let's additionally inspect `hold` to count its facets.
            # There will be only a single facet for vendor-tree types like `application/vnd.wordperfect`.
            # There will be multiple facets for vendor-tree types like `application/vnd.tcpdump.pcap`.
            #
            # If we have multiple facets, split the (reversed) last facet out and use it as the vendor-tree name,
            # e.g. for `application/vnd.tcpdump.pcap` we will use `tcpdump` as the tree name instead of `vnd`,
            # in fact not even storing the `vnd` at all.
            #
            # This increases the likelihood of `hold`'s remainder fitting inside a single RValue,
            # e.g. for yuge Types like `application/vnd.oasis.opendocument.graphics` we will store `oasis`
            # and `opendocument.graphics` (fits!) instead of `vnd` and `oasis.opendocument.graphics` (doesn't fit!).
            #
            # The dropped `vnd` will be reconstructed by `CYO#to_s` when it detects a non-standard tree name.
            hold.rindex(46) ? -hold.slice!(hold.rindex(46)..).reverse!.tap(&:pop).pack(-'U*') : -'vnd'
          when scratch[-3..] == (-'srp').codepoints then
            # https://datatracker.ietf.org/doc/html/rfc6838#section-3.3
            # "Media types created experimentally or as part of products that are not distributed commercially".
            # This is mostly an early-Internet legacy and there are only a few of these in `shared-mime-info`,
            # e.g. `audio/prs.sid` for the C=64 Sound Interface Device audio format,
            # but they can still be registered.
            scratch.pop(3); -'prs'
          when scratch[-5..] == (-'-sm-x').codepoints then
            # Microsoft formats like `text/x-ms-regedit`.
            # Treated separately from the IETF `x-` tree just because there are so many of them,
            # and it's nice to keep Winders formats logically-grouped.
            scratch.pop(5); -'x-ms'
          when scratch[-2..] == (-'-x').codepoints then
            # Deprecated experimental tree (`x-`): https://datatracker.ietf.org/doc/html/rfc6648
            # This deprecated tree gets the canonical `x` tree in CYO because it has legacy dating back
            # to the mid '70s and has many many many more Types than post-2012 `x.` tree,
            # RE: https://datatracker.ietf.org/doc/html/rfc6648#appendix-A
            scratch.pop(2); -?x
          when scratch.one? && scratch.last == 120 then  # x
            # Faceted experimental tree (`x.`): https://datatracker.ietf.org/doc/html/rfc6838#section-3.4
            # There are only a few of these since "use of both `x-` and `x.` forms is discouraged",
            # e.g. `model/x.stl-binary`, and there aren't likely to be many more.
            #
            # The codepoint compared above must be 120 (`x`); 100 is `d`, which would let
            # `x.` types fall through to the standards tree and lose their separator.
            scratch.pop; -'kayo-dot'
          else
            # Otherwise we are in the "standards" tree: https://datatracker.ietf.org/doc/html/rfc6838#section-3.1
            -'possum'
        end.-@
        # Everything remaining in `hold` and `scratch` will comprise the most-specific Type component.
        hold.push(*scratch)
        my_base[:genus] = hold.reverse!.pack(-'U*').-@
        scratch.clear
        hold.clear
      when 46 then  # .
        # Facet boundary: move the finished facet into `hold`, re-inserting the separator
        # between facets (but never as the very first element).
        hold << 46 unless hold.empty?
        hold.push(*scratch)
        scratch.clear
      else
        scratch << zig
      end
    }

    # 𝘐𝘛'𝘚 𝘠𝘖𝘜 !!
    # Feed every codepoint through `move_zig` back-to-front, terminate with NULL,
    # then return a frozen copy of the result and wipe the shared buffers.
    cats = ->(gentlemen) {
      gentlemen.each_codepoint.reverse_each(&move_zig)
      move_zig.call(0)
      return my_base.dup.freeze.tap(&the_bomb)
    }

    # Public entry point: make sure the input is UTF-8 before walking its codepoints.
    -> (gentlemen) {
      return cats.call((gentlemen.encoding == Encoding::UTF_8) ? gentlemen : gentlemen.encode(Encoding::UTF_8))
    }
  }.call

  # Call the above singleton Proc to do the thing.
  # Returns `nil` when given `nil`, otherwise whatever `FROM_IETF_TYPE` returns.
  def from_ietf_media_type(ietf_string)
    return if ietf_string.nil?
    FROM_IETF_TYPE.call(ietf_string)
  end
end
|
179
|
+
|
180
|
+
module CHECKING::YOU::IN::INLANDGESPRÄCH
  # Non-IETF-tree as a CY(I|O)'s `kingdom` signifies the need for a leading `vnd.` facet
  # when reconstructing the Media-Type String.
  #
  # Frozen so the list cannot be modified at runtime; every element is already an interned String.
  IETF_TREES = [
    # Current top-level registries are shown here: https://www.iana.org/assignments/media-types/media-types.xhtml
    # The latest addition reflected here is `font` from RFC 8081: https://datatracker.ietf.org/doc/html/rfc8081
    -'application',
    -'audio',
    -'example',
    -'font',
    -'image',
    -'message',
    -'model',
    -'multipart',
    -'text',
    -'video',
  ].freeze

  # Reconstruct an IETF Media-Type String from a loaded CYI/CYO's `#members`
  # (`phylum`, `kingdom`, `genus`).
  #
  # Every piece is appended to a fresh buffer. The receiver's own `kingdom` String is never
  # appended *to*: doing so would raise `FrozenError` for interned values, or silently
  # corrupt the member otherwise.
  #
  # @return [String] the interned (frozen) Media-Type String.
  def to_s
    # TODO: Fragments (e.g. `;what=ever`), and syntax identifiers (e.g. `+xml`)
    buffer = String.new(encoding: Encoding::UTF_8, capacity: 128)
    buffer << self.phylum.to_s << -'/'
    case
    when self.kingdom == -'kayo-dot' then buffer << -'x.'
    when self.kingdom == -?x         then buffer << -'x-'
    when self.kingdom == -'x-ms'     then buffer << -'x-ms-'
    when self.kingdom == -'prs'      then buffer << -'prs.'
    when self.kingdom == -'vnd'      then buffer << -'vnd.'
    when self.kingdom == -'possum'   then nil  # Standards tree: no prefix at all.
    when !IETF_TREES.include?(self.kingdom.to_s)
      # Named vendor tree whose `vnd` facet was dropped at parse time; put it back.
      buffer << -'vnd.' << self.kingdom.to_s << -'.'
    else
      buffer << self.kingdom.to_s << -'.'
    end
    -(buffer << self.genus.to_s)
  end

  # Pretty-print objects using our custom `#:to_s`
  def inspect
    "#<#{self.class.to_s} #{self.to_s}>"
  end
end
|
218
|
+
|
219
|
+
module CHECKING::YOU::OUT::AUSLANDSGESPRÄCH

  # Wrap the result of the inherited Media-Type parse in a new instance of the receiver.
  # Yields `nil` for `nil` input without calling `super`.
  def from_ietf_media_type(ietf_string)
    new(super(ietf_string)) unless ietf_string.nil?
  end

  # CHECK OUT a filesystem path by way of its extended attributes.
  # `pathname` might be a String, or might be an instance of the actual stdlib class `Pathname`:
  # https://ruby-doc.org/stdlib/libdoc/pathname/rdoc/Pathname.html
  #
  # TODO: Handle relative paths and all the other corner cases that could be here when given String.
  #
  # Two attribute names are consulted for a manually-defined type, in this order:
  #
  # - `user.mime_type`, the freedesktop-dot-org specification:
  #   https://www.freedesktop.org/wiki/CommonExtendedAttributes/
  # - `Content-Type`, just like would be found in an HTTP header, as used by at least one
  #   other application (lighttpd a.k.a. "lighty"):
  #   https://redmine.lighttpd.net/projects/1/wiki/Mimetype_use-xattrDetails
  #
  # Both of these should contain IETF-style `media/sub`-type Strings,
  # but they are technically freeform and must be assumed to contain anything.
  # It's very very unlikely that anybody will ever use one of these at all,
  # but hey how cool is it that we will support it if they do? :)
  #
  # TODO: Figure out if NTFS has anything to offer us since `ffi-xattr` does support Winders.
  # https://docs.microsoft.com/en-us/openspecs/windows_protocols/ms-fscc/a82e9105-2405-4e37-b2c3-28c773902d85
  def from_xattr(pathname)
    attributes = Xattr.new(pathname).to_h
    declared_type = attributes.slice('user.mime_type', 'Content-Type').values.first
    from_ietf_media_type(declared_type)
  end

end
|
@@ -0,0 +1,71 @@
|
|
1
|
+
|
2
|
+
require_relative 'ghost_revival/mr_mime' unless defined? ::CHECKING::YOU::MrMIME
|
3
|
+
require_relative 'ghost_revival/xross_infection' unless defined? ::CHECKING::YOU::OUT::XROSS_INFECTION
|
4
|
+
|
5
|
+
module CHECKING::YOU::OUT::GHOST_REVIVAL

  # Filename for the main fdo `shared-mime-info` source XML.
  # It is looked for in the system `XDG_DATA_DIRS`; our own bundled copy is used
  # if the system version is missing or outdated.
  FDO_MIMETYPES_FILENAME = -'freedesktop.org.xml'

  # Hook run when this module is `extend`ed: for now, unconditionally load every
  # available `shared-mime-info`-format XML file at startup.
  # TODO: Support partial on-the-fly loading à la `mini_mime`.
  def self.extended(otra)
    # One handler is shared across all of our data files, which are read sequentially for now.
    handler = ::CHECKING::YOU::MrMIME.new

    # CYO bundles a copy of `freedesktop.org.xml` from `shared-mime-info` but prefers a system-level
    # copy if one is available and not out of date. This flag is cleared once a suitable system copy
    # is found; otherwise the bundled copy is loaded after we finish scanning the paths given
    # in our environment.
    load_bundled_fdo_xml = true

    # Search `XDG_DATA_DIRS` for any `shared-mime-info`-format data files we can load,
    # hopefully including the all-important `freedesktop.org.xml`.
    # Our Gem-local path is appended to the very end (lowest priority).
    search_roots = ::CHECKING::YOU::OUT::XROSS_INFECTION::XDG.DATA.push(
      ::CHECKING::YOU::OUT::GEM_ROOT.call
    )

    # Add path fragments for finding `shared-mime-info` package files.
    # This same subdir path applies when searching *any* `PATH` for `shared-mime-info` XML,
    # e.g. '/usr/share' + 'mime/packages' <-- this part
    # For consistency the same path is used for our local data under the Gem root.
    package_dirs = search_roots.map { |root| root.join(-'mime', -'packages') }

    # Find all XML files under all subdirectories of all given `Pathname`s.
    #
    # `#glob` follows the same conventions as `File::fnmatch?`:
    # https://ruby-doc.org/core-3.0.2/File.html#method-c-fnmatch
    #
    # `EXTGLOB` enables the brace-delimited glob syntax, used here to allow an optional `'.in'` extname
    # as found on the `'freedesktop.org.xml.in'` bundled with our Gem. That file keeps its name
    # from the XDG repo even though the extname means it isn't meant to be used directly.
    xml_paths = package_dirs.flat_map do |package_dir|
      package_dir.glob(File.join(-'**', -'*.xml{.in,}'), File::FNM_EXTGLOB)
    end

    xml_paths.each_with_object(::CHECKING::YOU::OUT::GEM_ROOT.call) do |xml_path, gem_root|
      # `String#include?` is used since the system-level file will be `'freedesktop.org.xml'`
      # but the bundled copy will be `'freedesktop.org.xml.in'`.
      if xml_path.basename.to_s.include?(FDO_MIMETYPES_FILENAME)
        # `Pathname#ascend` walks up one level at a time until reaching fs root.
        # If *any* ancestor equals `GEM_ROOT` this is the bundled copy, otherwise a system copy.
        bundled = xml_path.ascend.any? { |ancestor| ancestor == gem_root }

        if bundled
          # A new-enough system-level copy will have cleared this flag already,
          # which prevents loading outdated bundled data.
          next unless load_bundled_fdo_xml
        else
          # Use the system copy only if it's at least as new as our Gem,
          # and remember that so the bundled copy gets skipped.
          next if ::CHECKING::YOU::OUT::GEM_PACKAGE_TIME.call > xml_path.mtime
          load_bundled_fdo_xml = false
        end
      end

      # If we made it here (no `next` above) then we should load the file.
      handler.open(xml_path)
    end
  end  # def self.extended
end  # module CHECKING::YOU::OUT::GHOST_REVIVAL
|