checking-you-out 0.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/LICENSE +661 -0
- data/README.md +14 -0
- data/bin/are-we-unallocated-yet +9 -0
- data/bin/benchmark +66 -0
- data/bin/checking-you-out +8 -0
- data/bin/repl +7 -0
- data/bin/test-my-best +4 -0
- data/lib/checking-you-out.rb +40 -0
- data/lib/checking-you-out/auslandsgesprach.rb +253 -0
- data/lib/checking-you-out/ghost_revival.rb +71 -0
- data/lib/checking-you-out/ghost_revival/mr_mime.rb +390 -0
- data/lib/checking-you-out/ghost_revival/xross_infection.rb +146 -0
- data/lib/checking-you-out/inner_spirit.rb +215 -0
- data/lib/checking-you-out/party_starter.rb +202 -0
- data/lib/checking-you-out/party_starter/stick_around.rb +260 -0
- data/lib/checking-you-out/party_starter/weighted_action.rb +41 -0
- data/lib/checking-you-out/sweet_sweet_love_magic.rb +226 -0
- data/mime/packages/distorted-types.xml +68 -0
- data/mime/packages/third-party/shared-mime-info/freedesktop.org.xml.in +7672 -0
- data/mime/packages/third-party/tika-mimetypes/tika-mimetypes.xml +2762 -0
- metadata +232 -0
@@ -0,0 +1,390 @@
|
|
1
|
+
|
2
|
+
require 'ox'
|
3
|
+
|
4
|
+
|
5
|
+
# Push-event-based parser for freedesktop-dot-org `shared-mime-info`-format XML package files,
|
6
|
+
# including the main `shared-mime-info` database itself (GPLv2+), Apache Tika (Apache-2.0), and our own (AGPLv3).
|
7
|
+
# https://specifications.freedesktop.org/shared-mime-info-spec/shared-mime-info-spec-latest.html
|
8
|
+
# https://gitlab.freedesktop.org/xdg/shared-mime-info/-/blob/master/src/update-mime-database.c
|
9
|
+
#
|
10
|
+
#
|
11
|
+
# Example pulled from `freedesktop.org.xml.in`:
|
12
|
+
#
|
13
|
+
# <mime-type type="application/vnd.oasis.opendocument.text">
|
14
|
+
# <comment>ODT document</comment>
|
15
|
+
# <acronym>ODT</acronym>
|
16
|
+
# <expanded-acronym>OpenDocument Text</expanded-acronym>
|
17
|
+
# <sub-class-of type="application/zip"/>
|
18
|
+
# <generic-icon name="x-office-document"/>
|
19
|
+
# <magic priority="70">
|
20
|
+
# <match type="string" value="PK\003\004" offset="0">
|
21
|
+
# <match type="string" value="mimetype" offset="30">
|
22
|
+
# <match type="string" value="application/vnd.oasis.opendocument.text" offset="38"/>
|
23
|
+
# </match>
|
24
|
+
# </match>
|
25
|
+
# </magic>
|
26
|
+
# <glob pattern="*.odt"/>
|
27
|
+
# </mime-type>
|
28
|
+
class CHECKING::YOU::MrMIME < ::Ox::Sax
|
29
|
+
|
30
|
+
|
31
|
+
ORIGIN_OF_SYMMETRY = proc {
  # Reports the byte order of the host as `:BE` or `:LE`, memoizing the answer after the first call.
  #
  # Little-endian systems: VAX, x86 / AMD64.
  # Big-endian systems: Motorola 68k, IBM mainframes, and the Internet
  #   (https://en.wikipedia.org/wiki/Endianness#Networking).
  # Bi-endian systems: AArch64, PowerPC / POWER, MIPS, Alpha, PA-RISC, SuperH, Itanium, RISC-V.
  @host_endianness ||= begin
    probe = [1]
    # Pack the probe as a native-endianness 'I'nt and as a 'N'etwork-endianness (big-endian) Int;
    # the two byte strings are identical only on a big-endian host.
    probe.pack(-'I') == probe.pack(-'N') ? :BE : :LE
  end
}
|
53
|
+
|
54
|
+
# Turn an arbitrary String into the correctly-based Integer it represents.
|
55
|
+
# It would be nice if I could do this directly in `Ox::Sax::Value`.
|
56
|
+
# Base-16 Ints can be written as literals in Ruby, e.g.
|
57
|
+
# irb> 0xFF
|
58
|
+
# => 255
|
59
|
+
BASED_STRING = proc { |digits|
  # Converts a numeric `String` to the `Integer` it represents, choosing the base C-style:
  #   - leading `0x` / `0X` => base 16
  #   - leading `0`         => base 8
  #   - anything else       => base 10
  #
  # Equivalent to:
  # ```case
  #   when -s[0..1].downcase == -'0x' then s.to_i(16)
  #   when s.chr == -?0 then s.to_i(8)
  #   else s.to_i(10)
  # end```
  #
  # …but inspects raw bytes instead of slicing so no intermediate `String` or `Array` is allocated.
  # The `shared-mime-info` XML is explicitly only in UTF-8, where the ASCII characters we test for
  # are single bytes and no multi-byte character contains a byte below 0x80, so this is safe:
  #   irb> ?0.ord => 48
  #   irb> [?X.ord, ?x.ord] => [88, 120]
  #
  # `String#getbyte` returns `nil` past the end of the `String`, so an empty `String`
  # falls through to `to_i(10)` and yields 0 instead of raising.
  if digits.getbyte(0) == 48
    marker = digits.getbyte(1)
    (marker == 88 || marker == 120) ? digits.to_i(16) : digits.to_i(8)
  else
    digits.to_i(10)
  end
}
|
82
|
+
FDO_MAGIC_FORMATS = {
  # Maps a `<match type="…">` name to a `proc` that turns the attribute's `String` value
  # into the byte `String` to look for.
  #
  # "The string type supports the C character escapes (\0, \t, \n, \r, \xAB for hex, \777 for octal)."
  -'string' => proc { |text| text },
  -'byte' => proc { |text| BASED_STRING.call(text).chr },
  # 32-bit values: emit four bytes, least-significant first…
  -'little32' => proc { |text|
    int = BASED_STRING.call(text)
    (int & 0xFF).chr + ((int >> 8) & 0xFF).chr + ((int >> 16) & 0xFF).chr + ((int >> 24) & 0xFF).chr
  },
  # …or most-significant first.
  -'big32' => proc { |text|
    int = BASED_STRING.call(text)
    ((int >> 24) & 0xFF).chr + ((int >> 16) & 0xFF).chr + ((int >> 8) & 0xFF).chr + (int & 0xFF).chr
  },
  # 16-bit values: note the high byte is not masked, so values above 0xFFFF are not truncated.
  -'little16' => proc { |text|
    int = BASED_STRING.call(text)
    (int & 0xFF).chr + (int >> 8).chr
  },
  -'big16' => proc { |text|
    int = BASED_STRING.call(text)
    (int >> 8).chr + (int & 0xFF).chr
  },
}.tap { |formats|
  # TODO: Actually implement `stringignorecase`. This is a Tika thing not found in the fd.o XML.
  formats[-'stringignorecase'] = formats[-'string']

  # Falling back to `string` for an unhandled format will probably not produce a successful match,
  # but it keeps an unknown format from blowing up the entire program.
  formats.default = formats[-'string']

  # The `host` formats are aliases for whichever fixed-endianness format matches this system.
  host16, host32 = (ORIGIN_OF_SYMMETRY.call == :BE) ? [-'big16', -'big32'] : [-'little16', -'little32']
  formats[-'host16'] = formats[host16]
  formats[-'host32'] = formats[host32]
}
|
117
|
+
|
118
|
+
# Map of `shared-mime-info` XML Element names to our generic category names.
|
119
|
+
FDO_ELEMENT_CATEGORY = {
  # Keys are `shared-mime-info` Element names; values are the generic category names
  # used as the keys of `DEFAULT_LOADS`.
  :magic          => :content_match,
  :match          => :content_match,
  :alias          => :family_tree,
  :comment        => :textual_metadata,
  # The Element is spelled `<sub-class-of>` in the XML (and in `attr_value`), not `sub-child-of`.
  :"sub-class-of" => :family_tree,
  :"generic-icon" => :host_metadata,
  :glob           => :pathname_match,
}
|
128
|
+
|
129
|
+
# `MrMIME::new` will take any of these keys as keyword arguments
|
130
|
+
# whose value will override the default defined in this Hash.
|
131
|
+
DEFAULT_LOADS = {
  # Category name => whether data in that category is loaded unless the caller says otherwise.
  textual_metadata: false,
  host_metadata: false,
  pathname_match: true,
  content_match: true,
  family_tree: true,
}
|
138
|
+
|
139
|
+
# You shouldn't abuse the power of the Solid.
|
140
|
+
# Returns the `Set` of category names that should be ignored while parsing.
# `initialize` normally fills `@skips`; this fallback derives it from `DEFAULT_LOADS` alone.
def skips
  # `Hash#select` returns a new `Hash`. The destructive `Hash#keep_if` would permanently
  # strip every `true` entry out of the shared `DEFAULT_LOADS` constant.
  @skips ||= self.class::DEFAULT_LOADS.select { |_category, load| load == false }.keys.to_set
end
|
143
|
+
|
144
|
+
# Prepares parser state. Any keyword that is also a key of `DEFAULT_LOADS` overrides that default;
# other keywords are ignored.
def initialize(**kwargs)
  # Per the `Ox::Sax` dox:
  # "Initializing `line` attribute in the initializer will cause that variable to
  #  be updated before each callback with the XML line number.
  #  The same is true for the `column` attribute, but it will be updated with
  #  the column in the XML file that is the start of the element or node just read.
  #  `@pos`, if defined, will hold the number of bytes from the start of the document."
  #@pos = nil
  #@line = nil
  #@column = nil

  # Elements and Attributes arrive as separate events, and Attribute names are not unique
  # (`type` appears on `<mime-type>`, `<alias>`, `<match>`, and `<sub-class-of>`),
  # so remember which Elements are currently open.
  @parse_stack = []

  # Content-match building needs its own flag because the XML expresses OR as sibling Elements
  # and AND as child Elements, e.g.
  # <magic><match1><match2/><match3/></match1><match4/></magic> => [m1 AND m2] OR [m1 AND m3], OR [m4].
  @i_can_haz_magic = true

  # Let the caller decide which categories of data get ignored:
  # whatever ends up `false` after applying the overrides is skipped.
  defaults = self.class::DEFAULT_LOADS
  effective = defaults.merge(kwargs.slice(*defaults.keys))
  @skips = effective.select { |_category, load| load == false }.keys.to_set

  # Here's where I would put a call to `super()` a.k.a `Ox::Sax#initialize` — IF IT HAD ONE
end
|
173
|
+
|
174
|
+
# Returns the type object for the `<mime-type>` currently being parsed, building it from
# `@media_type` on first use and reusing it until `@cyo` is reset.
def cyo
  return @cyo if @cyo
  @cyo = ::CHECKING::YOU::OUT.from_ietf_media_type(@media_type)
end
|
177
|
+
|
178
|
+
|
179
|
+
# Callback methods we can implement in this Handler per http://www.ohler.com/ox/Ox/Sax.html
|
180
|
+
#
|
181
|
+
# def instruct(target); end
|
182
|
+
# def end_instruct(target); end
|
183
|
+
# def attr(name, str); end
|
184
|
+
# def attr_value(name, value); end
|
185
|
+
# def attrs_done(); end
|
186
|
+
# def doctype(str); end
|
187
|
+
# def comment(str); end
|
188
|
+
# def cdata(str); end
|
189
|
+
# def text(str); end
|
190
|
+
# def value(value); end
|
191
|
+
# def start_element(name); end
|
192
|
+
# def end_element(name); end
|
193
|
+
# def error(message, line, column); end
|
194
|
+
# def abort(name); end
|
195
|
+
#
|
196
|
+
# PROTIPs:
|
197
|
+
# - Our callback methods must be public or they won't be called!
|
198
|
+
# - Some argument names in the documented method definitions describe the type (in the Ruby sense)
|
199
|
+
# of the argument, and some argument names describe their semantic meaning w/r/t the XML document!
|
200
|
+
# - Arguments called `name` will be Symbols by default unless `Ox::parse_sax` was given
|
201
|
+
# an options hash where `:symbolize` => `false` in which case `name` arguments will contain Strings.
|
202
|
+
# - Arguments called `str` will be Strings always.
|
203
|
+
# - Arguments called `value` will be `Ox::Sax::Value` objects that can be further differentiated in several ways:
|
204
|
+
# http://www.ohler.com/ox/Ox/Sax/Value.html
|
205
|
+
# - For example, an invocation of `attr(name, str)` emitted while parsing a fd.o `<magic>` Element might have
|
206
|
+
# a `name` argument containing the Symbol `:priority` and a `str` argument containing its String value e.g. `"50"`.
|
207
|
+
# - The `value` versions of these callback methods have priority over their `str` equivalents
|
208
|
+
# if both are defined, and only one of them will ever be called,
|
209
|
+
# e.g. `attr_value()` > `attr()` iff `defined? attr_value()`.
|
210
|
+
|
211
|
+
# `Ox::Sax` callback fired when an Element opens. Records the Element on the parse stack,
# then sets up whatever state the Element's Attributes and children will fill in.
def start_element(name)
  # Always push, even for skipped Elements, because `end_element` pops unconditionally.
  @parse_stack.push(name)

  # NOTE(review): `skips` is built from the category keys of `DEFAULT_LOADS` while `name` is an
  # Element name; presumably this lookup was meant to go through `FDO_ELEMENT_CATEGORY` — confirm.
  return if self.skips.include?(name)

  case name
  when :"mime-type"
    # Reuse one mutable `String` for the type name across every `<mime-type>`.
    @media_type ||= String.new
    @cyo = nil
  when :magic
    if @speedy_cat.nil?
      @speedy_cat = ::CHECKING::YOU::OUT::SpeedyCat.new
    end
  when :match
    # Mark any newly-added Sequence as eligible for a full match candidate.
    @i_can_haz_magic = true
    @speedy_cat.append(::CHECKING::YOU::OUT::SequenceCat.new)
  when :glob
    @stick_around = ::CHECKING::YOU::StickAround.new
  when :"magic-deleteall", :"glob-deleteall", :treemagic, :acronym, :"expanded-acronym"
    # TODO
  end
end
|
238
|
+
|
239
|
+
# `Ox::Sax` callback fired once per Attribute. What an Attribute means depends on which Element
# is open, because names like `type` are shared between several Elements.
def attr_value(attr_name, value)
  # The stack can be empty here, making `element` `nil`. This happens e.g. for the two
  # attributes of the XML declaration '<?xml version="1.0" encoding="UTF-8"?>'.
  element = @parse_stack.last
  return if self.skips.include?(element)

  # Nested dispatch avoids the `Array` allocation that pattern-matching `case` syntax would need.
  # I'd still like to refactor this to avoid the redundant `attr_name` checks.
  # Maybe a `Hash` of `proc`s?
  case element
  when :"mime-type"
    @media_type.replace(value.as_s) if attr_name == :type
  when :match
    case attr_name
    when :type   then @speedy_cat.last.format = FDO_MAGIC_FORMATS[value.as_s]
    when :value  then @speedy_cat.last.cat = value.as_s
    when :offset then @speedy_cat.last.boundary = value.as_s
    when :mask   then @speedy_cat.last.mask = BASED_STRING.call(value.as_s)
    end
  when :magic
    @speedy_cat&.weight = value.as_i if attr_name == :priority
  when :alias
    self.cyo.add_aka(::CHECKING::YOU::IN::from_ietf_media_type(value.as_s)) if attr_name == :type
  when :"sub-class-of"
    self.cyo.add_parent(::CHECKING::YOU::OUT::from_ietf_media_type(value.as_s)) if attr_name == :type
  when :glob
    case attr_name
    when :weight           then @stick_around.weight = value.as_i
    when :pattern          then @stick_around.replace(value.as_s)
    when :"case-sensitive" then @stick_around.case_sensitive = value.as_bool
    end
  when :"root-XML"
    # TODO: handle `:namespaceURI` and `:localName`.
  end
end
|
284
|
+
|
285
|
+
# `Ox::Sax` callback fired with an Element's text content.
# Only the text of `<comment>` is used: it becomes the description of the current type.
def text(element_text)
  element = @parse_stack.last
  return if self.skips.include?(element)

  self.cyo.description = element_text if element == :comment
end
|
292
|
+
|
293
|
+
# `Ox::Sax` callback fired when an Element closes. Pops the Element from the parse stack and
# commits whatever was accumulated for it to the current type.
#
# Raises `RuntimeError` if the closing Element is not the one most recently opened.
def end_element(name)
  # Raise a `StandardError` subclass so an ordinary `rescue` can handle it;
  # a bare `Exception` slips past `rescue => e`.
  raise RuntimeError, 'Parse stack element mismatch' unless @parse_stack.pop == name

  # Test the Element being closed, mirroring `start_element`.
  # After the `pop` above, `@parse_stack.last` is this Element's parent.
  return if self.skips.include?(name)

  case name
  when :"mime-type"
    @media_type.clear
    @cyo = nil
  when :match
    if @i_can_haz_magic
      # The Sequence stack represents a complete match once we start popping Sequences from it,
      # which we can know because every `<match>` stack push sets `@i_can_haz_magic = true`.
      self.cyo.add_content_match(
        # Add single-sequences directly instead of adding their container.
        @speedy_cat.one? ?
          # Transfer any non-default `weight` from the container to that single-sequence.
          @speedy_cat.pop.tap { _1.weight = @speedy_cat.weight } :
          # Otherwise go ahead and add a copy of the container while also preparing the
          # local container for a possible next-branch to the `<magic>` tree.
          @speedy_cat.dup.tap { @speedy_cat.pop }
      )
    else
      # This `<match>` had children, so it is only a prefix and is not a candidate itself,
      # but it must still leave the stack. Otherwise the next sibling branch would inherit it,
      # e.g. `<m1><m2/></m1><m4/>` would yield [m1, m4] instead of [m4].
      @speedy_cat.pop
    end
    # Mark any remaining partial Sequences as ineligible to be a full match candidate,
    # e.g. if we had a stack of [<match1/><match2/><match3/>] we would want to add a
    # candidate [m1, m2, m3] but not the partials [m1, m2] or [m1] as we clear out the stack.
    @i_can_haz_magic = false
  when :magic
    # `SpeedyCat#clear` will unset any non-default `weight` so we can re-use it cleanly.
    @speedy_cat.clear
  when :glob
    self.cyo.add_pathname_fragment(@stick_around) unless @stick_around.nil?
  end
end
|
324
|
+
|
325
|
+
# Parses the `shared-mime-info`-format XML file at `path`, feeding SAX events to this handler.
# Any `kwargs` are merged over the default `Ox.sax_parse` options below.
def open(path, **kwargs)
  # Docs: http://www.ohler.com/ox/Ox.html#method-c-sax_parse
  #
  # Code for this method is defined in Ox's C extension, not in Ox's Ruby lib:
  # https://github.com/ohler55/ox/blob/master/ext/ox/ox.c CTRL+F "call-seq: sax_parse".
  # (Not linking a particular line number since that would require linking a particular revision too)
  #
  # Here is an actual example `<magic><match/>` element pulled from `freedesktop.org.xml`
  # showing why I am using Ox's `:convert_special` here:
  # `<match type="string" value="&lt;&lt;&lt; QEMU VM Virtual Disk Image &gt;&gt;&gt;\n" offset="0"/>`
  #
  # I can't figure out how exactly `:skip_none` and `:skip_off` differ here.
  # They appear a few times as equal/fall-through `case`s in some C extension `switch` statements:
  # https://github.com/ohler55/ox/search?q=OffSkip
  # https://github.com/ohler55/ox/search?q=NoSkip
  # In `sax.c`'s `read_text` function they are used slightly differently in the following conditional
  # where `:skip_none` checks if the end of the Element has been reached but `:skip_off` doesn't.
  # https://github.com/ohler55/ox/blob/master/ext/ox/sax.c CTRL+F "read_text"
  # `((NoSkip == dr->options.skip && !isEnd) || (OffSkip == dr->options.skip)))`
  #
  # TODO: Probably String allocation gainz to be had inside Ox's C extension once the API is available:
  # https://bugs.ruby-lang.org/issues/13381
  # https://bugs.ruby-lang.org/issues/16029
  # e.g. https://github.com/msgpack/msgpack-ruby/pull/196
  sax_options = {
    convert_special: true,  # [boolean] Convert encoded entities back to their unencoded form, e.g. `"&lt;"` to `"<"`.
    skip: :skip_off,        # [:skip_none|:skip_return|:skip_white|:skip_off] (from Element text/value) Strip CRs, whitespace, or nothing.
    smart: false,           # [boolean] Toggle Ox's built-in hints for HTML parsing: https://github.com/ohler55/ox/blob/master/ext/ox/sax_hint.c
    strip_namespace: true,  # [nil|String|true|false] (from Element names) Strip no namespaces, all namespaces, or a specific namespace.
    symbolize: true,        # [boolean] Fill callback method `name` arguments with Symbols instead of with Strings.
    intern_strings: true,   # [boolean] Intern (freeze and deduplicate) String return values.
  }.update(kwargs)

  # Use the block form of `File::open` so the handle is closed when the block terminates;
  # `Ox.sax_parse` won't close File handles automatically.
  # The value of the block is returned from `File::open`.
  File.open(path, File::Constants::RDONLY) do |mime_xml|
    # "Announce an intention to access data from the current file in a specific pattern.
    #  On platforms that do not support the posix_fadvise(2) system call, this method is a no-op."
    #
    # This was probably a bigger deal when we all stored our files on spinning rust, but it shouldn't hurt :)
    #
    # I'm using `:sequential` because I am doing event-based XML parsing, and I'm avoiding `:noreuse`
    # because back-to-back invocations of DistorteD will benefit from the OS caching the data files.
    #
    # NOTE: `:noreuse` is a no-op on Lunix anyway, at least as of ver 5.12 as I write this in 2021:
    # https://linux.die.net/man/2/posix_fadvise
    # https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/mm/fadvise.c
    # https://web.archive.org/web/20130513093816/http://kerneltrap.org/node/7563
    #
    # But it works on FreeBSD, amusingly even when using Linux ABI compat:
    # https://www.freebsd.org/cgi/man.cgi?query=posix_fadvise&sektion=2
    # https://cgit.freebsd.org/src/tree/sys/kern/vfs_syscalls.c
    # https://cgit.freebsd.org/src/tree/sys/compat/linux/linux_file.c
    mime_xml.advise(:sequential)

    Ox.sax_parse(
      self,      # Instance of a class that responds to `Ox::Sax`'s callback messages.
      mime_xml,  # IO stream of XML to parse.
      **sax_options,
    )
  end
end
|
389
|
+
end
|
390
|
+
|
@@ -0,0 +1,146 @@
|
|
1
|
+
require 'pathname' unless defined?(::Pathname)
|
2
|
+
|
3
|
+
|
4
|
+
# Cross-OS / Cross-Desktop / Cross-Ruby t00lz.
|
5
|
+
class ::CHECKING::YOU::OUT::XROSS_INFECTION
|
6
|
+
|
7
|
+
# Host operating system detection.
|
8
|
+
# Host operating system detection. Every predicate tests the frozen `CHAIN` identifier `String`.
class SYSTEM
  require 'rbconfig'

  # I used to check `RUBY_PLATFORM` alone until I learned about `RbConfig`:
  # https://idiosyncratic-ruby.com/42-ruby-config.html
  CHAIN = -case
  when defined?(::RbConfig::CONFIG) then
    # Created by `mkconfig.rb` when Ruby is built.
    ::RbConfig::CONFIG[-'host_os']
  when defined?(RUBY_PLATFORM) then
    # This is misleading because it will be e.g. `'Java'` for JRuby,
    # and the paths we care about are more OS-dependent than Ruby-dependent.
    RUBY_PLATFORM
  when defined?(ENV) && ENV&.has_key?('OS') then
    ENV[-'OS']  # I've seen examples where this is `'Windows_NT'` but don't expect it on *nix.
  else
    begin
      # Try to `require` something that will definitely fail on non-Windows:
      # https://ruby-doc.org/stdlib/libdoc/win32ole/rdoc/WIN32OLE.html
      require 'win32ole'
      # Loading succeeded, so this must be Windows. (`require` itself returns a boolean,
      # which could not be used as the identifier.)
      'Winders'
    rescue ::LoadError
      # Loading failed, so this is not Windows; use a value no predicate below matches.
      'unknown'
    end
  end  # CHAIN

  # This is kinda redundant with `Gem.win_platform?`:
  # https://github.com/rubygems/rubygems/blob/master/lib/rubygems.rb Ctrl+F 'WIN_PATTERNS'
  def self.Windows?
    self::CHAIN.match?(%r&
      mswin|    # MS VC compiler / MS VC runtime
      mingw|    # GNU compiler / MS VC runtime
      cygwin|   # GNU compiler / Cygwin POSIX runtime
      interix|  # GNU compiler / MS POSIX runtime
      bccwin|   # Borland C++ compiler and runtime (dead since Embarcadero C++ Builder uses Clang)
      windows|  # e.g. `ENV['OS']` can be `'Windows_NT'`
      wince|    # Can Ruby even run on CE? idk
      djgpp|    # http://www.delorie.com/djgpp/
      winders   # lol
    &xi) || Gem.win_platform?
  end

  def self.OS∕2?
    # This could also be DOS, but I'll assume OS/2:
    # http://emx.sourceforge.net/
    # http://www.os2ezine.com/20020716/page_7.html
    self::CHAIN.match?(/emx/i)
  end

  def self.macOS?
    self::CHAIN.match?(/darwin/i)
  end

  def self.BSD?
    self::CHAIN.match?(/bsd/i)
  end

  def self.Lunix?
    # LUNIX TRULY IS THE SUPERIOR OPERATING SYSTEM!!!1
    # http://www.somethingawful.com/jeffk/usar.swf
    self::CHAIN.match?(/linux/i)
  end
end  # SYSTEM
|
70
|
+
|
71
|
+
|
72
|
+
# Our implementation of freedesktop-dot-org XDG directory handling:
|
73
|
+
# https://specifications.freedesktop.org/basedir-spec/basedir-spec-latest.html
|
74
|
+
#
|
75
|
+
# There is a nice-looking Gem for this already: https://www.alchemists.io/projects/xdg/
|
76
|
+
# However I'm not using it because CYO I want to do some nonstandard stuff ;)
|
77
|
+
# Our implementation of freedesktop-dot-org XDG directory handling.
# Every public method returns an `Array` of `Pathname`s.
class XDG

  # Generic method to return absolute `Pathname`s for the contents of a given environment variable.
  # Returns `nil` when the variable is missing or empty so callers can fall back with `||`.
  def self.ENVIRONMENTAL_PATHNAMES(variable)
    raw = ENV[variable]
    # Skip empty-`String` variables as well as missing variables.
    return if raw.nil? || raw.empty?

    # `PATH_SEPARATOR` will be a colon (:) on UNIX-like systems and semi-colon (;) on Windows.
    # Convert path variable contents to `Pathname`s with…
    # - :expand_path — Does shell expansion of path `String`s, e.g. `File.expand_path('~') == Dir::home`
    # - :directory?  — Drop any expanded `Pathname`s that don't refer to extant directories.
    # - :realpath    — Convert to absolute paths, e.g. following symbolic links.
    raw.split(::File::PATH_SEPARATOR)
      .map { ::Pathname.new(_1).expand_path }
      .select(&:directory?)
      .map(&:realpath)
  end

  # "`$XDG_DATA_DIRS` defines the preference-ordered set of base directories to
  #  search for data files in addition to the `$XDG_DATA_HOME` base directory."
  def self.DATA_DIRS
    from_environment = self.ENVIRONMENTAL_PATHNAMES(-'XDG_DATA_DIRS')
    return from_environment if from_environment

    # "If `$XDG_DATA_DIRS` is either not set or empty,
    #  a value equal to `/usr/local/share/:/usr/share/` should be used."
    defaults = ['/usr/local/share/', '/usr/share/']
    # Fixup platforms where we know to expect filez outside the fd.o defaults.
    # The predicate on `SYSTEM` is named `macOS?`.
    if SYSTEM.macOS?
      defaults.append('/opt/homebrew/share/')  # Homebrew
      defaults.append('/opt/local/share/')     # MacPorts
    end
    defaults.map { ::Pathname.new(_1) }.select(&:directory?).map(&:realpath)
  end  # DATA_DIRS

  # "`$XDG_DATA_HOME` defines the base directory relative to which user-specific data files should be stored."
  def self.DATA_HOME
    self.ENVIRONMENTAL_PATHNAMES(-'XDG_DATA_HOME') || [
      # "If `$XDG_DATA_HOME` is either not set or empty, a default equal to $HOME/.local/share should be used."
      ::Pathname.new(::Dir::home).expand_path.realpath.join(-'.local', -'share')
    ]
  end

  # "`$XDG_CONFIG_HOME` defines the base directory relative to which user-specific configuration files should be stored."
  def self.CONFIG_HOME
    self.ENVIRONMENTAL_PATHNAMES(-'XDG_CONFIG_HOME') || [
      # "If `$XDG_CONFIG_HOME` is either not set or empty, a default equal to `$HOME/.config` should be used."
      ::Pathname.new(::Dir::home).expand_path.realpath.join(-'.config')
    ]
  end

  # "`$XDG_STATE_HOME` defines the base directory relative to which user-specific state files should be stored."
  def self.STATE_HOME
    self.ENVIRONMENTAL_PATHNAMES(-'XDG_STATE_HOME') || [
      # "If `$XDG_STATE_HOME` is either not set or empty, a default equal to `$HOME/.local/state` should be used."
      ::Pathname.new(::Dir::home).expand_path.realpath.join(-'.local', -'state')
    ]
  end

  # Returns a combined `Array` of user-specific and system-wide XDG Data `Pathname`s.
  def self.DATA
    # The base directory defined by `$XDG_DATA_HOME` is considered more important
    # than any of the base directories defined by `$XDG_DATA_DIRS`.
    self.DATA_HOME + self.DATA_DIRS
  end

  # Hide the `Pathname`-making helper method.
  private_class_method(:ENVIRONMENTAL_PATHNAMES)
end  # XDG
|
145
|
+
|
146
|
+
end
|