checking-you-out 0.7.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/LICENSE +661 -0
- data/README.md +14 -0
- data/bin/are-we-unallocated-yet +9 -0
- data/bin/benchmark +66 -0
- data/bin/checking-you-out +8 -0
- data/bin/repl +7 -0
- data/bin/test-my-best +4 -0
- data/lib/checking-you-out.rb +40 -0
- data/lib/checking-you-out/auslandsgesprach.rb +253 -0
- data/lib/checking-you-out/ghost_revival.rb +71 -0
- data/lib/checking-you-out/ghost_revival/mr_mime.rb +390 -0
- data/lib/checking-you-out/ghost_revival/xross_infection.rb +146 -0
- data/lib/checking-you-out/inner_spirit.rb +215 -0
- data/lib/checking-you-out/party_starter.rb +202 -0
- data/lib/checking-you-out/party_starter/stick_around.rb +260 -0
- data/lib/checking-you-out/party_starter/weighted_action.rb +41 -0
- data/lib/checking-you-out/sweet_sweet_love_magic.rb +226 -0
- data/mime/packages/distorted-types.xml +68 -0
- data/mime/packages/third-party/shared-mime-info/freedesktop.org.xml.in +7672 -0
- data/mime/packages/third-party/tika-mimetypes/tika-mimetypes.xml +2762 -0
- metadata +232 -0
@@ -0,0 +1,390 @@
|
|
1
|
+
|
2
|
+
require 'ox'
|
3
|
+
|
4
|
+
|
5
|
+
# Push-event-based parser for freedesktop-dot-org `shared-mime-info`-format XML package files,
|
6
|
+
# including the main `shared-mime-info` database itself (GPLv2+), Apache Tika (Apache-2.0), and our own (AGPLv3).
|
7
|
+
# https://specifications.freedesktop.org/shared-mime-info-spec/shared-mime-info-spec-latest.html
|
8
|
+
# https://gitlab.freedesktop.org/xdg/shared-mime-info/-/blob/master/src/update-mime-database.c
|
9
|
+
#
|
10
|
+
#
|
11
|
+
# Example pulled from `freedesktop.org.xml.in`:
|
12
|
+
#
|
13
|
+
# <mime-type type="application/vnd.oasis.opendocument.text">
|
14
|
+
# <comment>ODT document</comment>
|
15
|
+
# <acronym>ODT</acronym>
|
16
|
+
# <expanded-acronym>OpenDocument Text</expanded-acronym>
|
17
|
+
# <sub-class-of type="application/zip"/>
|
18
|
+
# <generic-icon name="x-office-document"/>
|
19
|
+
# <magic priority="70">
|
20
|
+
# <match type="string" value="PK\003\004" offset="0">
|
21
|
+
# <match type="string" value="mimetype" offset="30">
|
22
|
+
# <match type="string" value="application/vnd.oasis.opendocument.text" offset="38"/>
|
23
|
+
# </match>
|
24
|
+
# </match>
|
25
|
+
# </magic>
|
26
|
+
# <glob pattern="*.odt"/>
|
27
|
+
# </mime-type>
|
28
|
+
class CHECKING::YOU::MrMIME < ::Ox::Sax
|
29
|
+
|
30
|
+
|
31
|
+
# Callable that reports the byte order of the host as `:BE` or `:LE`.
# The answer is computed on first call and memoized in `@host_endianness`.
#
# Reference points:
#   Little-endian: VAX, x86 / AMD64
#   Big-endian:    Motorola 68k, IBM mainframes,
#                  Internet https://en.wikipedia.org/wiki/Endianness#Networking
#   Bi-endian:     AArch64, PowerPC / POWER, MIPS, Alpha, PA-RISC, SuperH, Itanium, RISC-V
ORIGIN_OF_SYMMETRY = proc do
  @host_endianness ||= begin
    probe = [1]
    # Pack the probe Integer once as a native-endianness 'I'nt and once as a
    # 'N'etwork-endianness (big-endian) Int; identical bytes mean a big-endian host.
    probe.pack(-'I') == probe.pack(-'N') ? :BE : :LE
  end
end
|
53
|
+
|
54
|
+
# Turn an arbitrary String into the correctly-based Integer it represents.
|
55
|
+
# It would be nice if I could do this directly in `Ox::Sax::Value`.
|
56
|
+
# Base-16 Ints can be written as literals in Ruby, e.g.
|
57
|
+
# irb> 0xFF
|
58
|
+
# => 255
|
59
|
+
# Callable that converts a numeric String into the Integer it represents,
# choosing the base from the String's prefix the way C literals do:
#   "0x…" / "0X…" => base 16
#   "0…"          => base 8
#   anything else => base 10
#
# Equivalent to:
# ```case
#   when -s[0..1].downcase == -'0x' then s.to_i(16)
#   when s.chr == -?0 then s.to_i(8)
#   else s.to_i(10)
# end```
#
# …but the prefix is inspected with `String#getbyte`, which returns an Integer and
# allocates neither a sliced `String` nor an `Array` of codepoints.
# The `shared-mime-info` XML is explicitly only in UTF-8, where ASCII characters are
# always single bytes, so comparing bytes is as safe as comparing codepoints:
#   irb> ?0.ord => 48
#   irb> [?X.ord, ?x.ord] => [88, 120]
#
# An empty String has no first byte (`getbyte(0)` is `nil`) and therefore falls
# through to base 10, yielding `0` just like `"".to_i` instead of raising.
BASED_STRING = proc { |based|
  if based.getbyte(0) != 48 then
    based.to_i(10)
  elsif [88, 120].include?(based.getbyte(1)) then
    # `String#to_i(16)` accepts the leading `0x`/`0X` itself.
    based.to_i(16)
  else
    based.to_i(8)
  end
}
|
82
|
+
# Map of `<match type="…">` format names to callables that turn the `value`
# attribute's String into the byte String to look for.
#
# Every multi-byte format masks each byte with `0xFF`, so a value wider than the
# declared width is truncated to that width for 16-bit formats exactly as it
# already was for 32-bit formats, rather than raising `RangeError` from `Integer#chr`.
FDO_MAGIC_FORMATS = {
  # "The string type supports the C character escapes (\0, \t, \n, \r, \xAB for hex, \777 for octal)."
  -'string' => proc { |s| s },
  -'byte' => proc { |s| BASED_STRING.call(s).chr },
  -'little32' => proc { |s| BASED_STRING.call(s).yield_self { |value|
    ((value & 0xFF).chr + ((value >> 8) & 0xFF).chr + ((value >> 16) & 0xFF).chr + ((value >> 24) & 0xFF).chr)
  }},
  -'big32' => proc { |s| BASED_STRING.call(s).yield_self { |value|
    (((value >> 24) & 0xFF).chr + ((value >> 16) & 0xFF).chr + ((value >> 8) & 0xFF).chr + (value & 0xFF).chr)
  }},
  -'little16' => proc { |s| BASED_STRING.call(s).yield_self { |value|
    ((value & 0xFF).chr + ((value >> 8) & 0xFF).chr)
  }},
  -'big16' => proc { |s| BASED_STRING.call(s).yield_self { |value|
    (((value >> 8) & 0xFF).chr + (value & 0xFF).chr)
  }},
}.tap { |f|

  # TODO: Actually implement `stringignorecase`. This is a Tika thing not found in the fd.o XML.
  f[-'stringignorecase'] = f[-'string']

  # Returning `string` as default will probably not result in a successful match
  # but will avoid blowing up our entire program if we encounter an unhandled format.
  f.default = f[-'string']

  # Set `host` formats according to system endianness.
  if ORIGIN_OF_SYMMETRY.call == :BE then
    f[-'host16'] = f[-'big16']
    f[-'host32'] = f[-'big32']
  else
    f[-'host16'] = f[-'little16']
    f[-'host32'] = f[-'little32']
  end

}
|
117
|
+
|
118
|
+
# Map of `shared-mime-info` XML Element names to our generic category names.
|
119
|
+
# Keys are `shared-mime-info` Element names as Ox hands them to the callbacks (Symbols);
# values are the category names used as keys of `DEFAULT_LOADS`.
# The subclassing Element is spelled `<sub-class-of>` in the fd.o XML.
FDO_ELEMENT_CATEGORY = {
  :magic => :content_match,
  :match => :content_match,
  :alias => :family_tree,
  :comment => :textual_metadata,
  :"sub-class-of" => :family_tree,
  :"generic-icon" => :host_metadata,
  :glob => :pathname_match,
}
|
128
|
+
|
129
|
+
# `MrMIME::new` will take any of these keys as keyword arguments
|
130
|
+
# whose value will override the default defined in this Hash.
|
131
|
+
# Category name => whether data of that category is loaded when the caller
# does not say otherwise. `false` means "skip by default".
DEFAULT_LOADS = {
  textual_metadata: false,
  host_metadata: false,
  pathname_match: true,
  content_match: true,
  family_tree: true,
}
|
138
|
+
|
139
|
+
# You shouldn't abuse the power of the Solid.
|
140
|
+
# Returns the `Set` of category names whose data should be ignored.
# Falls back to the categories that are `false` in `DEFAULT_LOADS` when `@skips` is unset.
# Uses the non-destructive `Hash#select` so the shared `DEFAULT_LOADS` constant is never
# modified (`Hash#keep_if` would delete the `true` entries from the constant itself).
def skips
  @skips ||= self.class::DEFAULT_LOADS.select { |_category, load| load == false }.keys.to_set
end
|
143
|
+
|
144
|
+
# Sets up the per-parse state of the handler.
#
# kwargs - any key of `DEFAULT_LOADS` mapped to `true`/`false` overrides that
#          category's default; other keys are ignored here.
def initialize(**kwargs)
  # Per the `Ox::Sax` dox:
  #   "Initializing `line` attribute in the initializer will cause that variable to
  #    be updated before each callback with the XML line number.
  #    The same is true for the `column` attribute, but it will be updated with
  #    the column in the XML file that is the start of the element or node just read.
  #    `@pos`, if defined, will hold the number of bytes from the start of the document."
  # None of them are enabled:
  #@pos = nil
  #@line = nil
  #@column = nil

  # Elements and Attributes arrive as separate events, and Attribute names are not unique
  # (`type` appears on `<mime-type>`, `<alias>`, `<match>`, and `<sub-class-of>`),
  # so remember which Elements are currently open.
  @parse_stack = []

  # Content matches need their own stack plus this flag, because the source XML expresses
  # OR as sibling Elements and AND as child Elements, e.g.
  # <magic><match1><match2/><match3/></match1><match4/></magic> => [m1 AND m2] OR [m1 AND m3], OR [m4].
  @i_can_haz_magic = true

  # Let the caller decide which categories of source data are ignored:
  # start from the defaults, apply recognised overrides, keep what ended up `false`.
  defaults = self.class::DEFAULT_LOADS
  overrides = kwargs.slice(*defaults.keys)
  @skips = defaults.merge(overrides).select { |_category, load| load == false }.keys.to_set

  # Here's where a call to `super()` a.k.a `Ox::Sax#initialize` would go — IF IT HAD ONE
end
|
173
|
+
|
174
|
+
# Returns the object built by `CHECKING::YOU::OUT::from_ietf_media_type` for the
# current `@media_type`, building it on first use and reusing it until `@cyo` is reset.
def cyo
  return @cyo if @cyo
  @cyo = ::CHECKING::YOU::OUT.from_ietf_media_type(@media_type)
end
|
177
|
+
|
178
|
+
|
179
|
+
# Callback methods we can implement in this Handler per http://www.ohler.com/ox/Ox/Sax.html
|
180
|
+
#
|
181
|
+
# def instruct(target); end
|
182
|
+
# def end_instruct(target); end
|
183
|
+
# def attr(name, str); end
|
184
|
+
# def attr_value(name, value); end
|
185
|
+
# def attrs_done(); end
|
186
|
+
# def doctype(str); end
|
187
|
+
# def comment(str); end
|
188
|
+
# def cdata(str); end
|
189
|
+
# def text(str); end
|
190
|
+
# def value(value); end
|
191
|
+
# def start_element(name); end
|
192
|
+
# def end_element(name); end
|
193
|
+
# def error(message, line, column); end
|
194
|
+
# def abort(name); end
|
195
|
+
#
|
196
|
+
# PROTIPs:
|
197
|
+
# - Our callback methods must be public or they won't be called!
|
198
|
+
# - Some argument names in the documented method definitions describe the type (in the Ruby sense)
|
199
|
+
# of the argument, and some argument names describe their semantic meaning w/r/t the XML document!
|
200
|
+
# - Arguments called `name` will be Symbols by default unless `Ox::parse_sax` was given
|
201
|
+
# an options hash where `:symbolize` => `false` in which case `name` arguments will contain Strings.
|
202
|
+
# - Arguments called `str` will be Strings always.
|
203
|
+
# - Arguments called `value` will be `Ox::Sax::Value` objects that can be further differentiated in several ways:
|
204
|
+
# http://www.ohler.com/ox/Ox/Sax/Value.html
|
205
|
+
# - For example, an invocation of `attr(name, str)` emitted while parsing a fd.o `<magic>` Element might have
|
206
|
+
# a `name` argument containing the Symbol `:priority` and a `str` argument containing its String value e.g. `"50"`.
|
207
|
+
# - The `value` versions of these callback methods have priority over their `str` equivalents
|
208
|
+
# if both are defined, and only one of them will ever be called,
|
209
|
+
# e.g. `attr_value()` > `attr()` iff `defined? attr_value()`.
|
210
|
+
|
211
|
+
# SAX callback: an opening tag named `name` was read.
#
# Always records the Element on `@parse_stack`. Elements whose category (looked up in
# `FDO_ELEMENT_CATEGORY`) is listed in `skips` are otherwise ignored; `skips` holds category
# names, so the Element name must be translated before the membership test.
# Elements without a category are never skipped.
def start_element(name)
  @parse_stack.push(name)
  return if self.skips.include?(FDO_ELEMENT_CATEGORY[name])
  case name
  when :"mime-type" then
    @media_type = String.new if @media_type.nil?
    @cyo = nil
  when :match then
    # Mark any newly-added Sequence as eligible for a full match candidate.
    @i_can_haz_magic = true
    @speedy_cat.append(::CHECKING::YOU::OUT::SequenceCat.new)
  when :magic then
    @speedy_cat = ::CHECKING::YOU::OUT::SpeedyCat.new if @speedy_cat.nil?
  when :glob then
    @stick_around = ::CHECKING::YOU::StickAround.new
  when :"magic-deleteall", :"glob-deleteall", :treemagic, :acronym, :"expanded-acronym" then
    # TODO
  end
end

# SAX callback: an Attribute of the innermost open Element was read.
#
# attr_name - Attribute name (a Symbol with the parse options used by `open`).
# value     - `Ox::Sax::Value` holding the Attribute's value.
def attr_value(attr_name, value)
  # `parse_stack` can be empty here in which case its `#last` will be `nil`.
  # This happens e.g. for the two attributes of the XML declaration '<?xml version="1.0" encoding="UTF-8"?>'.
  # `nil` has no category, so such Attributes are not skipped and simply match no `when` below.
  return if self.skips.include?(FDO_ELEMENT_CATEGORY[@parse_stack.last])

  # Nested `case`s avoid the `Array` allocation necessary when using pattern matching `case` syntax.
  # The redundant `attr_name` `case`s could perhaps become a `Hash` of `proc`s.
  case @parse_stack.last
  when :"mime-type" then
    case attr_name
    when :type then @media_type.replace(value.as_s)
    end
  when :match then
    case attr_name
    when :type then @speedy_cat.last.format = FDO_MAGIC_FORMATS[value.as_s]
    when :value then @speedy_cat.last.cat = value.as_s
    when :offset then @speedy_cat.last.boundary = value.as_s
    when :mask then @speedy_cat.last.mask = BASED_STRING.call(value.as_s)
    end
  when :magic then
    case attr_name
    when :priority then @speedy_cat&.weight = value.as_i
    end
  when :alias then
    case attr_name
    when :type then self.cyo.add_aka(::CHECKING::YOU::IN::from_ietf_media_type(value.as_s))
    end
  when :"sub-class-of" then
    case attr_name
    when :type then self.cyo.add_parent(::CHECKING::YOU::OUT::from_ietf_media_type(value.as_s))
    end
  when :glob then
    case attr_name
    when :weight then @stick_around.weight = value.as_i
    when :pattern then @stick_around.replace(value.as_s)
    when :"case-sensitive" then @stick_around.case_sensitive = value.as_bool
    end
  when :"root-XML" then
    #case attr_name
    #when :namespaceURI then # TODO
    #when :localName then # TODO
    #end
  end
end

# SAX callback: character data inside the innermost open Element was read.
# Only the text of `<comment>` is used; it becomes the type's description.
def text(element_text)
  return if self.skips.include?(FDO_ELEMENT_CATEGORY[@parse_stack.last])
  case @parse_stack.last
  when :comment then
    self.cyo.description = element_text
  end
end

# SAX callback: the closing tag named `name` was read.
#
# Raises `Exception` if `name` is not the innermost open Element.
# The skip test is made against `name` itself (the Element being closed), mirroring
# `start_element`, because the Element has already been popped from `@parse_stack`.
def end_element(name)
  raise Exception.new('Parse stack element mismatch') unless @parse_stack.pop == name
  return if self.skips.include?(FDO_ELEMENT_CATEGORY[name])
  case name
  when :"mime-type" then
    @media_type.clear
    @cyo = nil
  when :match then
    if @i_can_haz_magic then
      # The Sequence stack represents a complete match once we start popping Sequences from it,
      # which we can know because every `<match>` stack push sets `@i_can_haz_magic = true`.
      self.cyo.add_content_match(
        # Add single-sequences directly instead of adding their container…
        @speedy_cat.one? ?
          # …transferring any non-default `weight` from the container to that single-sequence.
          @speedy_cat.pop.tap { _1.weight = @speedy_cat.weight } :
          # Otherwise add a copy of the container while also preparing the
          # local container for a possible next-branch to the `<magic>` tree.
          @speedy_cat.dup.tap { @speedy_cat.pop }
      )
    else
      # Closing a parent `<match>` whose children were already emitted: it is only a partial
      # Sequence, so it is not added, but it still has to leave the stack. Otherwise a following
      # sibling `<match>` would be ANDed with it, e.g. `<match1><match2/></match1><match4/>`
      # must give [m1 AND m2] OR [m4], not [m1 AND m4].
      @speedy_cat.pop
    end
    # Mark any remaining partial Sequences as ineligible to be a full match candidate,
    # e.g. for nested <match1><match2><match3/></match2></match1> we want the candidate
    # [m1, m2, m3] but not the partials [m1, m2] or [m1] as the stack unwinds.
    @i_can_haz_magic = false
  when :magic then
    # `SpeedyCat#clear` will unset any non-default `weight` so we can re-use it cleanly.
    @speedy_cat.clear
  when :glob then
    self.cyo.add_pathname_fragment(@stick_around) unless @stick_around.nil?
  end
end
|
324
|
+
|
325
|
+
# Parses the `shared-mime-info`-format XML file at `path`, feeding every SAX event to this handler.
#
# path   - location of the XML package file.
# kwargs - extra/overriding options for `Ox.sax_parse`; they win over the defaults below.
#
# Returns whatever `Ox.sax_parse` returns (the value of the `File.open` block).
def open(path, **kwargs)
  # Docs: http://www.ohler.com/ox/Ox.html#method-c-sax_parse
  # The method itself lives in Ox's C extension, not in Ox's Ruby lib:
  #   https://github.com/ohler55/ox/blob/master/ext/ox/ox.c CTRL+F "call-seq: sax_parse".
  #
  # `:convert_special` is needed because of Elements like this one from `freedesktop.org.xml`:
  #   `<match type="string" value="&lt;&lt;&lt; QEMU VM Virtual Disk Image >>>\n" offset="0"/>`
  #
  # How `:skip_none` and `:skip_off` differ is unclear. They mostly appear as equal/fall-through
  # `case`s in the C extension:
  #   https://github.com/ohler55/ox/search?q=OffSkip
  #   https://github.com/ohler55/ox/search?q=NoSkip
  # …except in `sax.c`'s `read_text`, where `:skip_none` also checks for the end of the Element:
  #   https://github.com/ohler55/ox/blob/master/ext/ox/sax.c CTRL+F "read_text"
  #   `((NoSkip == dr->options.skip && !isEnd) || (OffSkip == dr->options.skip)))`
  #
  # TOD0: Probably String allocation gainz to be had inside Ox's C extension once the API is available:
  #   https://bugs.ruby-lang.org/issues/13381
  #   https://bugs.ruby-lang.org/issues/16029
  #   e.g. https://github.com/msgpack/msgpack-ruby/pull/196
  sax_options = {
    convert_special: true,  # [boolean] Convert encoded entities back to their unencoded form, e.g. `"&lt;"` to `"<"`.
    skip: :skip_off,        # [:skip_none|:skip_return|:skip_white|:skip_off] (from Element text/value) Strip CRs, whitespace, or nothing.
    smart: false,           # [boolean] Toggle Ox's built-in hints for HTML parsing: https://github.com/ohler55/ox/blob/master/ext/ox/sax_hint.c
    strip_namespace: true,  # [nil|String|true|false] (from Element names) Strip no namespaces, all namespaces, or a specific namespace.
    symbolize: true,        # [boolean] Fill callback method `name` arguments with Symbols instead of with Strings.
    intern_strings: true,   # [boolean] Intern (freeze and deduplicate) String return values.
  }.update(kwargs)

  # Block form of `File::open`: the handle is closed when the block terminates
  # and the block's value becomes the return value.
  File.open(path, File::Constants::RDONLY) do |mime_xml|
    # "Announce an intention to access data from the current file in a specific pattern.
    #  On platforms that do not support the posix_fadvise(2) system call, this method is a no-op."
    #
    # `:sequential` suits event-based parsing. `:noreuse` is deliberately avoided so that
    # back-to-back invocations of DistorteD benefit from the OS caching the data files.
    #
    # N0TE: `:noreuse` is a no-op on Lunix anyway, at least as of ver 5.12 (2021):
    #   https://linux.die.net/man/2/posix_fadvise
    #   https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/mm/fadvise.c
    #   https://web.archive.org/web/20130513093816/http://kerneltrap.org/node/7563
    # But it works on FreeBSD, amusingly even when using Linux ABI compat:
    #   https://www.freebsd.org/cgi/man.cgi?query=posix_fadvise&sektion=2
    #   https://cgit.freebsd.org/src/tree/sys/kern/vfs_syscalls.c
    #   https://cgit.freebsd.org/src/tree/sys/compat/linux/linux_file.c
    mime_xml.advise(:sequential)

    Ox.sax_parse(
      self,      # Instance of a class that responds to `Ox::Sax`'s callback messages.
      mime_xml,  # IO stream or String of XML to parse. Won't close File handles automatically.
      **sax_options,
    )
  end
end
|
389
|
+
end
|
390
|
+
|
@@ -0,0 +1,146 @@
|
|
1
|
+
require 'pathname' unless defined?(::Pathname)
|
2
|
+
|
3
|
+
|
4
|
+
# Cross-OS / Cross-Desktop / Cross-Ruby t00lz.
|
5
|
+
class ::CHECKING::YOU::OUT::XROSS_INFECTION
|
6
|
+
|
7
|
+
# Host operating system detection.
|
8
|
+
# Host operating system detection.
# Every predicate tests `CHAIN`, a frozen String naming the host OS.
class SYSTEM
  require 'rbconfig'

  # The best available description of the host OS, tried in this order:
  #  1. `RbConfig::CONFIG['host_os']`, created by `mkconfig.rb` when Ruby is built:
  #     https://idiosyncratic-ruby.com/42-ruby-config.html
  #  2. `RUBY_PLATFORM`, which is misleading because it will be e.g. `'Java'` for JRuby,
  #     and the paths we care about are more OS-dependent than Ruby-dependent.
  #  3. `ENV['OS']`, seen as `'Windows_NT'` on Windows but not expected on *nix.
  #  4. Whether a Windows-only library can be loaded.
  CHAIN = -case
  when defined?(::RbConfig::CONFIG) then
    ::RbConfig::CONFIG[-'host_os']
  when defined?(RUBY_PLATFORM) then
    RUBY_PLATFORM
  when defined?(ENV) && ENV&.has_key?('OS') then
    ENV[-'OS']
  else
    begin
      # Try to `require` something that will definitely fail on non-Windows:
      # https://ruby-doc.org/stdlib/libdoc/win32ole/rdoc/WIN32OLE.html
      require 'win32ole'
      # It loaded, so this is Windows. (`require` itself returns a boolean, not a String.)
      'Winders'
    rescue ::LoadError
      # It did not load, so this is something other than Windows.
      'unknown'
    end
  end # CHAIN

  # This is kinda redundant with `Gem.win_platform?`:
  # https://github.com/rubygems/rubygems/blob/master/lib/rubygems.rb Ctrl+F 'WIN_PATTERNS'
  def self.Windows?
    windows_hint = %r{
      mswin   |  # MS VC compiler / MS VC runtime
      mingw   |  # GNU compiler / MS VC runtime
      cygwin  |  # GNU compiler / Cygwin POSIX runtime
      interix |  # GNU compiler / MS POSIX runtime
      bccwin  |  # Borland C++ compiler and runtime, dead since Embarcadero C++ Builder uses Clang
      windows |  # e.g. ENV['OS'] can be 'Windows_NT'
      wince   |  # Can Ruby even run on CE? idk
      djgpp   |  # http://www.delorie.com/djgpp/
      winders    # lol, the last-resort marker from CHAIN
    }xi
    windows_hint.match?(self::CHAIN) || Gem.win_platform?
  end

  def self.OS∕2?
    # This could also be DOS, but I'll assume OS/2:
    # http://emx.sourceforge.net/
    # http://www.os2ezine.com/20020716/page_7.html
    self::CHAIN.match?(/emx/i)
  end

  def self.macOS?
    self::CHAIN.match?(/darwin/i)
  end

  def self.BSD?
    self::CHAIN.match?(/bsd/i)
  end

  def self.Lunix?
    # LUNIX TRULY IS THE SUPERIOR OPERATING SYSTEM!!!1
    # http://www.somethingawful.com/jeffk/usar.swf
    self::CHAIN.match?(/linux/i)
  end

  class << self
    # Short spelling of `macOS?`; `XDG::DATA_DIRS` asks for `SYSTEM::mac?`.
    alias_method :mac?, :macOS?
  end
end # SYSTEM
|
70
|
+
|
71
|
+
|
72
|
+
# Our implementation of freedesktop-dot-org XDG directory handling:
|
73
|
+
# https://specifications.freedesktop.org/basedir-spec/basedir-spec-latest.html
|
74
|
+
#
|
75
|
+
# There is a nice-looking Gem for this already: https://www.alchemists.io/projects/xdg/
|
76
|
+
# However I'm not using it because CYO I want to do some nonstandard stuff ;)
|
77
|
+
# freedesktop-dot-org XDG base directory lookup.
# Every public method returns an `Array` of `Pathname`s.
class XDG

  # Returns real, existing directory `Pathname`s for the entries of environment variable
  # `variable`, or `nil` when the variable is missing or an empty `String`.
  def self.ENVIRONMENTAL_PATHNAMES(variable)
    contents = ENV[variable]
    return if contents.nil? || contents.empty?
    contents
      # `PATH_SEPARATOR` will be a colon (:) on UNIX-like systems and semi-colon (;) on Windows.
      .split(::File::PATH_SEPARATOR)
      # :expand_path — Does shell expansion of path `String`s, e.g. `File.expand_path('~') == Dir::home`
      .map { |entry| ::Pathname.new(entry).expand_path }
      # :directory? — Drop any expanded `Pathname`s that don't refer to extant directories.
      .select(&:directory?)
      # :realpath — Convert to absolute paths, e.g. following symbolic links.
      .map(&:realpath)
  end

  # "`$XDG_DATA_DIRS` defines the preference-ordered set of base directories to
  #  search for data files in addition to the `$XDG_DATA_HOME` base directory."
  def self.DATA_DIRS
    from_environment = self.ENVIRONMENTAL_PATHNAMES(-'XDG_DATA_DIRS')
    return from_environment if from_environment

    # "If `$XDG_DATA_DIRS` is either not set or empty,
    #  a value equal to `/usr/local/share/:/usr/share/` should be used."
    fallbacks = ['/usr/local/share/', '/usr/share/']
    # Fixup platforms where we know to expect filez outside the fd.o defaults.
    if SYSTEM::macOS? then
      fallbacks.append('/opt/homebrew/share/')  # Homebrew
      fallbacks.append('/opt/local/share/')     # MacPorts
    end
    fallbacks.map(&::Pathname::method(:new)).select(&:directory?).map(&:realpath)
  end # DATA_DIRS

  # "`$XDG_DATA_HOME` defines the base directory relative to which user-specific data files should be stored."
  def self.DATA_HOME
    self.ENVIRONMENTAL_PATHNAMES(-'XDG_DATA_HOME') || [
      # "If `$XDG_DATA_HOME` is either not set or empty, a default equal to $HOME/.local/share should be used."
      ::Pathname.new(::Dir::home).expand_path.realpath.join(-'.local', -'share')
    ]
  end

  # "`$XDG_CONFIG_HOME` defines the base directory relative to which user-specific configuration files should be stored."
  def self.CONFIG_HOME
    self.ENVIRONMENTAL_PATHNAMES(-'XDG_CONFIG_HOME') || [
      # "If `$XDG_CONFIG_HOME` is either not set or empty, a default equal to `$HOME/.config` should be used."
      ::Pathname.new(::Dir::home).expand_path.realpath.join(-'.config')
    ]
  end

  # "`$XDG_STATE_HOME` defines the base directory relative to which user-specific state files should be stored."
  def self.STATE_HOME
    self.ENVIRONMENTAL_PATHNAMES(-'XDG_STATE_HOME') || [
      # "If `$XDG_STATE_HOME` is either not set or empty, a default equal to `$HOME/.local/state` should be used."
      ::Pathname.new(::Dir::home).expand_path.realpath.join(-'.local', -'state')
    ]
  end

  # Returns a combined `Array` of user-specific and system-wide XDG Data `Pathname`s.
  def self.DATA
    # The base directory defined by `$XDG_DATA_HOME` is considered more important
    # than any of the base directories defined by `$XDG_DATA_DIRS`.
    self.DATA_HOME + self.DATA_DIRS
  end

  # Hide the `Pathname`-making helper method.
  private_class_method(:ENVIRONMENTAL_PATHNAMES)
end # XDG
|
145
|
+
|
146
|
+
end
|