checking-you-out 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,215 @@
1
+ require 'set' unless defined? ::Set
2
+ require 'pathname' unless defined? ::Pathname
3
+
4
+
5
+ # Utility Modules/procs/lambdas/etc for generic operations like checking WeightedActions.
6
+ require_relative 'party_starter' unless defined? ::CHECKING::YOU::WeightedAction
7
+
8
+
9
+ # This base Struct will be used as the Hash key for its matching `OUT` subclass object,
10
+ # and its members correspond to the three major parts of an IETF "Content-Type" String,
11
+ # e.g. "application/x-saturn-rom" → :x, :application, :"saturn-rom".
12
+ #
13
+ # This is kind of a leaky abstraction since I want to support non-IETF type systems too,
14
+ # but the IETF system is by far the most relevant one to us because the most exhaustive
15
+ # source data (`shared-mime-info`) is based on that format and because, you know, Internet.
16
+ # See the adjacent `auslandsgespräch.rb` for the parser and more info.
17
+ #
18
+ #
19
+ # The instances of a `Struct` subclass with at most `RSTRUCT_EMBED_LEN_MAX` members
20
+ # can fit entirely within an `RStruct` without additional heap allocation.
21
+ # In MRI (at least as of 3.0) the `RSTRUCT_EMBED_LEN_MAX` is 3, so CYI uses three members.
22
+ #
23
+ # For more info see:
24
+ # - https://github.com/ruby/ruby/blob/master/gc.c
25
+ # - http://patshaughnessy.net/2013/2/8/ruby-mri-source-code-idioms-3-embedded-objects
26
# Three-member key Struct (CYI). Member names deliberately avoid taxonomic ranks such as
# "domain", "class", or "order" because those words are already common in computing.
CHECKING::YOU::IN ||= Struct.new(:kingdom, :phylum, :genus) do
  # Promote this CYI to its CYO singleton. CYO defines the opposite pair of methods.
  def out
    ::CHECKING::YOU::OUT::new(self)
  end

  # A CYI is already "in", so hand back the receiver unchanged.
  def in
    self
  end
end
37
+
38
+ # Main Struct subclass for in-memory type representation.
39
+ # Instances of the base `CHECKING::YOU::IN` Struct will refer to only one of these,
40
+ # and this matching object will contain all relevant data about the type,
41
+ # such as file extension(s), `magic` bytes, and variations of a base type like all of:
42
+ # - "application/vnd.wordperfect;"
43
+ # - "application/vnd.wordperfect;version=4.2"
44
+ # - "application/vnd.wordperfect;version=5.0"
45
+ # - "application/vnd.wordperfect;version=5.1"
46
+ # - "application/vnd.wordperfect;version=6.x"
47
+ # …will be represented in a single `CHECKING::YOU::OUT` object.
48
+ class ::CHECKING::YOU::OUT < ::CHECKING::YOU::IN
49
+
50
+ # Absolute path to the root of the Gem — the directory containing `bin`,`docs`,`lib`, etc.
51
GEM_ROOT = proc do
  # Climb two directories up from this file's directory, then resolve symlinks.
  ::Pathname.new(__dir__).join('..', '..').expand_path.realpath
end
52
+
53
+ # Time object representing the day this running CYO Gem was packaged.
54
+ #
55
+ # `Gem::Specification#date` can be slightly misleading when developing locally with Bundler using `bundle exec`.
56
+ # One might expect the result of `#date` to be "now" (including hours/minutes/seconds) in UTC for such a runtime-packaged Gem,
57
+ # but it will always be midnight UTC of the current day (also in UTC), i.e. a date that is always[0] in the past.
58
+ #
59
+ # After ${your-UTC-offset} hours before midnight localtime, this will give you a *day* that seems to be in the future
60
+ # compared to a system clock displaying localtime despite that *date* UTC still being in the past,
61
+ # e.g. as I write this comment at 2021-05-25 22:22 PST, `GEM_PACKAGE_TIME.call` returns `2021-05-26 00:00:00 UTC`.
62
+ #
63
+ # Rescue from `Gem::MissingSpecError`'s parent to support developing locally with just `require_relative` and no Bundler.
64
+ #
65
+ # [0]: unless you manage to `bundle exec` at exactly 00:00:00 UTC :)
66
GEM_PACKAGE_TIME = proc do
  begin
    # Packaging date recorded in the installed Gem's specification.
    Gem::Specification.find_by_name('checking-you-out').date
  rescue Gem::LoadError
    # No spec could be loaded (e.g. plain `require_relative` development): fall back to now.
    Time.now
  end
end
67
+
68
# A single `name=value` parameter of a type, e.g. `version=4.2`.
Species = Struct.new(:name, :value) do
  # Build a Species from a `"name=value"` String.
  #
  # Only the first `=` separates the name from the value, so a value that itself contains `=`
  # is kept verbatim instead of overflowing the two-member Struct (which raised `ArgumentError`).
  # A missing or empty value is stored as `nil`, as before.
  def self.from_string(param_string)
    name, value = param_string.split('=', 2)
    value = nil if value && value.empty?
    new(name, value)
  end
end
73
+
74
+ # Main memoization Hash for our loaded Type data.
75
+ # { CHECKING::YOU::IN => CHECKING::YOU::OUT }
76
def self.all_night
  # Lazily create the memo table on first use; missing keys read as `nil`.
  @all_night ||= {}
end
77
+
78
+ # Return a singleton instance for any CYO.
79
def self.new(taxa)
  # Support IETF String argument to this method, e.g. ::CHECKING::YOU::OUT::new('application/octet-stream')
  return self.from_ietf_media_type(taxa) if taxa.is_a?(String)

  # The memo table is keyed by CYI. Build the key with the base Struct explicitly:
  # calling `super` here would run the Struct constructor with `self` being this subclass,
  # yielding a key of this subclass' type which is not `eql?` to an equivalent CYI
  # (Struct equality and hashing take the class into account).
  key = taxa.is_a?(::CHECKING::YOU::IN) ? taxa : ::CHECKING::YOU::IN.new(*taxa)

  # Return the memoized CYO singleton for that key, creating it on first request.
  self.all_night[key] ||= self.allocate.tap { |cyo| cyo.send(:initialize, *taxa) }
end
87
+
88
+ # Demote any CYO to a CYI that can be passed around in just 40 bytes.
89
+ # CYI has the opposites of these methods.
90
def out
  # Already a CYO.
  self
end

def in
  # Reverse lookup: the memo-table key whose value is this object.
  self.class.all_night.key(self)
end
92
+
93
+
94
+ # Get a CYO, Set[CYO], or nil by file-extension, e.g. `doc` => { CYO msword, CYO rtf }.
95
POSTFIX_KEY = proc {
  # One search object is shared by every lookup so that searching does not allocate a key each time.
  probe = ::CHECKING::YOU::StickAround.new(-'')
  # Skip `StickAround#replace` when the probe already matches the requested key.
  # Lookups are marked `case_sensitive: false` so arbitrarily-named input can be tested.
  ->(needle) { probe.eql?(needle) ? probe : probe.replace(needle, case_sensitive: false) }
}.call
103
def self.from_postfix(stick_around)
  # `@after_forever` only exists once at least one postfix has been registered;
  # answer `nil` (no match) instead of raising when nothing has been registered yet.
  self.instance_variable_get(:@after_forever)&.[](POSTFIX_KEY.call(stick_around))
end
106
+
107
+ # Get a Hash[CYO] or nil for arbitrary non-file-extension glob match of a File basename.
108
def self.from_glob(stick_around)
  # `@stick_around` only exists once at least one glob has been registered;
  # treat a missing table exactly like "nothing matched".
  matched = self.instance_variable_get(:@stick_around)&.select { |glob, _cyo| glob.eql?(stick_around) }
  # Never hand back an empty Hash.
  matched.nil? || matched.empty? ? nil : matched
end
115
+
116
def self.from_pathname(pathname)
  # Free-form glob matches take precedence; postfix matches are the fallback.
  glob_hit = self.from_glob(pathname)
  glob_hit || self.from_postfix(pathname)
end
119
+
120
+
121
+ # Add a new Postfix or Glob for a specific type.
122
def add_pathname_fragment(fragment)
  # A "postfix" is exactly `*.` followed by text with no further dots or asterisks;
  # every other fragment is stored as a free-form glob.
  simple_postfix = fragment.start_with?('*.') && fragment.count('.') == 1 && fragment.count('*') == 1
  if simple_postfix
    instance_ivar = :@postfixes
    class_ivar = :@after_forever
  else
    instance_ivar = :@globs
    class_ivar = :@stick_around
  end
  # Record the fragment on this instance, then in the class-wide lookup table.
  ::CHECKING::YOU::INSTANCE_NEEDLEMAKER.call(instance_ivar, fragment, self)
  ::CHECKING::YOU::CLASS_NEEDLEMAKER.call(class_ivar, fragment, self)
end
131
+
132
+
133
+ # Get a `Set` of this CYO's own CYI plus any alias CYIs stored in `@aka`, at minimum just `Set[self.in]`.
134
def aka
  # `@aka` may be unset, a single CYI/CYO, or a `Set`; any other value yields `nil`.
  aliases = @aka
  if aliases.nil?
    Set[self.in]
  elsif aliases.is_a?(self.class) || aliases.is_a?(self.class.superclass)
    Set[self.in, aliases]
  elsif aliases.is_a?(::Set)
    Set[self.in, *aliases]
  end
end
141
+
142
+ # Take an additional CYI, store it locally, and memoize it as an alias for this CYO.
143
def add_aka(taxa)
  # Coerce loose taxa (anything splattable into the three members) into a CYI key.
  taxa = self.class.superclass.new(*taxa) unless taxa.is_a?(::CHECKING::YOU::IN)
  # Remember the alias on this instance…
  ::CHECKING::YOU::INSTANCE_NEEDLEMAKER.call(:@aka, taxa, self)
  # …and point the class-wide memo for that key at this CYO.
  self.class.all_night.store(taxa, self)
end
148
+
149
+ # Forget a CYI alias of this Type. Capable of unsetting the "real" CYI as well if desired.
150
def remove_aka(taxa)
  # Coerce loose taxa into a CYI key, mirroring `#add_aka`.
  taxa = self.class.superclass.new(*taxa) unless taxa.is_a?(::CHECKING::YOU::IN)
  # Also drop the alias from the local `@aka` storage so `#aka` stops reporting it;
  # previously only the class-wide memo was cleared.
  @aka.delete(taxa) if @aka.is_a?(::Set)
  # Only un-memoize the key if it currently points at this CYO.
  # Returns the removed CYO, or `nil` when nothing was memoized for us under that key.
  self.class.all_night.delete(taxa) if self.class.all_night[taxa] === self
end
154
+
155
+ attr_reader :parents, :children
156
+
157
+ # Take an additional CYO, store it locally as our parent, and ask it to add ourselves as its child.
158
def add_parent(parent_cyo)
  ::CHECKING::YOU::INSTANCE_NEEDLEMAKER.call(:@parents, parent_cyo, self)
  # Complete the reverse link unless the parent already lists us as a child;
  # that check is what stops the mutual `add_child`/`add_parent` calls from looping.
  siblings = parent_cyo.children
  parent_cyo.add_child(self) if siblings.nil? || !siblings.include?(self)
end
162
+
163
+ # Take an additional CYO, store it locally as our child, and ask it to add ourselves as its parent.
164
def add_child(child_cyo)
  ::CHECKING::YOU::INSTANCE_NEEDLEMAKER.call(:@children, child_cyo, self)
  # Complete the reverse link unless the child already lists us as a parent;
  # that check is what stops the mutual `add_parent`/`add_child` calls from looping.
  elders = child_cyo.parents
  child_cyo.add_parent(self) if elders.nil? || !elders.include?(self)
end
168
+
169
+ # Get a `Set` of this CYO and all of its parent CYOs, at minimum just `Set[self]`.
170
def adults_table
  # `@parents` may be unset, a single CYI/CYO, or a `Set`; any other value yields `nil`.
  elders = @parents
  return Set[self] if elders.nil?
  return Set[self, elders] if elders.is_a?(self.class) || elders.is_a?(self.class.superclass)

  Set[self, *elders] if elders.is_a?(::Set)
end
177
+
178
+ # Get a `Set` of this CYO and all of its child CYOs, at minimum just `Set[self]`.
179
def kids_table
  # `@children` may be unset, a single CYI/CYO, or a `Set`; any other value yields `nil`.
  brood = @children
  if brood.nil?
    Set[self]
  elsif brood.is_a?(self.class) || brood.is_a?(self.class.superclass)
    Set[self, brood]
  elsif brood.is_a?(::Set)
    Set[self, *brood]
  end
end
186
+
187
+ # Get a `Set` of this CYO and all parents and children, at minimum just `Set[self]`.
188
def family_tree
  # Union of the downward (children) and upward (parents) tables; both include `self`.
  self.kids_table.union(self.adults_table)
end
189
+
190
+ # Storage for descriptions (`<comment>`), acronyms, suitable iconography, and other boring metadata, e.g.:
191
+ # <mime-type type="application/vnd.oasis.opendocument.text">
192
+ # <comment>ODT document</comment>
193
+ # <acronym>ODT</acronym>
194
+ # <expanded-acronym>OpenDocument Text</expanded-acronym>
195
+ # <generic-icon name="x-office-document"/>
196
+ # […]
197
+ # </mime-type>
198
+ attr_accessor :description
199
+
200
+ end
201
+
202
+ # IETF Media-Type parser and methods that use that parser.
203
+ require_relative 'auslandsgesprach' unless defined? ::CHECKING::YOU::IN::AUSLANDSGESPRÄCH
204
+ ::CHECKING::YOU::IN.extend(::CHECKING::YOU::IN::AUSLANDSGESPRÄCH)
205
+ ::CHECKING::YOU::IN.include(::CHECKING::YOU::IN::INLANDGESPRÄCH)
206
+ ::CHECKING::YOU::OUT.extend(::CHECKING::YOU::OUT::AUSLANDSGESPRÄCH)
207
+
208
+ # Content matching à la `libmagic`/`file`.
209
+ require_relative 'sweet_sweet_love_magic' unless defined? ::CHECKING::YOU::SweetSweet♥Magic
210
+ ::CHECKING::YOU::OUT.extend(::CHECKING::YOU::SweetSweet♡Magic)
211
+ ::CHECKING::YOU::OUT.prepend(::CHECKING::YOU::SweetSweet♥Magic)
212
+
213
+ # Methods for loading type data from `shared-mime-info` package XML files.
214
+ require_relative 'ghost_revival' unless defined? ::CHECKING::YOU::GHOST_REVIVAL
215
+ ::CHECKING::YOU::OUT.extend(::CHECKING::YOU::OUT::GHOST_REVIVAL)
@@ -0,0 +1,202 @@
1
+ require 'set' unless defined? ::Set
2
+ require 'pathname' unless defined? ::Pathname
3
+
4
+
5
+ # This file defines/imports various utility Modules/procs/etc that should be available
6
+ # to all other CYO components without `including`/`extending`.
7
+ require_relative 'party_starter/weighted_action' unless defined? ::CHECKING::YOU::WeightedAction
8
+ require_relative 'party_starter/stick_around' unless defined? ::CHECKING::YOU::StickAround
9
+
10
+ class CHECKING::YOU
11
+
12
+ # The following two `proc`s handle classwide-memoization and instance-level assignment
13
+ # for values that may be Enumerable but often refer to only a single Object.
14
+ #
15
+ # For example, most `Postfix`es (file extensions) will only ever belong to a single CYO Object,
16
+ # but a handful represent possibly-multiple types, like how `.doc` can be an MSWord file or WordPad RTF.
17
+ #
18
+ # These assignment procs take a storage haystack, a needle to store, and the CYO receiver to which the needle refers.
19
+ # They will set `haystack[needle] => CYO` if that needle is unique and unset, or they will convert
20
+ # an existing single `haystack[needle] => CYO` assignment to `haystack[needle] => Set[existingCYO, newCYO]`.
21
+ #
22
+ # This is an admittedly-annoying complexity-for-performance tradeoff with the goal of allocating
23
+ # as few spurious containers as possible instead of explicitly initializing a Set for every needle
24
+ # when most of them would wastefully be a Set of just a single thing.
25
CLASS_NEEDLEMAKER = proc { |haystack, needle, receiver|
  owner = receiver.class

  # Create the class-level container the first time this haystack is used.
  owner.instance_variable_set(haystack, {}) unless owner.instance_variable_defined?(haystack)
  storage = owner.instance_variable_get(haystack)

  # First registration stores the bare receiver; a second, different receiver upgrades the
  # slot to a `Set`; later receivers are added to that `Set`. Re-registering the same
  # receiver (or finding a value of some other kind) leaves the slot untouched.
  existing = storage[needle]
  if existing.nil?
    storage[needle] = receiver
  elsif existing.is_a?(::Set)
    existing.add(receiver)
  elsif existing.is_a?(owner) && existing != receiver
    storage[needle] = Set[existing, receiver]
  end

  # The whole haystack Hash is the proc's result.
  storage
}
39
+
40
+ # This is the instance-level version of the above, e.g. a CYO with only one Postfix
41
+ # is currently given `cyo.:@postfixes = Set[Postfix]` (this proc always allocates a `Set`), and a CYO with many Postfixes ends up with
42
+ # e.g. `cyo.:@postfixes = Set[post, fix, es, …]`.
43
INSTANCE_NEEDLEMAKER = proc { |haystack, needle, receiver|
  # First needle for this haystack: start a new `Set` holding it.
  next receiver.instance_variable_set(haystack, Set[needle]) unless receiver.instance_variable_defined?(haystack)

  # Otherwise add to the container that is already there.
  receiver.instance_variable_get(haystack).add(needle)
}
50
+
51
+
52
+ # Test a Pathname representing an extant file whose contents and metadata we can use.
53
+ # This is separated into a lambda due to the complexity, since the entry-point might
54
+ # be given a String that could represent a Media Type, a hypothetical path,
55
+ # an extant path, or even raw stream contents. It could be given a Pathname representing
56
+ # either a hypothetical or extant file. It could be given an IO/Stream object.
57
+ # Several input possibilities will end up calling this lambda.
58
+ #
59
+ # Some of this complexity is my fault, since I'm doing a lot of variable juggling
60
+ # to avoid as many new-Object-allocations as possible in the name of performance
61
+ # since this library is the very core-est core of DistorteD;
62
+ # things like assigning Hash values to single CYO objects the first time that key is stored
63
+ # then replacing that value with a Set iff that key needs to reference any additional CYO.
64
+ #
65
+ # - `::from_xattr` can return `nil` or a single `CYO` depending on filesystem extended attributes.
66
+ # It is very very unlikely that most people will ever use this, but I think it's cool 8)
67
+ #
68
+ # - `::from_postfix` can return `nil`, `CYO`, or `Set` since I decided to store Postfixes
69
+ # separately from freeform globs since file-extension matches are the vast majority of globs.
70
+ # Postfixes avoid needing to be weighted since they all represent the same final pathname component
71
+ # and should never result in multiple conflicting Postfix key matches.
72
+ # A single Postfix key can represent multiple CYOs, though; hence the possible `Set`.
73
+ #
74
+ # - `::from_glob` can return `nil` or `Hash` since even a single match will include the weighted key.
75
+ #
76
+ # - `::from_content` can return `nil` or `Hash` based on a `libmagic`-style match of file/stream contents.
77
+ # Many common types can be determined from the first four bytes alone, but we support matching
78
+ # arbitrarily-long sequences against arbitrarily-big byte range boundaries.
79
+ # These keys will also be weighted, even for a single match.
80
+ TEST_EXTANT_PATHNAME = -> (pathname, so_deep: true, only_one_match: true) {
81
+
82
+ # Never return empty Enumerables.
83
+ # Yielding-self to this proc will `nil`-ify anything that's `:empty?`
84
+ # and will pass any non-Enumerable Objects through.
85
# `nil`-ify anything that reports itself `empty?`; everything else (including objects
# that have no `empty?` at all) passes through untouched.
# The probe must be `:empty?` — no object responds to `:empty`, so that check could never succeed.
point_zero = proc { _1.respond_to?(:empty?) && _1.empty? ? nil : _1 }
86
+
87
+ # Our matching block will return a single CYO when possible, and can optionally
88
+ # return multiple CYO matches for ambiguous files/streams.
89
+ # Multiple matching must be opted into with `only_one_match: false` so it doesn't need to be
90
+ # checked by every caller that is fine with best-effort and wants to minimize allocations.
91
# Collapse a match result to a single value where possible:
# - `nil` and anything without `empty?` pass through unchanged,
# - an empty collection becomes `nil`,
# - a one-element collection, or any collection when `only_one_match` is set,
#   becomes its first element (first *value* for a `Hash`),
# - otherwise the whole collection is returned.
#
# The former `respond_to?(:first?)` probe named a method nothing defines, so it was dead code.
# It is dropped, not "corrected" to `:first`, because Struct-based objects respond to
# `first` (via `Enumerable`) without having `empty?`.
one_or_eight = proc { |huh|
  if !huh.respond_to?(:empty?) then huh
  elsif huh.empty? then nil
  elsif huh.size == 1 || only_one_match then huh.is_a?(::Hash) ? huh.values.first : huh.first
  else huh
  end
}
103
+
104
+ # Test all "glob" matches against all child Types of all "magic" matches to allow for
105
+ # nuanced detection of ambiguous streams where a `magic` match returns multiple possibilities,
106
+ # e.g. using a `.doc` Postfix-match to choose a `text-plain` glob-match for non-Word `.doc` files
107
+ # or to choose a `application/msword` glob-match over a more generic `application/x-ole-storage`
108
+ # magic-match when the magic weights alone are not enough information to make the correct choice.
109
+ # irb> ::CHECKING::YOU::OUT::from_postfix('doc')
110
+ # => #<Set: {#<CHECKING::YOU::OUT application/msword>, #<CHECKING::YOU::OUT text/plain>}>
111
+ #
112
+ # Again, a lot of the complexity here is "my fault" in that I could avoid it by explicitly using
113
+ # the same data structures for all the different inputs, but I need this to be as fast
114
+ # and as low-overhead as possible which means avoiding allocations of things like
115
+ # Enumerables that end up holding only a single other object.
116
+ # Obviously that leads to a lot of variation in result values from helper methods,
117
+ # so I'll own that here instead of ever making callsites deal with it.
118
+ #
119
+ # This `proc`'s output will introduce a little more of that same complexity since it will be `nil`
120
+ # if either input is `nil`, will be a single CYO if there is only one union match,
121
+ # or a `Set` if there are still multiple possibilities.
122
+ magic_children = proc { |glob, magic|
123
+ # NOTE: CYO deviates from `shared-mime-info`'s behavior very slightly here!
124
+ #
125
+ # `shared-mime-info`'s "Recommended checking order" documentation says:
126
+ # "If any of the mimetypes resulting from a glob match is equal to or a subclass of the result
127
+ # from the magic sniffing, use this as the result. This allows us for example to distinguish text files
128
+ # called 'foo.doc' from MS-Word files with the same name, as the magic match for the MS-Word file would be
129
+ # `application/x-ole-storage` which the MS-Word type inherits."
130
+ #
131
+ # Our behavior is identical except it allows glob matches which are a *superclass* of a
132
+ # magic-match in addition to subclass or equal-to, i.e. using `:family_tree` for comparison here
133
+ # instead of using `:kids_table`. There might be a downside to this that I haven't found yet
134
+ # but it allows CYO to better match some things, e.g. matching a `'.flv'` video file as
135
+ # `'video/x-flv'` instead of as `'video/x-javafx'`, since fd.o has the latter as a subclass of the former. NOTE(review): only the branches whose `magic` is a `Hash` use `:family_tree` (intersected across all magic matches via `reduce(&:&)`); the branches whose `magic` is a single CYO use `:kids_table`, and CYO-vs-CYO is plain equality — confirm this asymmetry is intended.
136
+ case [glob, magic]
137
+ in ::NilClass, * then nil
138
+ in *, ::NilClass then nil
139
+ in ::Set, ::Hash then glob & magic.values.to_set.map(&:family_tree).reduce(&:&)
140
+ in ::Set, ::CHECKING::YOU::OUT then glob & magic.kids_table
141
+ in ::Hash, ::Hash then glob.values.to_set & magic.values.to_set.map(&:family_tree).reduce(&:&)
142
+ in ::CHECKING::YOU::OUT, ::Hash then magic.values.to_set.map(&:family_tree).reduce(&:&)&.include?(glob) ? glob : nil
143
+ in ::Hash, ::CHECKING::YOU::OUT then glob.values.to_set & magic.kids_table
144
+ in ::CHECKING::YOU::OUT, ::CHECKING::YOU::OUT then glob == magic ? glob : nil
145
+ else nil
146
+ end.yield_self(&point_zero)
147
+ }
148
+
149
+ # "If a MIME type is provided explicitly (eg, by a ContentType HTTP header, a MIME email attachment,
150
+ # an extended attribute or some other means) then that should be used instead of guessing."
151
+ # This will probably always be `nil` since this is a niche feature, but we have to test it first.
152
+ ::CHECKING::YOU::OUT::from_xattr(pathname) || begin
153
+
154
+ # "Start by doing a glob match of the filename. Keep only globs with the biggest weight."
155
+ # "If the patterns are different, keep only matched with the longest pattern."
156
+ # "If after this, there is one or more matching glob, and all the matching globs result in
157
+ # the same mimetype, use that mimetype as the result."
158
+ # This can be `nil`, `CYO`, a `Set` of Postfix matches, or a `Hash` of weighted Glob matches.
159
+ glob_matched = ::CHECKING::YOU::OUT::from_pathname(pathname)
160
+
161
+ # "If the glob matching fails or results in multiple conflicting mimetypes,
162
+ # read the contents of the file and do magic sniffing on it."
163
+ # This can be `nil` or a `Hash` of weighted magic matches. NOTE(review): a single CYO is a `Struct` subclass and so presumably also `Enumerable`, which would make this condition true for every non-`nil` glob match so that `so_deep: false` never skips the content sniff — confirm.
164
+ magic_matched = (glob_matched.nil? || glob_matched.is_a?(Enumerable) || so_deep) ? ::CHECKING::YOU::OUT::from_content(pathname) : nil
165
+
166
+ # Make a decision based on the two possible matches above plus a third match category
167
+ # based on a union between the glob match and all children of all magic matches.
168
+ # See the relevant proc above. Its result will always be `nil` if either input is `nil`. NOTE(review): no pattern below covers a `Set` or `Hash` glob match combined with a `Hash` magic match and a `nil` third element; such input falls through to `else nil` — confirm intended.
169
+ #
170
+ # "If there was no glob match, use the magic match as the result."
171
+ # "Otherwise use the result of the glob match that has the highest weight."
172
+ return case [glob_matched, magic_matched, magic_children.call(glob_matched, magic_matched)]
173
+ in ::NilClass, ::Hash, ::NilClass then LEGENDARY_HEAVY_GLOW.call(magic_matched, :weight)
174
+ in ::CHECKING::YOU::OUT, ::NilClass, ::NilClass then glob_matched
175
+ in ::Set, ::NilClass, ::NilClass then glob_matched
176
+ in ::Hash, ::NilClass, ::NilClass then LEGENDARY_HEAVY_GLOW.call(glob_matched, [:weight, :length])
177
+ in *, ::CHECKING::YOU::OUT => only_one_type then only_one_type
178
+ in ::Set, ::Hash, ::Set => magic_children then
179
+ # Choose the union-matched type having the heaviest magic-matched weight. (The pattern above rebinds `magic_children` from the proc to the matched `Set`; `keep_if` filters `magic_matched` in place.)
180
+ LEGENDARY_HEAVY_GLOW.call(magic_matched.keep_if { |_magic, cyo| magic_children.include?(cyo) }, :weight)
181
+ in ::Hash, ::Hash, ::Set => magic_children then
182
+ # Choose the union-matched type having the heaviest glob-matched weight,
183
+ # and then additionally the longest glob string if there are still multiple matches.
184
+ LEGENDARY_HEAVY_GLOW.call(glob_matched.keep_if { |_glob, cyo| magic_children.include?(cyo) }, [:weight, :length])
185
+ in ::CHECKING::YOU::OUT, ::Hash, ::NilClass then glob_matched
186
+ in ::CHECKING::YOU::OUT, ::Hash, ::Set => magic_children then
187
+ # Choose the single glob-matched type iff it was also magic-matched,
188
+ # otherwise choose the heaviest magic-matched type.
189
+ magic_matched.values.include?(glob_matched) ? glob_matched : LEGENDARY_HEAVY_GLOW.call(magic_matched, :weight)
190
+ in ::NilClass, ::NilClass, ::NilClass then
191
+ # "If no magic rule matches the data (or if the content is not available),
192
+ # use the default type of application/octet-stream for binary data, or text/plain for textual data."
193
+ # "Note: Checking the first 128 bytes of the file for ASCII control characters is a good way to guess
194
+ # whether a file is binary or text, but note that files with high-bit-set characters should still be
195
+ # treated as text since these can appear in UTF-8 text, unlike control characters." (Only the `application/octet-stream` default is applied here; no text/binary check is made.)
196
+ ::CHECKING::YOU::OUT::from_ietf_media_type('application/octet-stream')
197
+ else nil
198
+ end.yield_self(&one_or_eight)
199
+ end # ::CHECKING::YOU::OUT::from_xattr(pathname) || begin
200
+ } # TEST_EXTANT_PATHNAME
201
+
202
+ end # class CHECKING::YOU
@@ -0,0 +1,260 @@
1
+ require 'pathname' unless defined?(::Pathname)
2
+
3
+
4
+ class CHECKING::YOU
5
+ # Provide case-optional String-like keys for Postfixes, Globs, etc.
6
+ #
7
+ # From Ruby's `Hash` docs: "Two objects refer to the same hash key when their hash value is identical
8
+ # and the two objects are eql? to each other"
9
+ # I tried to subclass String and just override `:eql?` and `:hash` for case-insensitive lookups,
10
+ # but it turns out not to be that easy due to MRI's C comparison functions for String, Symbol, etc.
11
+ #
12
+ # It was super-confusing because I could call e.g. `'DOC'.eql? 'doc'` manually and get `true`,
13
+ # but it would always fail to work when used as a `Hash` key, when calling `uniq`, or in a `Set`:
14
+ #
15
+ # irb(main):049:1* Lol = Class.new(String).tap {
16
+ # irb(main):050:1* _1.define_method(:hash) do; self[0..5].downcase!.hash; end;
17
+ # irb(main):051:1* _1.define_method(:eql?) do |lol|; self[0..5].casecmp?(lol[0..5]); end;
18
+ # irb(main):052:1* _1.alias_method(:==, :eql?)
19
+ # irb(main):053:0> }
20
+ # irb(main):054:0> fart = Lol.new("abcdefg")
21
+ # irb(main):055:0> butt = Lol.new("abcdefgh")
22
+ # irb(main):056:0> fart == butt
23
+ # => true
24
+ # irb(main):057:0> fart.eql? butt
25
+ # => true
26
+ # irb(main):058:0> fart.hash
27
+ # => 1243221847611081438
28
+ # irb(main):059:0> butt.hash
29
+ # => 1243221847611081438
30
+ # irb(main):060:0> {fart => "smella"}[butt]
31
+ # => nil
32
+ # irb(main):061:0> {fart => "smella"}[fart]
33
+ # => "smella"
34
+ #
35
+ # I'm not the first to run into this, as I found when searching for `"rb_str_hash_cmp"`:
36
+ # https://kate.io/blog/strange-hash-instances-in-ruby/
37
+ #
38
+ # To work around this I will explicitly `downcase` the actual String subclass' value
39
+ # and just let the hashes collide for differently-cased values, then `eql?` will decide.
40
+ # This is still slower than the all-C String code but is the fastest method I've found
41
+ # to achieve this without doubling my Object allocations by wrapping each String in a Struct.
42
+ StickAround = Class.new(::String) do
43
+
44
+ # Be case-insensitive by default so we can match any filename.
45
+ DEFAULT_SENSITIVITY = false
46
+
47
+ # These may be weighted just like byte sequences.
48
+ include WeightedAction
49
+
50
+ # This class needs to support being instantiated without a value due to the way our XML data gets loaded,
51
+ # but the superclass `String` has a default `str=""` argument here that works perfectly for that need.
52
def initialize(str=-'', *args, case_sensitive: DEFAULT_SENSITIVITY, **kwargs)
  # Prime `#replace` to treat its next `String` as case-sensitive iff we were explicitly told `true`.
  @case_sensitive = true if case_sensitive == true

  # Let `String#initialize` take the raw value first…
  super(str, *args, **kwargs)
  # …then route any non-empty value through `#replace`, which carries the case-handling logic.
  replace(str) unless str.empty?
end
61
+
62
+ # Mark intent to be case-sensitive. Our source data's `<glob>` Attributes are parsed one at a time,
63
+ # so we won't know at the time of instantiation if we want to be case sensitive.
64
def case_sensitive=(sensitivity)
  # Any value other than `false` is stored as the mark.
  return instance_variable_set(:@case_sensitive, sensitivity) unless sensitivity == false

  # `false` is the default, so drop the IVar if one is set and don't store anything.
  remove_instance_variable(:@case_sensitive) if instance_variable_defined?(:@case_sensitive)
end
72
+
73
+ # Return our case-sensitive String variation iff we are marked case-sensitive *and* have a String value,
74
+ # otherwise just return our frozen deduplicated self value.
75
def itself
  # The IVar holds the proper-cased String only once a value has been stored through `#replace`;
  # a bare boolean mark (or nothing) means there is no case variation to return.
  cased = instance_variable_get(:@case_sensitive)
  cased.is_a?(::String) ? cased : self
end
78
+
79
def case_sensitive
  # Only a stored String counts as a case variation; boolean marks and unset both give `nil`.
  cased = instance_variable_get(:@case_sensitive)
  cased if cased.is_a?(::String)
end
82
+
83
+ # Set an appropriate value for ourselves given a variety of input.
84
+ # Even though this is called `#replace` here and in `String`, this method will often be used
85
+ # to set initial instance values due to nondeterministic attribute order while parsing our XML data.
86
def replace(otra, case_sensitive: DEFAULT_SENSITIVITY)
  # Derive the key text from the input:
  # - another instance of this class or a Symbol: its text;
  # - a Pathname or String with a dotted extension: `*` + that extension (e.g. "hello.jpg" => "*.jpg"),
  #   since `extname` returns the last dotted component including its leading dot;
  # - a Pathname without an extension (also dotfiles like ".bash_profile"): its basename;
  # - a String without an extension, or one already ending in `*`: the String itself;
  # - anything else: its `to_s`.
  newbuild =
    if otra.is_a?(self.class)
      -otra.to_s
    elsif otra.is_a?(::Symbol)
      -otra.name
    elsif otra.is_a?(::Pathname)
      suffix = otra.extname
      suffix.empty? ? -otra.basename.to_s : -suffix.prepend('*')
    elsif otra.is_a?(::String)
      suffix = File.extname(otra)
      (suffix.empty? || otra[-1] == '*') ? -otra : -suffix.prepend('*')
    else
      -otra.to_s
    end

  # `super` sets our own value to the case-folded key and its result is compared to the un-folded key.
  folded = super(-newbuild.downcase(:fold))

  # Keep the proper-cased String when folding changed it, when the caller asked for
  # case-sensitivity, or when we were boolean-marked beforehand. The mark is compared with
  # `== true` (not truthiness) because the IVar may hold a String left by an earlier `#replace`.
  #
  # NOTE: any non-downcased input therefore becomes case-sensitive. This is deliberate: XML parsing
  # may deliver the `pattern` attribute before the case-sensitivity mark, and every
  # `case-sensitive="true"` `<glob>` in current fd.o XML has an upper-case component.
  if folded != newbuild || case_sensitive || instance_variable_get(:@case_sensitive) == true
    instance_variable_set(:@case_sensitive, newbuild)
  elsif instance_variable_defined?(:@case_sensitive)
    remove_instance_variable(:@case_sensitive)
  end

  self # the new downcased value that `super` just set
end # replace
123
+
124
+ # Return a boolean describing our case-sensitivity status.
125
def case_sensitive?
  # The IVar is usually the desired-case String variation of `self`, but may also be a bare
  # `true` mark awaiting the next `#replace`. Both mean "case-sensitive"; anything else does not.
  marker = instance_variable_get(:@case_sensitive)
  marker.is_a?(::String) || marker.equal?(true)
end
134
+
135
+ # Returns case-optional boolean equality between this `StickAround` and a given object `StickAround` or `String`.
136
+ # This is one of two methods necessary for matching Hash keys, but this method will be called only if `self#hash`
137
+ # and `otra#hash` return the same Integer value, complicated by the fact that MRI's C implementation of `rb_str_hash_cmp`
138
+ # won't use our overridden version of `#hash`.
139
+ # That's why we downcase ourselves in `#replace` and store case variations separately.
140
def eql?(otra)
  # Treats `self` as a glob pattern (the haystack) and tests `otra` (the needle) against it.
  #
  # @param otra [Object] usually another `StickAround` or a plain `String`.
  # @return [Boolean] `true` when `otra` matches our glob; `false` when it does not match
  #   or when it is not String-like/path-like at all.
  #
  # https://ruby-doc.org/core/File.html#method-c-fnmatch-3F
  #
  # The `File` Class has kinda-poorly-documented Integer constants to control the behavior of `File::fnmatch?`.
  # If this feels non-Ruby-ish it's because this is a POSIX thing:
  # https://pubs.opengroup.org/onlinepubs/9699919799/functions/fnmatch.html
  #
  #   irb(main):061:0> File::constants::keep_if { _1.to_s.include?('FNM_') }
  #   => [:FNM_CASEFOLD, :FNM_EXTGLOB, :FNM_SYSCASE, :FNM_NOESCAPE, :FNM_PATHNAME, :FNM_DOTMATCH, :FNM_SHORTNAME]
  #   irb(main):062:0> File::constants::keep_if { _1.to_s.include?('FNM_') }.map(&File::method(:const_get))
  #   => [8, 16, 0, 1, 2, 4, 0]
  #
  #
  # - `File::FNM_PATHNAME` controls wildcards in the haystack matching `File::SEPARATOR` in the needle:
  #
  #   irb> File.fnmatch?('*.jpg', '/hello.jpg', File::FNM_PATHNAME)
  #   => false
  #   irb> File.fnmatch?('*.jpg', '/hello.jpg')
  #   => true
  #   irb> File.fnmatch?('*.jpg', 'hello.jpg', File::FNM_PATHNAME)
  #   => true
  #   irb> File.fnmatch?('*.jpg', 'hello.jpg')
  #   => true
  #
  #
  # - `File::FNM_DOTMATCH` controls wildcard in the haystack matching `.` in the needle, like *nix-style "hidden" files:
  #
  #   irb> File.fnmatch?('*.jpg', '.hello.jpg', File::FNM_DOTMATCH)
  #   => true
  #   irb> File.fnmatch?('*.jpg', '.hello.jpg')
  #   => false
  #
  #
  # - `File::FNM_EXTGLOB` controls support for brace-delimited glob syntax for haystacks:
  #
  #   irb> File.fnmatch?('*.jp{e,}g', 'hello.jpg', File::FNM_EXTGLOB)
  #   => true
  #   irb> File.fnmatch?('*.jp{e,}g', 'hello.jpeg', File::FNM_EXTGLOB)
  #   => true
  #   irb> File.fnmatch?('*.jp{e,}g', 'hello.jpeg')
  #   => false
  #   irb> File.fnmatch?('*.jp{e,}g', 'hello.jpg')
  #   => false
  #
  #
  # - `File::FNM_CASEFOLD` and `File::FNM_SYSCASE` control the case-sensitivity when matching,
  #   either by folding (explicit case-insensitivity) or by matching the behavior of the host operating system,
  #   *not* the behavior of any specific filesystem on that OS (https://bugs.ruby-lang.org/issues/15363),
  #   e.g. case-sensitive on BSD/Linux:
  #
  #   irb> RUBY_PLATFORM
  #   => "x86_64-linux"
  #   irb> File.fnmatch?('LOICENSE', 'loicense', File::FNM_SYSCASE)
  #   => false
  #   irb> File.fnmatch?('LOICENSE', 'loicense', File::FNM_CASEFOLD)
  #   => true
  #   irb> File.fnmatch?('LOICENSE', 'loicense')
  #   => false
  #
  #
  # - `File::FNM_NOESCAPE` (ominously) controls matching escape sequences literally:
  #   https://github.com/ruby/ruby/blob/master/doc/syntax/literals.rdoc#label-Strings
  #
  #   irb> File.fnmatch?("*.jpg\\", 'hello.jpg', File::FNM_NOESCAPE)
  #   => false
  #   irb> File.fnmatch?("*.jpg\\", 'hello.jpg')
  #   => true
  #
  #
  # - `File::FNM_SHORTNAME` seems to control eight-dot-three filename matching, per the documentation:
  #   "Makes patterns to match short names if existing. Valid only on Microsoft Windows."
  #
  #
  # - Multiple of these Integer Constants can be bitwise-`OR`ed together for simultaneous use:
  #
  #   irb> File.fnmatch?('*.jp{e,}g', '/root/.HeLlO.jPEg', File::FNM_EXTGLOB | File::FNM_CASEFOLD | File::FNM_DOTMATCH)
  #   => true
  #   irb> File.fnmatch?('*.jp{e,}g', '/root/.HeLlO.jPEg', File::FNM_EXTGLOB | File::FNM_CASEFOLD | File::FNM_DOTMATCH | File::FNM_PATHNAME)
  #   => false

  # An equality test must answer `false` for things that can never match (`nil`, Integers, Symbols, …)
  # rather than letting `File::fnmatch?` raise `TypeError` on them. Anything String-like (`#to_str`)
  # or path-like (`#to_path`, e.g. `Pathname`) is still handed to `File::fnmatch?` as before.
  return false unless otra.respond_to?(:to_str) || otra.respond_to?(:to_path)

  # Support testing `otra` as either another `StickAround` or as a plain `String`,
  # in which case it will not have a method `#case_sensitive?`.
  # The match is case-sensitive if either side asks for it; our own setting alone decides
  # when comparing against plain `String`s.
  sensitive = case_sensitive? || (otra.respond_to?(:case_sensitive?) && otra.case_sensitive?)

  flags = File::FNM_DOTMATCH | File::FNM_EXTGLOB
  flags |= File::FNM_CASEFOLD unless sensitive

  File.fnmatch?(
    self,  # Haystack
    otra,  # Needle
    flags,
  )
end # eql?
233
+
234
# `Hash` lookups go through `#eql?`, but our own comparisons use the `==` operator,
# so point `==` at the very same implementation to keep the two in agreement.
alias_method :==, :eql?
236
+
237
+ # Return an Integer hash value for this object. This method and `#eql?` are used by `Hash`, `Set`, and `#uniq` to
238
+ # associate separate Objects with each other for deduplication or for use as `Hash` keys.
239
+ # The `eql?` method will be called only *after* two Integer `#hash` values match!
240
+ #
241
+ # NOTE: MRI will not use this function in many cases!
242
+ # It has C implementations of methods like `rb_str_hash_cmp` for `Hash` lookups, and this is usually a Good Thing™
243
+ # since it makes `Hash`es fast when using `String` or `Symbol` as keys.
244
+ # Subclassing built-in types like `String` allows/forces us to use these same accelerated code paths,
245
+ # and it was incredibly confusing for me why my custom String subclass was behaving so strangely
246
+ # when used as a Hash key until I had a hunch to read MRI's `string.c` and `hash.c` and confirmed.
247
+ # I found this write-up once I knew to search for "rb_str_hash_cmp": https://kate.io/blog/strange-hash-instances-in-ruby/
248
+ #
249
+ # I'm going to define this anyway because it could still be useful in certain corner cases, but be aware of the above!
250
+ # This is the reason I explicitly `downcase` our self value in `#replace`, because otherwise the Hash keys will never match
251
+ # and `#eql?` will never even be called.
252
+ def hash
253
+ if self.include?(-?*) and not self.start_with?(-?*) then self[...6].downcase!.hash
254
+ elsif self.include?(-?*) and not File.extname(self).empty? then File.extname(self).delete_prefix!(-?.)
255
+ else super
256
+ end
257
+ end
258
+
259
+ end # StickAround
260
+ end # class CHECKING::YOU