checking-you-out 0.7.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,215 @@
1
+ require 'set' unless defined? ::Set
2
+ require 'pathname' unless defined? ::Pathname
3
+
4
+
5
+ # Utility Modules/procs/lambdas/etc for generic operations like checking WeightedActions.
6
+ require_relative 'party_starter' unless defined? ::CHECKING::YOU::WeightedAction
7
+
8
+
9
+ # This base Struct will be used as the Hash key for its matching `OUT` subclass object,
10
+ # and its members correspond to the three major parts of an IETF "Content-Type" String,
11
+ # e.g. "application/x-saturn-rom" → :x, :application, :"saturn-rom".
12
+ #
13
+ # This is kind of a leaky abstraction since I want to support non-IETF type systems too,
14
+ # but the IETF system is by far the most relevant one to us because the most exhaustive
15
+ # source data (`shared-mime-info`) is based on that format and because, you know, Internet.
16
+ # See the adjacent `auslandsgespräch.rb` for the parser and more info.
17
+ #
18
+ #
19
+ # The instances of a `Struct` subclass with at most `RSTRUCT_EMBED_LEN_MAX` members
20
+ # can fit entirely within an `RStruct` without additional heap allocation.
21
+ # In MRI (at least as of 3.0) the `RSTRUCT_EMBED_LEN_MAX` is 3, so CYI uses three members.
22
+ #
23
+ # For more info see:
24
+ # - https://github.com/ruby/ruby/blob/master/gc.c
25
+ # - http://patshaughnessy.net/2013/2/8/ruby-mri-source-code-idioms-3-embedded-objects
26
# Three-member key Struct; defined only once even if this file is loaded again.
CHECKING::YOU::IN ||= Struct.new(
  # Intentionally avoiding naming taxonomic ranks like "domain", "class", or "order"
  # whose names are already common in computing.
  :kingdom,
  :phylum,
  :genus,
) do
  # Promote any CYI to its CYO singleton. CYO has the opposites of these methods.
  def out
    ::CHECKING::YOU::OUT.new(self)
  end

  # A CYI is already "in", so this is the identity.
  def in
    self
  end
end
37
+
38
+ # Main Struct subclass for in-memory type representation.
39
+ # Instances of the base `CHECKING::YOU::IN` Struct will refer to only one of these,
40
+ # and this matching object will contain all relevant data about the type,
41
+ # such as file extension(s), `magic` bytes, and variations of a base type like all of:
42
+ # - "application/vnd.wordperfect;"
43
+ # - "application/vnd.wordperfect;version=4.2"
44
+ # - "application/vnd.wordperfect;version=5.0"
45
+ # - "application/vnd.wordperfect;version=5.1"
46
+ # - "application/vnd.wordperfect;version=6.x"
47
+ # …will be represented in a single `CHECKING::YOU::OUT` object.
48
+ class ::CHECKING::YOU::OUT < ::CHECKING::YOU::IN
49
+
50
# Absolute, symlink-resolved path two directories above this file,
# i.e. the root of the Gem — the directory containing `bin`,`docs`,`lib`, etc.
GEM_ROOT = proc do
  two_levels_up = ::Pathname.new(__dir__).join(-'..', -'..')
  two_levels_up.expand_path.realpath
end
52
+
53
+ # Time object representing the day this running CYO Gem was packaged.
54
+ #
55
+ # `Gem::Specification#date` can be slightly misleading when developing locally with Bundler using `bundle exec`.
56
+ # One might expect the result of `#date` to be "now" (including hours/minutes/seconds) in UTC for such a runtime-packaged Gem,
57
+ # but it will always be midnight UTC of the current day (also in UTC), i.e. a date that is always[0] in the past.
58
+ #
59
+ # After ${your-UTC-offset} hours before midnight localtime, this will give you a *day* that seems to be in the future
60
+ # compared to a system clock displaying localtime despite that *date* UTC still being in the past,
61
+ # e.g. as I write this comment at 2021-05-25 22:22 PST, `GEM_PACKAGE_TIME.call` returns `2021-05-26 00:00:00 UTC`.
62
+ #
63
+ # Rescue from `Gem::MissingSpecError`'s parent to support developing locally with just `require_relative` and no Bundler.
64
+ #
65
+ # [0]: unless you manage to `bundle exec` at exactly 00:00:00 UTC :)
66
GEM_PACKAGE_TIME = proc do
  Gem::Specification.find_by_name(-'checking-you-out').date
rescue Gem::LoadError
  # No spec by that name could be found, so fall back to the current time.
  Time.now
end
67
+
68
# Simple name/value pair, presumably for a single Media-Type parameter like `version=5.0`.
Species = Struct.new(:name, :value) do
  # Build a `Species` from a `name=value` String.
  #
  # Only the first `=` separates the name from the value, so a value that itself
  # contains `=` is kept whole instead of overflowing the two-member Struct.
  # A String with no `=` yields a `Species` whose `value` is `nil`.
  def self.from_string(param_string)
    new(*param_string.split(-?=, 2))
  end
end
73
+
74
# Main memoization Hash for our loaded Type data, created lazily on first access:
#   { CHECKING::YOU::IN => CHECKING::YOU::OUT }
# Unknown keys read as `nil`.
def self.all_night
  @all_night = Hash.new(nil) unless @all_night
  @all_night
end
77
+
78
# Return the memoized singleton instance for any CYO, creating it on first request.
#
# `taxa` may be:
# - a String, which is handed to `from_ietf_media_type`,
#   e.g. ::CHECKING::YOU::OUT::new('application/octet-stream')
# - a `CHECKING::YOU::IN`, which is used directly as the `all_night` key
# - anything else splattable, which is passed through `super(*taxa)` to obtain the key
def self.new(taxa)
  return from_ietf_media_type(taxa) if taxa.is_a?(String)

  memo = all_night
  memo_key = taxa.is_a?(::CHECKING::YOU::IN) ? taxa : super(*taxa)
  # Bypass our own `new` when building the instance, otherwise we would recurse.
  memo[memo_key] ||= allocate.tap { |cyo| cyo.send(:initialize, *taxa) }
end
87
+
88
# Demote any CYO to a CYI that can be passed around in just 40 bytes.
# CYI has the opposites of these methods.
#
# A CYO is already "out", so this is the identity.
def out
  self
end
91
# The first `all_night` key whose memoized value is this object, or `nil` if there is none.
def in
  memo = self.class.all_night
  memo.key(self)
end
92
+
93
+
94
+ # Get a CYO, Set[CYO], or nil by file-extension, e.g. `doc` => { CYO msword, CYO rtf }.
95
POSTFIX_KEY = proc do
  # Re-use a single search structure to avoid allocating an Object per search.
  reusable_key = ::CHECKING::YOU::StickAround.new(-'')
  # Additionally accelerate multiple searches for the same thing by avoiding `StickAround#replace`
  # if the new search key already matches the previous search key.
  # Mark `case_sensitive: false` here for testing arbitrarily-named input.
  lambda do |needle|
    if reusable_key.eql?(needle)
      reusable_key
    else
      reusable_key.replace(needle, case_sensitive: false)
    end
  end
end.call
103
# Look up whatever is stored in the class-level `@after_forever` Hash under the
# `POSTFIX_KEY`-normalized form of `stick_around`.
#
# Returns `nil` when there is no entry, including when `@after_forever` has not been
# created yet (it only comes into existence once the first Postfix is registered).
def self.from_postfix(stick_around)
  instance_variable_get(:@after_forever)&.[](POSTFIX_KEY.call(stick_around))
end
106
+
107
# Get a Hash[CYO] or nil for arbitrary non-file-extension glob match of a File basename.
#
# Every key of the class-level `@stick_around` Hash is asked whether it `eql?` the given
# `stick_around`; the matching key/value pairs are returned as a new Hash.
# Returns `nil` instead of an empty Hash, and also when `@stick_around` has not been
# created yet (i.e. no glob has been registered).
def self.from_glob(stick_around)
  matched = instance_variable_get(:@stick_around)&.select { |glob, _cyo| glob.eql?(stick_around) }
  matched unless matched.nil? || matched.empty?
end
115
+
116
# Prefer a freeform glob match; fall back to a Postfix (file extension) match.
def self.from_pathname(pathname)
  glob_hit = from_glob(pathname)
  return glob_hit if glob_hit

  from_postfix(pathname)
end
119
+
120
+
121
# Add a new Postfix or Glob for a specific type.
#
# A fragment shaped exactly like `*.ext` (leading `*.`, one dot, one asterisk) is stored as a
# Postfix in `@postfixes` on this instance and `@after_forever` on the class.
# Anything else is stored as a Glob in `@globs` and `@stick_around` respectively.
def add_pathname_fragment(fragment)
  simple_extension = fragment.start_with?(-'*.') && fragment.count(-?.) == 1 && fragment.count(-?*) == 1
  instance_haystack, class_haystack = simple_extension ? %i[@postfixes @after_forever] : %i[@globs @stick_around]

  ::CHECKING::YOU::INSTANCE_NEEDLEMAKER.call(instance_haystack, fragment, self)
  ::CHECKING::YOU::CLASS_NEEDLEMAKER.call(class_haystack, fragment, self)
end
131
+
132
+
133
# Get a `Set` of this CYO's own CYI (`self.in`) plus whatever aliases are stored in `@aka`,
# at minimum just `Set[self.in]`.
#
# `@aka` may be unset, a single object of this class or its superclass, or a `Set` of them.
# Any other kind of value yields `nil`.
def aka
  aliases = @aka
  return Set[self.in] if aliases.nil?
  return Set[self.in, *aliases] if aliases.is_a?(::Set)

  Set[self.in, aliases] if aliases.is_a?(self.class) || aliases.is_a?(self.class.superclass)
end
141
+
142
# Take an additional CYI, store it locally, and memoize it as an alias for this CYO.
#
# Anything that is not already a `CHECKING::YOU::IN` is splatted into our superclass'
# constructor first. Returns `self`.
def add_aka(taxa)
  taxa = self.class.superclass.new(*taxa) unless taxa.is_a?(::CHECKING::YOU::IN)
  ::CHECKING::YOU::INSTANCE_NEEDLEMAKER.call(:@aka, taxa, self)
  self.class.all_night.store(taxa, self)
end
148
+
149
# Forget a CYI alias of this Type. Capable of unsetting the "real" CYI as well if desired.
#
# The `all_night` entry is only deleted when it currently points at this CYO.
# NOTE(review): only the class-level memo is touched here; `@aka` is left as-is.
def remove_aka(taxa)
  taxa = self.class.superclass.new(*taxa) unless taxa.is_a?(::CHECKING::YOU::IN)
  memo = self.class.all_night
  memo.delete(taxa) if memo[taxa] === self
end
154
+
155
+ attr_reader :parents, :children
156
+
157
# Take an additional CYO, store it locally as our parent, and ask it to add ourselves as its child.
# The reciprocal call is skipped when the parent already lists us, which stops the
# `add_parent` ⇄ `add_child` ping-pong.
def add_parent(parent_cyo)
  ::CHECKING::YOU::INSTANCE_NEEDLEMAKER.call(:@parents, parent_cyo, self)
  return if parent_cyo.children&.include?(self)

  parent_cyo.add_child(self)
end
162
+
163
# Take an additional CYO, store it locally as our child, and ask it to add ourselves as its parent.
# The reciprocal call is skipped when the child already lists us, which stops the
# `add_child` ⇄ `add_parent` ping-pong.
def add_child(child_cyo)
  ::CHECKING::YOU::INSTANCE_NEEDLEMAKER.call(:@children, child_cyo, self)
  return if child_cyo.parents&.include?(self)

  child_cyo.add_parent(self)
end
168
+
169
# Get a `Set` of this CYO and all of its parent CYOs, at minimum just `Set[self]`.
#
# `@parents` may be unset, a single object of this class or its superclass, or a `Set` of them.
# Any other kind of value yields `nil`.
def adults_table
  elders = @parents
  return Set[self] if elders.nil?
  return Set[self, *elders] if elders.is_a?(::Set)

  Set[self, elders] if elders.is_a?(self.class) || elders.is_a?(self.class.superclass)
end
177
+
178
# Get a `Set` of this CYO and all of its child CYOs, at minimum just `Set[self]`.
#
# `@children` may be unset, a single object of this class or its superclass, or a `Set` of them.
# Any other kind of value yields `nil`.
def kids_table
  youngsters = @children
  return Set[self] if youngsters.nil?
  return Set[self, *youngsters] if youngsters.is_a?(::Set)

  Set[self, youngsters] if youngsters.is_a?(self.class) || youngsters.is_a?(self.class.superclass)
end
186
+
187
# Get a `Set` of this CYO and all parents and children, at minimum just `Set[self]`.
def family_tree
  descendants = kids_table
  ancestors = adults_table
  descendants | ancestors
end
189
+
190
+ # Storage for descriptions (`<comment>`), acronyms, suitable iconography, and other boring metadata, e.g.:
191
+ # <mime-type type="application/vnd.oasis.opendocument.text">
192
+ # <comment>ODT document</comment>
193
+ # <acronym>ODT</acronym>
194
+ # <expanded-acronym>OpenDocument Text</expanded-acronym>
195
+ # <generic-icon name="x-office-document"/>
196
+ # […]
197
+ # </mime-type>
198
+ attr_accessor :description
199
+
200
+ end
201
+
202
+ # IETF Media-Type parser and methods that use that parser.
203
+ require_relative 'auslandsgesprach' unless defined? ::CHECKING::YOU::IN::AUSLANDSGESPRÄCH
204
+ ::CHECKING::YOU::IN.extend(::CHECKING::YOU::IN::AUSLANDSGESPRÄCH)
205
+ ::CHECKING::YOU::IN.include(::CHECKING::YOU::IN::INLANDGESPRÄCH)
206
+ ::CHECKING::YOU::OUT.extend(::CHECKING::YOU::OUT::AUSLANDSGESPRÄCH)
207
+
208
+ # Content matching à la `libmagic`/`file`.
209
+ require_relative 'sweet_sweet_love_magic' unless defined? ::CHECKING::YOU::SweetSweet♥Magic
210
+ ::CHECKING::YOU::OUT.extend(::CHECKING::YOU::SweetSweet♡Magic)
211
+ ::CHECKING::YOU::OUT.prepend(::CHECKING::YOU::SweetSweet♥Magic)
212
+
213
+ # Methods for loading type data from `shared-mime-info` package XML files.
214
+ require_relative 'ghost_revival' unless defined? ::CHECKING::YOU::GHOST_REVIVAL
215
+ ::CHECKING::YOU::OUT.extend(::CHECKING::YOU::OUT::GHOST_REVIVAL)
@@ -0,0 +1,202 @@
1
+ require 'set' unless defined? ::Set
2
+ require 'pathname' unless defined? ::Pathname
3
+
4
+
5
+ # This file defines/imports various utility Modules/procs/etc that should be available
6
+ # to all other CYO components without `including`/`extending`.
7
+ require_relative 'party_starter/weighted_action' unless defined? ::CHECKING::YOU::WeightedAction
8
+ require_relative 'party_starter/stick_around' unless defined? ::CHECKING::YOU::StickAround
9
+
10
+ class CHECKING::YOU
11
+
12
+ # The following two `proc`s handle classwide-memoization and instance-level assignment
13
+ # for values that may be Enumerable but often refer to only a single Object.
14
+ #
15
+ # For example, most `Postfix`es (file extensions) will only ever belong to a single CYO Object,
16
+ # but a handful represent possibly-multiple types, like how `.doc` can be an MSWord file or WordPad RTF.
17
+ #
18
+ # These assignment procs take a storage haystack, a needle to store, and the CYO receiver to which the needle refers.
19
+ # They will set `haystack[needle] => CYO` if that needle is unique and unset, or they will convert
20
+ # an existing single `haystack[needle] => CYO` assignment to `haystack[needle] => Set[existingCYO, newCYO]`.
21
+ #
22
+ # This is an admittedly-annoying complexity-for-performance tradeoff with the goal of allocating
23
+ # as few spurious containers as possible instead of explicitly initializing a Set for every needle
24
+ # when most of them would wastefully be a Set of just a single thing.
25
CLASS_NEEDLEMAKER = proc do |haystack, needle, receiver|
  klass = receiver.class

  # Create the container if this is the very first invocation.
  klass.instance_variable_set(haystack, Hash.new(nil)) unless klass.instance_variable_defined?(haystack)

  # Set the `haystack` Hash's `needle` key to the `receiver` if the `key` is unset, otherwise
  # to a `Set` of the existing value plus `receiver` if that value is not `receiver` already.
  storage = klass.instance_variable_get(haystack)
  current = storage[needle]
  case current
  when nil then storage[needle] = receiver
  when ::Set then current.add(receiver)
  when klass then storage[needle] = Set[current, receiver] unless current == receiver
  end

  # Hand back the whole haystack Hash.
  storage
end
39
+
40
# This is the instance-level version of the above: every needle for one receiver is collected
# in a `Set` stored in the receiver's `haystack` instance variable,
# e.g. `cyo.:@postfixes = Set[post, fix, es, …]`.
# The `Set` is created on the first call, even if it only ever holds a single needle,
# and the `Set` itself is returned.
INSTANCE_NEEDLEMAKER = proc do |haystack, needle, receiver|
  next receiver.instance_variable_set(haystack, Set[needle]) unless receiver.instance_variable_defined?(haystack)

  receiver.instance_variable_get(haystack).add(needle)
end
50
+
51
+
52
+ # Test a Pathname representing an extant file whose contents and metadata we can use.
53
+ # This is separated into a lambda due to the complexity, since the entry-point might
54
+ # be given a String that could represent a Media Type, a hypothetical path,
55
+ # an extant path, or even raw stream contents. It could be given a Pathname representing
56
+ # either a hypothetical or extant file. It could be given an IO/Stream object.
57
+ # Several input possibilities will end up calling this lambda.
58
+ #
59
+ # Some of this complexity is my fault, since I'm doing a lot of variable juggling
60
+ # to avoid as many new-Object-allocations as possible in the name of performance
61
+ # since this library is the very core-est core of DistorteD;
62
+ # things like assigning Hash values to single CYO objects the first time that key is stored
63
+ # then replacing that value with a Set iff that key needs to reference any additional CYO.
64
+ #
65
+ # - `::from_xattr` can return `nil` or a single `CYO` depending on filesystem extended attributes.
66
+ # It is very very unlikely that most people will ever use this, but I think it's cool 8)
67
+ #
68
+ # - `::from_postfix` can return `nil`, `CYO`, or `Set` since I decided to store Postfixes
69
+ # separately from freeform globs since file-extension matches are the vast majority of globs.
70
+ # Postfixes avoid needing to be weighted since they all represent the same final pathname component
71
+ # and should never result in multiple conflicting Postfix key matches.
72
+ # A single Postfix key can represent multiple CYOs, though; hence the possible `Set`.
73
+ #
74
+ # - `::from_glob` can return `nil` or `Hash` since even a single match will include the weighted key.
75
+ #
76
+ # - `::from_content` can return `nil` or `Hash` based on a `libmagic`-style match of file/stream contents.
77
+ # Many common types can be determined from the first four bytes alone, but we support matching
78
+ # arbitrarily-long sequences against arbitrarily-big byte range boundaries.
79
+ # These keys will also be weighted, even for a single match.
80
TEST_EXTANT_PATHNAME = -> (pathname, so_deep: true, only_one_match: true) {

  # Never return empty Enumerables.
  # Yielding-self to this proc will `nil`-ify anything that's `:empty?`
  # and will pass through any Object that doesn't respond to `:empty?`.
  # The predicate name needs its trailing `?` — nothing responds to a bare `:empty`.
  point_zero = proc { _1.respond_to?(:empty?) ? (_1.empty? ? nil : _1) : _1 }

  # Our matching block will return a single CYO when possible, and can optionally
  # return multiple CYO matches for ambiguous files/streams.
  # Multiple matching must be opted into with `only_one_match: false` so it doesn't need to be
  # checked by every caller that is fine with best-effort and wants to minimize allocations.
  one_or_eight = proc { |huh|
    case
    when huh.nil? then nil
    when huh.respond_to?(:empty?)
      if huh.empty? then nil
      elsif huh.size == 1 || only_one_match then huh.is_a?(::Hash) ? huh.values.first : huh.first
      else huh
      end
    else huh
    end
  }

  # Test all "glob" matches against all child Types of all "magic" matches to allow for
  # nuanced detection of ambiguous streams where a `magic` match returns multiple possibilities,
  # e.g. using a `.doc` Postfix-match to choose a `text-plain` glob-match for non-Word `.doc` files
  # or to choose a `application/msword` glob-match over a more generic `application/x-ole-storage`
  # magic-match when the magic weights alone are not enough information to make the correct choice.
  #   irb> ::CHECKING::YOU::OUT::from_postfix('doc')
  #   => #<Set: {#<CHECKING::YOU::OUT application/msword>, #<CHECKING::YOU::OUT text/plain>}>
  #
  # Again, a lot of the complexity here is "my fault" in that I could avoid it by explicitly using
  # the same data structures for all the different inputs, but I need this to be as fast
  # and as low-overhead as possible which means avoiding allocations of things like
  # Enumerables that end up holding only a single other object.
  # Obviously that leads to a lot of variation in result values from helper methods,
  # so I'll own that here instead of ever making callsites deal with it.
  #
  # This `proc`'s output will introduce a little more of that same complexity since it will be `nil`
  # if either input is `nil` or if nothing is shared, will be a single CYO if there is only one
  # shared match, or a `Set` if there are still multiple possibilities.
  magic_children = proc { |glob, magic|
    # NOTE: CYO deviates from `shared-mime-info`'s behavior very slightly here!
    #
    # `shared-mime-info`'s "Recommended checking order" documentation sez:
    # "If any of the mimetypes resulting from a glob match is equal to or a subclass of the result
    #  from the magic sniffing, use this as the result. This allows us for example to distinguish text files
    #  called 'foo.doc' from MS-Word files with the same name, as the magic match for the MS-Word file would be
    #  `application/x-ole-storage` which the MS-Word type inherits."
    #
    # Our behavior is identical except it allows glob matches which are a *superclass* of a
    # magic-match in addition to subclass or equal-to, i.e. using `:family_tree` for comparison here
    # instead of using `:kids_table`. There might be a downside to this that I haven't found yet
    # but it allows CYO to better match some things, e.g. matching a `'.flv'` video file as
    # `'video/x-flv'` instead of as `'video/x-javafx'`, since fd.o has the latter as a subclass of the former.
    case [glob, magic]
      in ::NilClass, * then nil
      in *, ::NilClass then nil
      in ::Set, ::Hash then glob & magic.values.to_set.map(&:family_tree).reduce(&:&)
      in ::Set, ::CHECKING::YOU::OUT then glob & magic.kids_table
      in ::Hash, ::Hash then glob.values.to_set & magic.values.to_set.map(&:family_tree).reduce(&:&)
      in ::CHECKING::YOU::OUT, ::Hash then magic.values.to_set.map(&:family_tree).reduce(&:&)&.include?(glob) ? glob : nil
      in ::Hash, ::CHECKING::YOU::OUT then glob.values.to_set & magic.kids_table
      in ::CHECKING::YOU::OUT, ::CHECKING::YOU::OUT then glob == magic ? glob : nil
      else nil
    end.yield_self(&point_zero)
  }

  # "If a MIME type is provided explicitly (eg, by a ContentType HTTP header, a MIME email attachment,
  #  an extended attribute or some other means) then that should be used instead of guessing."
  # This will probably always be `nil` since this is a niche feature, but we have to test it first.
  ::CHECKING::YOU::OUT::from_xattr(pathname) || begin

    # "Start by doing a glob match of the filename. Keep only globs with the biggest weight."
    # "If the patterns are different, keep only matched with the longest pattern."
    # "If after this, there is one or more matching glob, and all the matching globs result in
    #  the same mimetype, use that mimetype as the result."
    # This can be `nil`, `CYO`, a `Set` of Postfix matches, or a `Hash` of weighted Glob matches.
    glob_matched = ::CHECKING::YOU::OUT::from_pathname(pathname)

    # "If the glob matching fails or results in multiple conflicting mimetypes,
    #  read the contents of the file and do magic sniffing on it."
    # This can be `nil` or a `Hash` of weighted magic matches.
    magic_matched = (glob_matched.nil? || glob_matched.is_a?(Enumerable) || so_deep) ? ::CHECKING::YOU::OUT::from_content(pathname) : nil

    # Make a decision based on the two possible matches above plus a third match category
    # based on the types shared between the glob match and the family of all magic matches.
    # See the relevant proc above. Its result will always be `nil` if either input is `nil`
    # or if the two have nothing in common.
    #
    # "If there was no glob match, use the magic match as the result."
    # "Otherwise use the result of the glob match that has the highest weight."
    #
    # The pattern bindings below use their own names (`shared_types`) so they don't clobber
    # the `magic_children` proc, and the weighted Hashes are filtered with the non-destructive
    # `select` so the Hashes handed to us by the `from_*` helpers are never modified.
    return case [glob_matched, magic_matched, magic_children.call(glob_matched, magic_matched)]
      in ::NilClass, ::Hash, ::NilClass then LEGENDARY_HEAVY_GLOW.call(magic_matched, :weight)
      in ::CHECKING::YOU::OUT, ::NilClass, ::NilClass then glob_matched
      in ::Set, ::NilClass, ::NilClass then glob_matched
      in ::Hash, ::NilClass, ::NilClass then LEGENDARY_HEAVY_GLOW.call(glob_matched, [:weight, :length])
      in *, ::CHECKING::YOU::OUT => only_one_type then only_one_type
      in ::Set, ::Hash, ::Set => shared_types then
        # Choose the shared type having the heaviest magic-matched weight.
        LEGENDARY_HEAVY_GLOW.call(magic_matched.select { |_magic, cyo| shared_types.include?(cyo) }, :weight)
      in ::Hash, ::Hash, ::Set => shared_types then
        # Choose the shared type having the heaviest glob-matched weight,
        # and then additionally the longest glob string if there are still multiple matches.
        LEGENDARY_HEAVY_GLOW.call(glob_matched.select { |_glob, cyo| shared_types.include?(cyo) }, [:weight, :length])
      in ::CHECKING::YOU::OUT, ::Hash, ::NilClass then glob_matched
      # Glob and magic both matched but share nothing: the glob match wins, same as for the single-CYO
      # glob case above ("Otherwise use the result of the glob match that has the highest weight").
      in ::Set, ::Hash, ::NilClass then glob_matched
      in ::Hash, ::Hash, ::NilClass then LEGENDARY_HEAVY_GLOW.call(glob_matched, [:weight, :length])
      in ::CHECKING::YOU::OUT, ::Hash, ::Set then
        # Choose the single glob-matched type iff it was also magic-matched,
        # otherwise choose the heaviest magic-matched type.
        magic_matched.values.include?(glob_matched) ? glob_matched : LEGENDARY_HEAVY_GLOW.call(magic_matched, :weight)
      in ::NilClass, ::NilClass, ::NilClass then
        # "If no magic rule matches the data (or if the content is not available),
        #  use the default type of application/octet-stream for binary data, or text/plain for textual data."
        # "Note: Checking the first 128 bytes of the file for ASCII control characters is a good way to guess
        #  whether a file is binary or text, but note that files with high-bit-set characters should still be
        #  treated as text since these can appear in UTF-8 text, unlike control characters."
        ::CHECKING::YOU::OUT::from_ietf_media_type('application/octet-stream')
      else nil
    end.yield_self(&one_or_eight)
  end  # ::CHECKING::YOU::OUT::from_xattr(pathname) || begin
}  # TEST_EXTANT_PATHNAME
201
+
202
+ end # class CHECKING::YOU
@@ -0,0 +1,260 @@
1
+ require 'pathname' unless defined?(::Pathname)
2
+
3
+
4
+ class CHECKING::YOU
5
+ # Provide case-optional String-like keys for Postfixes, Globs, etc.
6
+ #
7
+ # From Ruby's `Hash` docs: "Two objects refer to the same hash key when their hash value is identical
8
+ # and the two objects are eql? to each other"
9
+ # I tried to subclass String and just override `:eql?` and `:hash` for case-insensitive lookups,
10
+ # but it turns out not to be that easy due to MRI's C comparison functions for String, Symbol, etc.
11
+ #
12
+ # It was super-confusing because I could call e.g. `'DOC'.eql? 'doc'` manually and get `true`,
13
+ # but it would always fail to work when used as a `Hash` key, when calling `uniq`, or in a `Set`:
14
+ #
15
+ # irb(main):049:1* Lol = Class.new(String).tap {
16
+ # irb(main):050:1* _1.define_method(:hash) do; self[0..5].downcase!.hash; end;
17
+ # irb(main):051:1* _1.define_method(:eql?) do |lol|; self[0..5].casecmp?(lol[0..5]); end;
18
+ # irb(main):052:1* _1.alias_method(:==, :eql?)
19
+ # irb(main):053:0> }
20
+ # irb(main):054:0> fart = Lol.new("abcdefg")
21
+ # irb(main):055:0> butt = Lol.new("abcdefgh")
22
+ # irb(main):056:0> fart == butt
23
+ # => true
24
+ # irb(main):057:0> fart.eql? butt
25
+ # => true
26
+ # irb(main):058:0> fart.hash
27
+ # => 1243221847611081438
28
+ # irb(main):059:0> butt.hash
29
+ # => 1243221847611081438
30
+ # irb(main):060:0> {fart => "smella"}[butt]
31
+ # => nil
32
+ # irb(main):061:0> {fart => "smella"}[fart]
33
+ # => "smella"
34
+ #
35
+ # I'm not the first to run into this, as I found when searching for `"rb_str_hash_cmp"`:
36
+ # https://kate.io/blog/strange-hash-instances-in-ruby/
37
+ #
38
+ # To work around this I will explicitly `downcase` the actual String subclass' value
39
+ # and just let the hashes collide for differently-cased values, then `eql?` will decide.
40
+ # This is still slower than the all-C String code but is the fastest method I've found
41
+ # to achieve this without doubling my Object allocations by wrapping each String in a Struct.
42
+ StickAround = Class.new(::String) do
43
+
44
+ # Be case-insensitive by default so we can match any filename.
45
+ DEFAULT_SENSITIVITY = false
46
+
47
+ # These may be weighted just like byte sequences.
48
+ include WeightedAction
49
+
50
# This class needs to support being instantiated without a value due to the way our XML data gets loaded,
# so `str` defaults to an empty String just like the superclass `String`'s own `str` argument does.
#
# `str`, `*args`, and `**kwargs` are forwarded to `super` unchanged. Any non-empty `str` is then
# additionally run through `#replace`, because that is where the case-sensitivity handling lives.
def initialize(str=-'', *args, case_sensitive: DEFAULT_SENSITIVITY, **kwargs)
  # Prime `#replace` to treat its next `String` as case-sensitive iff we were told.
  @case_sensitive = case_sensitive if case_sensitive == true

  super(str, *args, **kwargs)
  replace(str) unless str.empty?
end
61
+
62
# Mark intent to be case-sensitive. Our source data's `<glob>` Attributes are parsed one at a time,
# so we won't know at the time of instantiation if we want to be case sensitive.
#
# Anything other than `false` is stored in `@case_sensitive` as given.
# `false` is the default, so instead of storing it we drop the IVar entirely if one is set.
def case_sensitive=(sensitivity)
  return instance_variable_set(:@case_sensitive, sensitivity) unless sensitivity == false

  remove_instance_variable(:@case_sensitive) if instance_variable_defined?(:@case_sensitive)
end
72
+
73
# Return our case-sensitive String variation iff `@case_sensitive` currently holds a String,
# otherwise (unset, or just the boolean mark) return `self`.
def itself
  cased = instance_variable_get(:@case_sensitive)
  cased.is_a?(::String) ? cased : self
end
78
+
79
# Return the stored case-sensitive String variation, or `nil` when `@case_sensitive`
# is unset or holds anything other than a String (e.g. the boolean mark).
def case_sensitive
  cased = instance_variable_get(:@case_sensitive)
  cased if cased.is_a?(::String)
end
82
+
83
# Set an appropriate value for ourselves given a variety of input.
# Even though this is called `#replace` here and in `String`, this method will often be used
# to set initial instance values due to nondeterministic attribute order while parsing our XML data.
#
# `otra` may be another instance of this class, a Symbol, a Pathname, a String, or anything with `#to_s`.
# Pass `case_sensitive: true` to keep the original-case value alongside the downcased one.
# Returns `self`.
def replace(otra, case_sensitive: DEFAULT_SENSITIVITY)
  # Extract a usable value from different input types/formats.
  #
  # `File::extname` will return the last dotted component of a String, prepended with the leading dot,
  # e.g. `File::extname("hello.jpg")` => `".jpg"`. We will prepend an asterisk to these to make a glob pattern.
  #
  # `File::extname` will be an empty String for input Strings which contain no dotted components
  # or only have a leading dot, e.g. `File::extname(".bash_profile")` => `""`.
  newbuild =
    case otra
    when self.class then -otra.to_s
    when ::Symbol then -otra.name
    when ::Pathname
      if otra.extname.empty? then otra.basename.to_s.-@ else otra.extname.prepend(-?*).-@ end
    when ::String
      (File.extname(otra).empty? or -otra[-1] == -?*) ? -otra : -File.extname(otra).prepend(-?*)
    else -otra.to_s
    end

  # This `super` call explicitly sets the `self` value to the downcased version of our key.
  # We then compare `super`'s return value to its input to decide if we should store a case-sensitive value too.
  folded = super(-newbuild.downcase(:fold))

  # If the computed key is already downcase we could still be case-sensitive if we were boolean-marked as such,
  # otherwise we have no need for the IVar and can remove it if one is set.
  #
  # Explicitly check if the IVar == `true`, not just truthiness, because it may also be a `String`
  # if we are `#replace`ing a previous case-sensitive value.
  #
  # NOTE: There is a hole in the logic here where any non-downcased input will cause case-sensitivity,
  #       but this is necessary since our XML parsing might give us a `pattern` attribute callback
  #       before we'd had a chance to set a `case-sensitive` mark.
  #       All of the `case-sensitive="true"` `<glob>`s in current fd.o XML have an upper-case component,
  #       so this hack will make sure we don't discard the proper-cased `String` if we see that callback before the mark.
  keep_cased_copy = (folded != newbuild) || case_sensitive || (instance_variable_get(:@case_sensitive) == true)
  if keep_cased_copy
    instance_variable_set(:@case_sensitive, newbuild)
  elsif instance_variable_defined?(:@case_sensitive)
    remove_instance_variable(:@case_sensitive)
  end

  self  # return the new downcased value we just set when we called `super`
end  # replace
123
+
124
# Return a boolean describing our case-sensitivity status.
#
# The same-name IVar could contain a (non-default) boolean value, but it's far more likely to contain
# the desired-case variation of the `self` String. Either a stored String variation or a literal
# `true` mark (set ahead of the next `#replace`) counts as case-sensitive; anything else does not.
def case_sensitive?
  marker = instance_variable_get(:@case_sensitive)
  marker.is_a?(::String) || marker.equal?(true)
end
134
+
135
+ # Returns case-optional boolean equality between this `StickAround` and a given object `StickAround` or `String`.
136
+ # This is one of two methods necessary for matching Hash keys, but this method will be called only if `self#hash`
137
+ # and `otra#hash` return the same Integer value, complicated by the fact that MRI's C implementation of `rb_str_hash_cmp`
138
+ # won't use our overridden version of `#hash`.
139
+ # That's why we downcase ourselves in `#replace` and store case variations separately.
140
def eql?(otra)
  # Glob-match `otra` against `self`, where `self` is the pattern.
  #
  # otra    - another `StickAround`, a plain `String`, or a path-like object.
  # Returns - `true` when `otra` matches our glob pattern, otherwise `false`.
  #           Objects that are neither String-like nor path-like never match and yield `false`.
  #
  # https://ruby-doc.org/core/File.html#method-c-fnmatch-3F
  #
  # The `File` Class has kinda-poorly-documented Integer constants to control the behavior of `File::fnmatch?`.
  # If this feels non-Ruby-ish it's because this is a POSIX thing:
  # https://pubs.opengroup.org/onlinepubs/9699919799/functions/fnmatch.html
  #
  #   irb(main):061:0> File::constants::keep_if { _1.to_s.include?('FNM_') }
  #   => [:FNM_CASEFOLD, :FNM_EXTGLOB, :FNM_SYSCASE, :FNM_NOESCAPE, :FNM_PATHNAME, :FNM_DOTMATCH, :FNM_SHORTNAME]
  #   irb(main):062:0> File::constants::keep_if { _1.to_s.include?('FNM_') }.map(&File::method(:const_get))
  #   => [8, 16, 0, 1, 2, 4, 0]
  #
  #
  # - `File::FNM_PATHNAME` controls wildcards in the haystack matching `File::SEPARATOR` in the needle:
  #
  #   irb> File.fnmatch?('*.jpg', '/hello.jpg', File::FNM_PATHNAME)
  #   => false
  #   irb> File.fnmatch?('*.jpg', '/hello.jpg')
  #   => true
  #   irb> File.fnmatch?('*.jpg', 'hello.jpg', File::FNM_PATHNAME)
  #   => true
  #   irb> File.fnmatch?('*.jpg', 'hello.jpg')
  #   => true
  #
  #
  # - `File::FNM_DOTMATCH` controls wildcards in the haystack matching `.` in the needle, like *nix-style "hidden" files:
  #
  #   irb> File.fnmatch?('*.jpg', '.hello.jpg', File::FNM_DOTMATCH)
  #   => true
  #   irb> File.fnmatch?('*.jpg', '.hello.jpg')
  #   => false
  #
  #
  # - `File::FNM_EXTGLOB` controls support for brace-delimited glob syntax for haystacks:
  #
  #   irb> File.fnmatch?('*.jp{e,}g', 'hello.jpg', File::FNM_EXTGLOB)
  #   => true
  #   irb> File.fnmatch?('*.jp{e,}g', 'hello.jpeg', File::FNM_EXTGLOB)
  #   => true
  #   irb> File.fnmatch?('*.jp{e,}g', 'hello.jpeg')
  #   => false
  #   irb> File.fnmatch?('*.jp{e,}g', 'hello.jpg')
  #   => false
  #
  #
  # - `File::FNM_CASEFOLD` and `File::FNM_SYSCASE` control the case-sensitivity when matching,
  #   either by folding (explicit case-insensitivity) or by matching the behavior of the host operating system,
  #   *not* the behavior of any specific filesystem on that OS (https://bugs.ruby-lang.org/issues/15363),
  #   e.g. case-sensitive on BSD/Linux:
  #
  #   irb> RUBY_PLATFORM
  #   => "x86_64-linux"
  #   irb> File.fnmatch?('LOICENSE', 'loicense', File::FNM_SYSCASE)
  #   => false
  #   irb> File.fnmatch?('LOICENSE', 'loicense', File::FNM_CASEFOLD)
  #   => true
  #   irb> File.fnmatch?('LOICENSE', 'loicense')
  #   => false
  #
  #
  # - `File::FNM_NOESCAPE` (ominously) controls matching escape sequences literally:
  #   https://github.com/ruby/ruby/blob/master/doc/syntax/literals.rdoc#label-Strings
  #
  #   irb> File.fnmatch?("*.jpg\\", 'hello.jpg', File::FNM_NOESCAPE)
  #   => false
  #   irb> File.fnmatch?("*.jpg\\", 'hello.jpg')
  #   => true
  #
  #
  # - `File::FNM_SHORTNAME` seems to control eight-dot-three filename matching, per the documentation:
  #   "Makes patterns to match short names if existing. Valid only on Microsoft Windows."
  #
  #
  # - Multiple of these Integer Constants can be bitwise-`OR`ed together for simultaneous use:
  #
  #   irb> File.fnmatch?('*.jp{e,}g', '/root/.HeLlO.jPEg', File::FNM_EXTGLOB | File::FNM_CASEFOLD | File::FNM_DOTMATCH)
  #   => true
  #   irb> File.fnmatch?('*.jp{e,}g', '/root/.HeLlO.jPEg', File::FNM_EXTGLOB | File::FNM_CASEFOLD | File::FNM_DOTMATCH | File::FNM_PATHNAME)
  #   => false

  # `File::fnmatch?` raises `TypeError` for anything it can't implicitly convert to a String or path
  # (`nil`, `Symbol`, `Integer`, …). An equality test should just answer `false` for those.
  return false unless otra.respond_to?(:to_str) || otra.respond_to?(:to_path)

  # Support testing `otra` as either another `StickAround` or as a plain `String`,
  # in which case it will not have a method `#case_sensitive?` and only our own setting applies.
  # Case-sensitivity on either side makes the whole comparison case-sensitive.
  sensitive = self.case_sensitive? || (otra.respond_to?(:case_sensitive?) && otra.case_sensitive?)

  File.fnmatch?(
    self.itself, # Haystack
    otra.itself, # Needle
    File::FNM_DOTMATCH | File::FNM_EXTGLOB | (sensitive ? 0 : File::FNM_CASEFOLD)
  )
end # eql?
233
+
234
+ # Hash-key usage depends on `#eql?`, but `:==` should have identical behavior for our own uses.
235
+ alias_method(:==, :eql?) # `==` now dispatches to the same glob-matching body as `eql?`.
236
+
237
+ # Return an Integer hash value for this object. This method and `#eql?` are used by `Hash`, `Set`, and `#uniq` to
238
+ # associate separate Objects with each other for deduplication or for use as `Hash` keys.
239
+ # The `eql?` method will be called only *after* two Integer `#hash` values match!
240
+ #
241
+ # NOTE: MRI will not use this function in many cases!
242
+ # It has C implementations of methods like `rb_str_hash_cmp` for `Hash` lookups, and this is usually a Good Thing™
243
+ # since it makes `Hash`es fast when using `String` or `Symbol` as keys.
244
+ # Subclassing built-in types like `String` allows/forces us to use these same accelerated code paths,
245
+ # and it was incredibly confusing for me why my custom String subclass was behaving so strangely
246
+ # when used as a Hash key until I had a hunch to read MRI's `string.c` and `hash.c` and confirmed.
247
+ # I found this write-up once I knew to search for "rb_str_hash_cmp": https://kate.io/blog/strange-hash-instances-in-ruby/
248
+ #
249
+ # I'm going to define this anyway because it could still be useful in certain corner cases, but be aware of the above!
250
+ # This is the reason I explicitly `downcase` our self value in `#replace`, because otherwise the Hash keys will never match
251
+ # and `#eql?` will never even be called.
252
def hash
  # Returns an Integer hash for this (possibly glob-style) String:
  #
  # - No `*` wildcard at all: defer to `super`.
  # - Wildcard anywhere but the first character: hash the downcased first six characters.
  # - Leading wildcard with a file extension: hash that extension without its leading dot.
  # - Leading wildcard and no extension: defer to `super`.
  #
  # The non-bang `downcase`/`delete_prefix` are deliberate. Their bang variants return `nil` when
  # nothing changes, which made every already-lowercase prefix hash as `nil.hash`.
  # The extension branch also has to call `#hash` on the extension. Returning the String itself
  # breaks the contract that `#hash` yields an Integer.
  return super unless self.include?(-?*)
  return self[...6].downcase.hash unless self.start_with?(-?*)

  extension = File.extname(self)
  return super if extension.empty?

  extension.delete_prefix(-?.).hash
end
258
+
259
+ end # StickAround
260
+ end # class CHECKING::YOU