elasticgraph-support 0.18.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,293 @@
1
+ # Copyright 2024 Block, Inc.
2
+ #
3
+ # Use of this source code is governed by an MIT-style
4
+ # license that can be found in the LICENSE file or at
5
+ # https://opensource.org/licenses/MIT.
6
+ #
7
+ # frozen_string_literal: true
8
+
9
module ElasticGraph
  module Support
    # Represents a (possibly unbounded) set of `::Time` values. Rather than enumerating individual
    # times, the set is stored as a collection of `::Range` objects, which is what allows it to
    # describe things such as "every time at or after midnight on date X".
    #
    # The wrapped ranges take one of these shapes:
    #
    # - ALL: neither a lower nor an upper bound, so every `::Time` is a member.
    # - Open: bounded on exactly one side.
    # - Closed: bounded on both sides.
    # - Empty: bounds that admit no `::Time` (the lower bound is after the upper bound). Such
    #   ranges are discarded when a `TimeSet` is built.
    class TimeSet < ::Data.define(:ranges)
      # Builds a `TimeSet` holding the single range described by the given bounds.
      #
      # At most one lower bound (`gt` or `gte`) and at most one upper bound (`lt` or `lte`) may be
      # supplied; an `ArgumentError` is raised otherwise. Omitting a bound leaves that side open.
      def self.of_range(gt: nil, gte: nil, lt: nil, lte: nil)
        raise ArgumentError, "TimeSet got two lower bounds, but can have only one (gt: #{gt.inspect}, gte: #{gte.inspect})" if gt && gte
        raise ArgumentError, "TimeSet got two upper bounds, but can have only one (lt: #{lt.inspect}, lte: #{lte.inspect})" if lt && lte

        # Ruby's `Range` can only express an exclusive *end*, never an exclusive *begin*, so both
        # bounds are normalized to their inclusive form. Because times are handled at millisecond
        # granularity, `> t` is the same as `>= t + 1ms`, and `< t` is the same as `<= t - 1ms`.
        inclusive_lower = gt.nil? ? gte : gt + CONSECUTIVE_TIME_INCREMENT
        inclusive_upper = lt.nil? ? lte : lt - CONSECUTIVE_TIME_INCREMENT

        built_range = RangeFactory.build_non_empty(inclusive_lower, inclusive_upper)
        of_range_objects(_ = [built_range].compact)
      end

      # Builds a `TimeSet` from a collection of `::Time` objects. Each time becomes a
      # single-element range (`t..t`).
      def self.of_times(times)
        single_time_ranges = times.map { |time| ::Range.new(time, time) }
        of_range_objects(single_time_ranges)
      end

      # Builds a `TimeSet` from already-constructed `::Time` ranges. Used by the other factory
      # methods and by the set operations below.
      def self.of_range_objects(range_objects)
        # Reuse the shared EMPTY / ALL instances when possible instead of allocating a new set.
        return EMPTY if range_objects.empty?

        leading_range = _ = range_objects.first
        unbounded = leading_range.begin.nil? && leading_range.end.nil?
        unbounded ? ALL : new(range_objects)
      end

      # Returns a new `TimeSet` holding only the `::Time`s that belong to both this set and `other_set`.
      def intersection(other_set)
        # Intersection distributes over union (see https://en.wikipedia.org/wiki/Algebra_of_sets):
        #
        #   (A₁ ∪ A₂) ∩ (B₁ ∪ B₂) = (A₁ ∩ B₁) ∪ (A₁ ∩ B₂) ∪ (A₂ ∩ B₁) ∪ (A₂ ∩ B₂)
        #
        # In other words: intersect every range of this set with every range of the other set,
        # and take the union of whatever is left.
        their_ranges = other_set.ranges.to_a

        shared_ranges = ranges.to_a.flat_map do |own_range|
          their_ranges.filter_map { |their_range| intersect_ranges(own_range, their_range) }
        end

        TimeSet.of_range_objects(shared_ranges)
      end

      # Returns a new `TimeSet` holding every `::Time` that belongs to this set, `other_set`, or both.
      def union(other_set)
        TimeSet.of_range_objects(ranges | other_set.ranges)
      end

      # Returns true if `time` belongs to this `TimeSet`.
      def member?(time)
        ranges.any? { |range| range.cover?(time) }
      end

      # Returns true if this `TimeSet` and `other_set` have at least one time in common.
      def intersect?(other_set)
        other_set.ranges.any? do |their_range|
          ranges.any? { |own_range| ranges_intersect?(their_range, own_range) }
        end
      end

      # Returns true if this `TimeSet` has no members.
      def empty?
        ranges.empty?
      end

      # Returns a new `TimeSet` holding the members of this set that are not members of `other`.
      def -(other)
        remaining = ranges.to_a

        other.ranges.to_a.each do |removed_range|
          remaining = remaining.flat_map do |kept_range|
            # Nothing to carve out when the two ranges share no times.
            next [kept_range] unless ranges_intersect?(kept_range, removed_range)

            # The ranges overlap, so `kept_range` must shrink. Whatever the kind of overlap (equal,
            # one nested in the other, or partial on either side), what survives is at most a piece
            # before `removed_range` and a piece after it. Both pieces are built as candidates
            # without checking which case applies; a piece that should not exist comes out as a
            # descending range, and `#initialize` discards those.

            # @type var leftovers: ::Array[timeRange]
            leftovers = []

            if (removed_begin = removed_range.begin)
              # The part of `kept_range` that comes before `removed_range`.
              leftovers << Range.new(kept_range.begin, removed_begin - CONSECUTIVE_TIME_INCREMENT)
            end

            if (removed_end = removed_range.end)
              # The part of `kept_range` that comes after `removed_range`.
              leftovers << Range.new(removed_end + CONSECUTIVE_TIME_INCREMENT, kept_range.end)
            end

            leftovers
          end
        end

        TimeSet.of_range_objects(remaining)
      end

      # Returns the complement of this set: every `::Time` that is not a member of it.
      def negate
        ALL - self
      end

      private

      private_class_method :new # use `of_range`, `of_times`, or `of_range_objects` instead.

      # Normalizes `ranges` before storing them: descending (empty) ranges are dropped, the rest
      # are converted to a set (so callers may pass an array), overlapping or adjacent ranges are
      # merged, and the resulting set is frozen to keep the instance immutable.
      def initialize(ranges:)
        ascending_ranges = ranges.reject { |range| descending_range?(range) }
        merged_ranges = merge_overlapping_or_adjacent_ranges(ascending_ranges.to_set)

        super(ranges: merged_ranges.freeze)
      end

      # Returns true if at least one `::Time` is covered by both ranges. Each range is tested
      # against both edges of the other one.
      def ranges_intersect?(r1, r2)
        [[r1, r2], [r2, r1]].any? do |outer, inner|
          outer.cover?(inner.begin) || outer.cover?(inner.end)
        end
      end

      # The amount to add to a time to reach the next consecutive time at the granularity we
      # support. According to the Elasticsearch docs[1], dates are stored with millisecond
      # granularity, so that is all we support:
      #
      # > Internally, dates are converted to UTC (if the time-zone is specified) and
      # > stored as a long number representing milliseconds-since-the-epoch.
      #
      # A rational is used instead of a float so that the arithmetic stays exact.
      #
      # [1] https://www.elastic.co/guide/en/elasticsearch/reference/7.15/date.html
      CONSECUTIVE_TIME_INCREMENT = Rational(1, 1000)

      # Returns true if one range ends exactly one millisecond before the other begins, meaning
      # no `::Time` (at millisecond granularity) can fall between them.
      def adjacent?(r1, r2)
        [[r1, r2], [r2, r1]].any? do |earlier, later|
          earlier_end = earlier.end
          !earlier_end.nil? && (earlier_end + CONSECUTIVE_TIME_INCREMENT) == later.begin
        end
      end

      # Builds the range of `::Time`s common to both ranges: it begins at the later of the two
      # begins and ends at the earlier of the two ends (a `nil` edge imposes no constraint).
      # Returns `nil` when the ranges have nothing in common.
      def intersect_ranges(r1, r2)
        latest_begin = [r1.begin, r2.begin].compact.max
        earliest_end = [r1.end, r2.end].compact.min

        RangeFactory.build_non_empty(latest_begin, earliest_end)
      end

      # Reduces `all_ranges` to an equivalent set made of fewer, wider ranges. For example:
      #
      #   - 2020-05-01 to 2020-07-01
      #   - 2020-06-01 to 2020-08-01
      #
      # can be replaced by the single range 2020-05-01 to 2020-08-01. Merging is not strictly
      # required, but keeping a `TimeSet` in a compact form simplifies later calculations on it.
      def merge_overlapping_or_adjacent_ranges(all_ranges)
        # One pass only merges *pairs* of ranges, so several passes can be needed. Consider:
        #
        #   - 2020-05-01 to 2020-07-01
        #   - 2020-06-01 to 2020-09-01
        #   - 2020-08-01 to 2020-10-01
        #
        # The first pass yields two merged ranges (2020-05-01 to 2020-09-01 and 2020-06-01 to
        # 2020-10-01); the second pass merges those into 2020-05-01 to 2020-10-01. The loop stops
        # as soon as a pass finds nothing left to merge, and is capped at the number of ranges
        # that were passed in.
        all_ranges.size.times do
          # Any range may be mergeable with any other, so every pair is examined.
          mergeable_pairs = all_ranges.to_a.combination(2).select do |first, second|
            ranges_intersect?(first, second) || adjacent?(first, second)
          end

          # No mergeable pairs means the set is already in its final form.
          return all_ranges if mergeable_pairs.empty?

          # Replace each mergeable pair with the single range spanning both of its members.
          spanning_ranges = mergeable_pairs.filter_map do |first, second|
            widest_begin = nil_or(:min, from: [first.begin, second.begin])
            widest_end = nil_or(:max, from: [first.end, second.end])
            RangeFactory.build_non_empty(widest_begin, widest_end)
          end

          # Keep the ranges that took part in no merge, and add the merged ones.
          untouched_ranges = all_ranges - mergeable_pairs.flatten
          all_ranges = untouched_ranges.union(_ = spanning_ranges)
        end

        all_ranges
      end

      # Picks the most lenient boundary out of `from`. A `nil` boundary means "unbounded", so it
      # wins whenever it is present; otherwise `min_or_max` (`:min` or `:max`) is applied.
      def nil_or(min_or_max, from:)
        from.include?(nil) ? nil : from.public_send(min_or_max)
      end

      # Returns true if `range` begins after it ends. A range with a `nil` edge is never descending.
      def descending_range?(range)
        lower = range.begin
        upper = range.end
        return false if lower.nil? || upper.nil?

        lower > upper
      end

      # Shared instance that contains every `::Time`.
      ALL = new([::Range.new(nil, nil)])
      # Shared instance that contains no `::Time`.
      EMPTY = new([])

      module RangeFactory
        # Builds a range from the given bounds (either may be `nil` to leave that side open).
        # Returns `nil` instead of a range when both bounds are given and the lower bound is
        # after the upper bound, since such a range would be empty.
        def self.build_non_empty(lower_bound, upper_bound)
          open_ended = lower_bound.nil? || upper_bound.nil?
          ::Range.new(lower_bound, upper_bound) if open_ended || lower_bound <= upper_bound
        end
      end
    end
  end
end
@@ -0,0 +1,108 @@
1
+ # Copyright 2024 Block, Inc.
2
+ #
3
+ # Use of this source code is governed by an MIT-style
4
+ # license that can be found in the LICENSE file or at
5
+ # https://opensource.org/licenses/MIT.
6
+ #
7
+ # frozen_string_literal: true
8
+
9
require "date" # provides `::Date` and `Time#to_date`, both of which are used below.

module ElasticGraph
  module Support
    # Small, dependency-free helpers for working with local time strings and `::Time` values.
    module TimeUtil
      NANOS_PER_SECOND = 1_000_000_000
      NANOS_PER_MINUTE = NANOS_PER_SECOND * 60
      NANOS_PER_HOUR = NANOS_PER_MINUTE * 60

      # Converts a local time string (such as `03:45:12` or `12:30:43.756`) to its nano-of-day:
      # an integer between 0 and 24 * 60 * 60 * 1,000,000,000 - 1.
      #
      # This is meant to match the behavior of Java's `LocalTime#toNanoOfDay()` API:
      # https://docs.oracle.com/en/java/javase/17/docs/api/java.base/java/time/LocalTime.html#toNanoOfDay()
      #
      # This is specifically useful when we need to work with local time values in a script: by converting
      # a local time parameter to nano-of-day once, the script can compare values efficiently instead of
      # parsing the same local time parameters over and over again as it is applied to each document.
      #
      # Note: this method assumes the given `local_time_string` is well-formed (`HH:MM:SS` with an optional
      # fractional part of at most 9 digits). You'll get an exception if you provide a malformed value,
      # but no effort has been put into giving a clear error message. The caller is expected to have
      # already validated that the `local_time_string` is formatted correctly.
      def self.nano_of_day_from_local_time(local_time_string)
        hours_str, minutes_str, full_seconds_str = local_time_string.split(":")
        seconds_str, subseconds_str = (_ = full_seconds_str).split(".")

        hours = Integer(_ = hours_str, 10)
        minutes = Integer(_ = minutes_str, 10)
        seconds = Integer(seconds_str, 10)
        # Right-pad the fraction to 9 digits so that e.g. ".756" means 756,000,000 nanos.
        # A missing fraction (`nil`) becomes "000000000", i.e. zero.
        nanos = Integer(subseconds_str.to_s.ljust(9, "0"), 10)

        (hours * NANOS_PER_HOUR) + (minutes * NANOS_PER_MINUTE) + (seconds * NANOS_PER_SECOND) + nanos
      end

      # Returns `time` advanced by exactly one `unit` (`:year`, `:month`, `:day`, or `:hour`),
      # keeping the UTC offset (and UTC-ness) of `time`.
      #
      # Ruby's core `Time` type does not directly support this. ActiveSupport (from rails) does, but
      # we don't depend on rails at all and don't want to add such a heavyweight dependency for such
      # a small thing. Luckily, our needs are limited, which makes this a much simpler problem than a
      # general-purpose `time.advance(...)` API:
      #
      # - We only need to support year, month, day, and hour advances.
      # - We only ever need to advance a single unit.
      #
      # When the target month is shorter than the day-of-month of `time` (e.g. 2021-01-31 plus one
      # month, or 2020-02-29 plus one year), the result is the last day of the target month rather
      # than spilling over into the following month.
      #
      # Raises `ArgumentError` if `unit` is not one of the supported units.
      def self.advance_one_unit(time, unit)
        case unit
        when :year
          with_day_clamped(time, year: time.year + 1, month: time.month)
        when :month
          if time.month == 12
            with_day_clamped(time, year: time.year + 1, month: 1)
          else
            with_day_clamped(time, year: time.year, month: time.month + 1)
          end
        when :day
          next_day = time.to_date + 1
          with_updated(time, year: next_day.year, month: next_day.month, day: next_day.day)
        when :hour
          time + 3600
        else
          raise ArgumentError, "Unsupported unit: #{unit.inspect} (expected :year, :month, :day, or :hour)"
        end
      end

      # Rebuilds `time` in the given `year` and `month`, keeping its day-of-month unless that month
      # is too short, in which case the last day of the month is used instead.
      private_class_method def self.with_day_clamped(time, year:, month:)
        # Day `-1` asks `Date` for the last day of the month. The proleptic Gregorian calendar is
        # requested explicitly so that the month length agrees with how `::Time` counts days.
        last_day_of_month = ::Date.new(year, month, -1, ::Date::GREGORIAN).day
        with_updated(time, year: year, month: month, day: [time.day, last_day_of_month].min)
      end

      # Rebuilds `time` with the given date parts, keeping its time-of-day (including sub-seconds)
      # and its UTC offset.
      private_class_method def self.with_updated(time, year: time.year, month: time.month, day: time.day)
        # UTC needs to be treated special here due to an oddity of Ruby's Time class:
        #
        # > Time.utc(2021, 12, 2, 12, 30, 30).iso8601
        # => "2021-12-02T12:30:30Z"
        # > Time.new(2021, 12, 2, 12, 30, 30, 0).iso8601
        # => "2021-12-02T12:30:30+00:00"
        #
        # We want to preserve the `Z` suffix on the ISO8601 representation of the advanced time
        # (if it was there on the original time), so we use the `::Time.utc` method here to do that.
        # Non-UTC time must use `::Time.new(...)` with a UTC offset, though.
        if time.utc?
          ::Time.utc(year, month, day, time.hour, time.min, time.sec.to_r + time.subsec)
        else
          ::Time.new(year, month, day, time.hour, time.min, time.sec.to_r + time.subsec, time.utc_offset)
        end
      end
    end
  end
end
@@ -0,0 +1,67 @@
1
+ # Copyright 2024 Block, Inc.
2
+ #
3
+ # Use of this source code is governed by an MIT-style
4
+ # license that can be found in the LICENSE file or at
5
+ # https://opensource.org/licenses/MIT.
6
+ #
7
+ # frozen_string_literal: true
8
+
9
+ require "json"
10
+
11
module ElasticGraph
  module Support
    # Turns `Untyped` values into strings and back. The logic lives in `elasticgraph-support` so
    # that it can be shared between the `Untyped` indexing preparer (which lives in
    # `elasticgraph-indexer`) and the `Untyped` coercion adapter (which lives in
    # `elasticgraph-graphql`). Both must use the same logic so that the strings we filter on at
    # query time match the strings we indexed for semantically equivalent untyped data.
    #
    # Note: change this module with care. If `encode` starts producing different strings, queries
    # may break for `Untyped` values that were indexed with the previous encoding logic, and a
    # backfill into the datastore will likely be required.
    module UntypedEncoder
      # Encodes `value` as a String so it can be indexed in an Elasticsearch/OpenSearch `keyword`
      # field. Returns `nil` when `value` is `nil`.
      def self.encode(value)
        return nil if value.nil?

        # `fast_generate` is used instead of `generate`. They basically act the same, except that
        # `generate` includes an extra check for self-referential data structures. `value`
        # ultimately comes out of a parsed JSON document (either from an ElasticGraph event at
        # indexing time, or from a GraphQL query variable at search time), and JSON cannot express
        # self-referential structures, so that check is not needed.
        #
        # ...and even if such a structure did show up, we would get an error either way:
        # `JSON.generate` would raise `JSON::NestingError` whereas `JSON.fast_generate` would give
        # us a `SystemStackError`.
        canonical_value = canonicalize(value)
        ::JSON.fast_generate(canonical_value)
      end

      # Decodes a string produced by `encode` back into the value it represents.
      # Returns `nil` when `string` is `nil`.
      def self.decode(string)
        string.nil? ? nil : ::JSON.parse(string)
      end

      # Converts `value` to a canonical form before it is dumped as JSON. Each JSON value is
      # indexed as a `keyword`, and we want equality filters to treat equivalent JSON objects as
      # equal even when their normally generated JSON differs. For example, these two should be
      # considered equivalent:
      #
      #   {"a": 1, "b": 2} vs {"b": 2, "a": 1}
      #
      # To achieve that, hash entries are emitted sorted by key (recursively, including hashes
      # nested inside arrays). The same logic runs at indexing time and at query time.
      def self.canonicalize(value)
        case value
        when ::Hash
          entries_in_key_order = value.sort_by { |key, _| key.to_s }

          entries_in_key_order.each_with_object({}) do |(key, nested), canonical|
            canonical[key] = canonicalize(nested)
          end
        when ::Array
          value.map { |element| canonicalize(element) }
        else
          value
        end
      end
      private_class_method :canonicalize
    end
  end
end
@@ -0,0 +1,15 @@
1
+ # Copyright 2024 Block, Inc.
2
+ #
3
+ # Use of this source code is governed by an MIT-style
4
+ # license that can be found in the LICENSE file or at
5
+ # https://opensource.org/licenses/MIT.
6
+ #
7
+ # frozen_string_literal: true
8
+
9
module ElasticGraph
  # The version of all ElasticGraph gems.
  VERSION = "0.18.0.0"

  # The comment below is an annotation for the Steep type checker, not ordinary prose;
  # keep its text exactly as-is.
  # Steep weirdly expects this here...
  # @dynamic self.define_schema
end