elasticgraph-support 0.18.0.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,293 @@
1
+ # Copyright 2024 Block, Inc.
2
+ #
3
+ # Use of this source code is governed by an MIT-style
4
+ # license that can be found in the LICENSE file or at
5
+ # https://opensource.org/licenses/MIT.
6
+ #
7
+ # frozen_string_literal: true
8
+
9
module ElasticGraph
  module Support
    # Models a set of `::Time` objects, but does so using one or more `::Range` objects.
    # This is done so that we can support unbounded sets (such as "all times after midnight
    # on date X").
    #
    # Internally, this is a simple wrapper around a set of `::Range` objects. Those ranges take
    # a few different forms:
    #
    # - ALL: a range with no bounds, which implicitly contains all `::Time`s. (It's like the
    #   integer set from negative to positive infinity).
    # - An open range: a range with only an upper or lower bound (but not the other).
    # - A closed range: a range with an upper and lower bound.
    # - An empty range: a range that contains no `::Time`s, by virtue of its bounds having no overlap.
    class TimeSet < ::Data.define(:ranges)
      # The amount to add to a time to get the next consecutive time, based
      # on the level of granularity we support. According to the Elasticsearch docs[1],
      # it only supports millisecond granularity, so that's all we support:
      #
      # > Internally, dates are converted to UTC (if the time-zone is specified) and
      # > stored as a long number representing milliseconds-since-the-epoch.
      #
      # We want exact precision here, so we are avoiding using a float for this, preferring
      # to use a rational instead.
      #
      # [1] https://www.elastic.co/guide/en/elasticsearch/reference/7.15/date.html
      CONSECUTIVE_TIME_INCREMENT = Rational(1, 1000)

      # Factory method to construct a `TimeSet` using a range with the given bounds.
      #
      # At most one lower bound (`gt` or `gte`) and at most one upper bound (`lt` or `lte`)
      # may be given; omitted bounds leave that side of the range open.
      #
      # Raises `ArgumentError` when two lower bounds or two upper bounds are provided.
      def self.of_range(gt: nil, gte: nil, lt: nil, lte: nil)
        if gt && gte
          raise ArgumentError, "TimeSet got two lower bounds, but can have only one (gt: #{gt.inspect}, gte: #{gte.inspect})"
        end

        if lt && lte
          raise ArgumentError, "TimeSet got two upper bounds, but can have only one (lt: #{lt.inspect}, lte: #{lte.inspect})"
        end

        # To be able to leverage Ruby's Range class, we need to convert to the "inclusive" ("or equal")
        # form. This cuts down on the number of test cases we need to write and also Ruby's range lets
        # you control whether the end of a range is inclusive or exclusive, but doesn't let you control
        # the beginning of the range.
        #
        # This is safe to do because our datastores only work with `::Time`s at millisecond granularity,
        # so `> t` is equivalent to `>= (t + 1ms)` and `< t` is equivalent to `<= (t - 1ms)`.
        lower_bound = gt&.+(CONSECUTIVE_TIME_INCREMENT) || gte
        upper_bound = lt&.-(CONSECUTIVE_TIME_INCREMENT) || lte

        # `build_non_empty` returns nil for an empty range, which `compact` drops (yielding EMPTY).
        of_range_objects(_ = [RangeFactory.build_non_empty(lower_bound, upper_bound)].compact)
      end

      # Factory method to construct a `TimeSet` from a collection of `::Time` objects.
      # Internally we convert it to a set of `::Range` objects, one per unique time.
      def self.of_times(times)
        of_range_objects(times.map { |t| ::Range.new(t, t) })
      end

      # Factory method to construct a `TimeSet` from a previously built collection of
      # ::Time ranges. Mostly used internally by `TimeSet` and in tests.
      #
      # Returns the shared EMPTY instance when no ranges are given, and the shared ALL
      # instance when any of the given ranges is unbounded on both sides (the union of
      # anything with an unbounded range is ALL, regardless of where it appears).
      def self.of_range_objects(range_objects)
        # Use our singleton EMPTY or ALL instances if we can to save on memory.
        return EMPTY if range_objects.empty?
        return ALL if range_objects.any? { |r| r.begin.nil? && r.end.nil? }

        new(range_objects)
      end

      # Returns a new `TimeSet` containing `::Time`s common to this set and `other_set`.
      def intersection(other_set)
        # Here we rely on the distributive and commutative properties of set algebra:
        #
        # https://en.wikipedia.org/wiki/Algebra_of_sets
        # A ∩ (B ∪ C) = (A ∩ B) ∪ (A ∩ C) (distributive property)
        # A ∩ B = B ∩ A (commutative property)
        #
        # We can combine these properties to see how the intersection of sets of ranges would work:
        # (A₁ ∪ A₂) ∩ (B₁ ∪ B₂)
        # = ((A₁ ∪ A₂) ∩ B₁) ∪ ((A₁ ∪ A₂) ∩ B₂) (expanding based on distributive property)
        # = (B₁ ∩ (A₁ ∪ A₂)) ∪ (B₂ ∩ (A₁ ∪ A₂)) (rearranging based on commutative property)
        # = ((B₁ ∩ A₁) ∪ (B₁ ∩ A₂)) ∪ ((B₂ ∩ A₁) ∪ (B₂ ∩ A₂)) (expanding based on distributive property)
        # = (B₁ ∩ A₁) ∪ (B₁ ∩ A₂) ∪ (B₂ ∩ A₁) ∪ (B₂ ∩ A₂) (removing excess parens)
        # = union of (intersection of each pair)
        intersected_ranges = ranges.to_a.product(other_set.ranges.to_a)
          .filter_map { |r1, r2| intersect_ranges(r1, r2) }

        TimeSet.of_range_objects(intersected_ranges)
      end

      # Returns a new `TimeSet` containing `::Time`s that are in either this set or `other_set`.
      def union(other_set)
        TimeSet.of_range_objects(ranges.union(other_set.ranges))
      end

      # Returns true if the given `::Time` is a member of this `TimeSet`.
      def member?(time)
        ranges.any? { |r| r.cover?(time) }
      end

      # Returns true if this `TimeSet` and the given one have at least one time in common.
      def intersect?(other_set)
        other_set.ranges.any? do |r1|
          ranges.any? { |r2| ranges_intersect?(r1, r2) }
        end
      end

      # Returns true if this TimeSet contains no members.
      def empty?
        ranges.empty?
      end

      # Returns a new `TimeSet` containing the difference between this `TimeSet` and the given one.
      def -(other)
        # Subtract each of `other`'s ranges in turn from the ranges accumulated so far.
        new_ranges = other.ranges.to_a.reduce(ranges.to_a) do |accum, other_range|
          accum.flat_map do |self_range|
            # Since the ranges don't intersect, there is nothing to remove from `self_range`; just return it unmodified.
            next [self_range] unless ranges_intersect?(self_range, other_range)

            # Since the ranges intersect, `self_range` must be reduced somehow. Depending on what kind of
            # intersection we have (e.g. exact equality, `self_range` fully inside `other_range`, `other_range`
            # fully inside `self_range`, partial overlap where `self_range` begins before `other_range`, or partial
            # overlap where `self_range` ends after `other_range`), we may have a part of `self_range` that comes
            # before `other_range`, a part of `self_range` that comes after `other_range`, both, or neither. Below
            # we build the before and after parts as candidates, but then ignore any resulting ranges that are
            # invalid, which leaves us with the correct result, without having to explicitly handle each possible case.

            # @type var candidates: ::Array[timeRange]
            candidates = []

            if (other_range_begin = other_range.begin)
              # This represents the parts of `self_range` that come _before_ `other_range`.
              candidates << ::Range.new(self_range.begin, other_range_begin - CONSECUTIVE_TIME_INCREMENT)
            end

            if (other_range_end = other_range.end)
              # This represents the parts of `self_range` that come _after_ `other_range`.
              candidates << ::Range.new(other_range_end + CONSECUTIVE_TIME_INCREMENT, self_range.end)
            end

            # While some of the ranges produced above may be invalid (due to being descending), we don't have to
            # filter them out here because `#initialize` takes care of it.
            candidates
          end
        end

        TimeSet.of_range_objects(new_ranges)
      end

      # Returns a new `TimeSet` containing every `::Time` that is not a member of this one.
      def negate
        ALL - self
      end

      private

      private_class_method :new # use `of_range`, `of_times`, or `of_range_objects` instead.

      # To ensure immutability, we override this to freeze the set. For convenience, we allow the `ranges`
      # arg to be an array, and convert to a set here. In addition, we take care of normalizing to the most
      # optimal form by merging overlapping ranges here, and ignore descending ranges.
      def initialize(ranges:)
        normalized_ranges = ranges
          .reject { |r| descending_range?(r) }
          .to_set
          .then { |rs| merge_overlapping_or_adjacent_ranges(rs) }
          .freeze

        super(ranges: normalized_ranges)
      end

      # Returns true if at least one ::Time exists in both ranges.
      def ranges_intersect?(r1, r2)
        r1.cover?(r2.begin) || r1.cover?(r2.end) || r2.cover?(r1.begin) || r2.cover?(r1.end)
      end

      # Returns true if the given ranges are adjacent with no room for any ::Time
      # objects to exist between the ranges given the millisecond granularity we operate at.
      def adjacent?(r1, r2)
        r1.end&.+(CONSECUTIVE_TIME_INCREMENT)&.==(r2.begin) || r2.end&.+(CONSECUTIVE_TIME_INCREMENT)&.==(r1.begin) || false
      end

      # Combines the given ranges into a new range that only contains the common subset of ::Time objects.
      # Returns `nil` if there is no intersection.
      def intersect_ranges(r1, r2)
        # A `nil` bound is unbounded, so it never constrains the result; `compact` drops it.
        RangeFactory.build_non_empty(
          [r1.begin, r2.begin].compact.max,
          [r1.end, r2.end].compact.min
        )
      end

      # Helper method that merges the given set of ranges into an equivalent set that contains
      # as few ranges as possible while covering the same set of ::Time objects.
      # As an example, consider these two ranges:
      #
      # - 2020-05-01 to 2020-07-01
      # - 2020-06-01 to 2020-08-01
      #
      # These two ranges can safely be merged into a single range of 2020-05-01 to 2020-08-01.
      # Technically speaking, this is not required; we can just return a TimeSet containing
      # multiple ranges. However, the goal of a TimeSet is to represent a set of Time objects
      # as minimally as possible, and to that end it is useful to merge ranges when possible.
      # While it adds a bit of complexity to merge ranges like this, it'll simplify future
      # calculations involving a TimeSet.
      #
      # The merge is a single sweep over the ranges ordered by their lower bound: because every
      # later range begins at or after the range currently being built, it can only extend that
      # range (when the two overlap or are adjacent) or start a new one. This takes one sort plus
      # one linear pass, regardless of how many ranges end up chained together.
      #
      # Expects `all_ranges` to contain no descending ranges (`#initialize` rejects them first).
      def merge_overlapping_or_adjacent_ranges(all_ranges)
        # Beginless ranges start at "negative infinity", so they must come before every bounded range.
        beginless_ranges, bounded_ranges = all_ranges.partition { |r| r.begin.nil? }
        ordered_ranges = beginless_ranges + bounded_ranges.sort_by(&:begin)

        merged_ranges = ordered_ranges.each_with_object([]) do |range, merged|
          current = merged.last

          if current && (ranges_intersect?(current, range) || adjacent?(current, range))
            # `current.begin` is already the smallest lower bound; only the upper bound can grow.
            merged[-1] = ::Range.new(current.begin, nil_or(:max, from: [current.end, range.end]))
          else
            merged << range
          end
        end

        merged_ranges.to_set
      end

      # Helper method for `merge_overlapping_or_adjacent_ranges` used to return the most "lenient" range boundary value.
      # `nil` is used for a beginless or endless range, so we return that if available; otherwise
      # we apply `min_or_max`.
      def nil_or(min_or_max, from:)
        return nil if from.include?(nil)
        from.public_send(min_or_max)
      end

      # Returns true if the range's lower bound comes after its upper bound (meaning it contains nothing).
      def descending_range?(range)
        # If either edge is `nil` it cannot be descending.
        return false if (range_begin = range.begin).nil?
        return false if (range_end = range.end).nil?

        # Otherwise we just compare the edges to determine if it's descending.
        range_begin > range_end
      end

      # An instance in which all `::Time`s fit.
      ALL = new([::Range.new(nil, nil)])
      # Singleton instance that's empty.
      EMPTY = new([])

      module RangeFactory
        # Helper method for building a range from the given bounds. Returns either
        # a built range, or, if the given bounds produce an empty range, returns nil.
        def self.build_non_empty(lower_bound, upper_bound)
          if lower_bound.nil? || upper_bound.nil? || lower_bound <= upper_bound
            ::Range.new(lower_bound, upper_bound)
          end
        end
      end
    end
  end
end
@@ -0,0 +1,108 @@
1
+ # Copyright 2024 Block, Inc.
2
+ #
3
+ # Use of this source code is governed by an MIT-style
4
+ # license that can be found in the LICENSE file or at
5
+ # https://opensource.org/licenses/MIT.
6
+ #
7
+ # frozen_string_literal: true
8
+
9
require "date" # provides `Time#to_date` and `Date` arithmetic used by `advance_one_unit`.

module ElasticGraph
  module Support
    module TimeUtil
      NANOS_PER_SECOND = 1_000_000_000
      NANOS_PER_MINUTE = NANOS_PER_SECOND * 60
      NANOS_PER_HOUR = NANOS_PER_MINUTE * 60

      # Simple helper function to convert a local time string (such as `03:45:12` or `12:30:43.756`)
      # to an integer value between 0 and 24 * 60 * 60 * 1,000,000,000 - 1 representing the nano of day
      # for the local time value.
      #
      # This is meant to match the behavior of Java's `LocalTime#toNanoOfDay()` API:
      # https://docs.oracle.com/en/java/javase/17/docs/api/java.base/java/time/LocalTime.html#toNanoOfDay()
      #
      # This is specifically useful when we need to work with local time values in a script: by converting
      # a local time parameter to nano-of-day, our script can more efficiently compare values, avoiding the
      # need to parse the same local time parameters over and over again as it applies the script to each
      # document.
      #
      # Note: this method assumes the given `local_time_string` is well-formed. You'll get an exception if
      # you provide a malformed value, but no effort has been put into giving a clear error message. The
      # caller is expected to have already validated that the `local_time_string` is formatted correctly.
      def self.nano_of_day_from_local_time(local_time_string)
        hours_str, minutes_str, full_seconds_str = local_time_string.split(":")
        seconds_str, subseconds_str = (_ = full_seconds_str).split(".")

        # An explicit base of 10 keeps zero-padded values such as "08" from being read as octal.
        hours = Integer(_ = hours_str, 10)
        minutes = Integer(_ = minutes_str, 10)
        seconds = Integer(seconds_str, 10)
        # Right-pad the fractional digits to 9 places so that e.g. "756" (ms) becomes 756_000_000 nanos.
        # When there is no fractional part, `subseconds_str` is nil and this yields 0.
        nanos = Integer(subseconds_str.to_s.ljust(9, "0"), 10)

        (hours * NANOS_PER_HOUR) + (minutes * NANOS_PER_MINUTE) + (seconds * NANOS_PER_SECOND) + nanos
      end

      # Helper method for advancing time. Unfortunately, Ruby's core `Time` type does not directly support this.
      # ActiveSupport (from rails) provides this functionality, but we don't depend on rails at all and don't
      # want to add such a heavyweight dependency for such a small thing.
      #
      # Luckily, our needs are quite limited, which makes this a much simpler problem than a general purpose `time.advance(...)` API:
      #
      # - We only need to support year, month, day, and hour advances.
      # - We only ever need to advance a single unit.
      #
      # This provides a simple, correct implementation for that constrained problem space.
      #
      # `unit` must be one of `:year`, `:month`, `:day`, or `:hour`; any other value results in `nil`.
      #
      # NOTE(review): unlike the `:month` branch, the `:year` branch does not correct for day overflow, so
      # advancing Feb 29 by a year lands on Mar 1 of the following year -- confirm that is intended.
      def self.advance_one_unit(time, unit)
        case unit
        when :year
          with_updated(time, year: time.year + 1)
        when :month
          maybe_next_month =
            if time.month == 12
              with_updated(time, year: time.year + 1, month: 1)
            else
              with_updated(time, month: time.month + 1)
            end

          # If the next month has fewer days than the month of `time`, then it can "spill over" to a day
          # from the first week of the month following that. For example, if the date of `time` was 2021-01-31
          # and we add a month, it attempts to go to `2021-02-31` but such a date doesn't exist--instead
          # `maybe_next_month` will be on `2021-03-03` because of the overflow. Here we correct for that.
          #
          # Our assumption (which we believe to be correct) is that every time this happens, both of these are true:
          # - `time.day` is near the end of its month
          # - `maybe_next_month.day` is near the start of its month
          #
          # ...and furthermore, we do not believe there is any other case where `time.day` and `maybe_next_month.day` can differ.
          if time.day > maybe_next_month.day
            # Stepping back by the day-of-month lands on the last day of the previous (intended) month.
            corrected_date = maybe_next_month.to_date - maybe_next_month.day
            with_updated(time, year: corrected_date.year, month: corrected_date.month, day: corrected_date.day)
          else
            maybe_next_month
          end
        when :day
          next_day = time.to_date + 1
          with_updated(time, year: next_day.year, month: next_day.month, day: next_day.day)
        when :hour
          time + 3600
        end
      end

      # Builds a copy of `time` with the given date parts replaced, keeping its time-of-day
      # (including sub-second precision) and its UTC offset.
      private_class_method def self.with_updated(time, year: time.year, month: time.month, day: time.day)
        # UTC needs to be treated special here due to an oddity of Ruby's Time class:
        #
        # > Time.utc(2021, 12, 2, 12, 30, 30).iso8601
        # => "2021-12-02T12:30:30Z"
        # > Time.new(2021, 12, 2, 12, 30, 30, 0).iso8601
        # => "2021-12-02T12:30:30+00:00"
        #
        # We want to preserve the `Z` suffix on the ISO8601 representation of the advanced time
        # (if it was there on the original time), so we use the `::Time.utc` method here to do that.
        # Non-UTC time must use `::Time.new(...)` with a UTC offset, though.
        if time.utc?
          ::Time.utc(year, month, day, time.hour, time.min, time.sec.to_r + time.subsec)
        else
          ::Time.new(year, month, day, time.hour, time.min, time.sec.to_r + time.subsec, time.utc_offset)
        end
      end
    end
  end
end
@@ -0,0 +1,67 @@
1
+ # Copyright 2024 Block, Inc.
2
+ #
3
+ # Use of this source code is governed by an MIT-style
4
+ # license that can be found in the LICENSE file or at
5
+ # https://opensource.org/licenses/MIT.
6
+ #
7
+ # frozen_string_literal: true
8
+
9
+ require "json"
10
+
11
module ElasticGraph
  module Support
    # Responsible for encoding `Untyped` values into strings. This logic lives here in `elasticgraph-support`
    # so that it can be shared between the `Untyped` indexing preparer (which lives in `elasticgraph-indexer`)
    # and the `Untyped` coercion adapter (which lives in `elasticgraph-graphql`). It is important that these
    # share the same logic so that the string values we attempt to filter on at query time match the string values
    # we indexed when given the semantically equivalent untyped data.
    #
    # Note: change this class with care. Changing the behavior to make `encode` produce different strings may result
    # in breaking queries if the `Untyped`s stored in the index were indexed using previous encoding logic.
    # A backfill into the datastore will likely be required to avoid this issue.
    module UntypedEncoder
      # Encodes the given untyped value to a String so it can be indexed in an Elasticsearch/OpenSearch `keyword` field.
      # Returns `nil` (rather than the JSON string `"null"`) when given `nil`.
      def self.encode(value)
        return nil if value.nil?
        # Note: we use `fast_generate` here instead of `generate`. They basically act the same, except
        # `generate` includes an extra check for self-referential data structures. `value` here ultimately
        # comes out of a parsed JSON document (e.g. either from an ElasticGraph event at indexing time, or
        # as a GraphQL query variable at search time), and JSON cannot express self-referential data
        # structures, so we do not have to worry about that happening.
        #
        # ...but even if it did, we would get an error either way: `JSON.generate` would raise
        # `JSON::NestingError` whereas `JSON.fast_generate` would give us a `SystemStackError`.
        #
        # NOTE(review): newer releases of the `json` gem reportedly deprecate `fast_generate` -- confirm
        # before upgrading, and make sure any replacement produces byte-identical output.
        ::JSON.fast_generate(canonicalize(value))
      end

      # Decodes a previously encoded Untyped value, returning its original value.
      # Returns `nil` when given `nil`.
      def self.decode(string)
        return nil if string.nil?
        ::JSON.parse(string)
      end

      # Helper method that converts `value` to a canonical form before we dump it as JSON.
      # We do this because we index each JSON value as a `keyword` in the index, and we want
      # equality filters on a JSON value field to consider equivalent JSON objects to be equal
      # even if their normally generated JSON is not the same. For example, we want ElasticGraph
      # to treat these two as being equivalent:
      #
      # {"a": 1, "b": 2} vs {"b": 2, "a": 1}
      #
      # To achieve this, we ensure JSON objects are generated in sorted order, and we use this same
      # logic both at indexing time and also at query time when we are filtering.
      private_class_method def self.canonicalize(value)
        case value
        when ::Hash
          # Sort entries by the string form of their key, then canonicalize each value recursively.
          value
            .sort_by { |k, _v| k.to_s }
            .to_h { |k, v| [k, canonicalize(v)] }
        when ::Array
          # Array order is significant in JSON, so only the elements themselves are canonicalized.
          value.map { |v| canonicalize(v) }
        else
          value
        end
      end
    end
  end
end
@@ -0,0 +1,15 @@
1
+ # Copyright 2024 Block, Inc.
2
+ #
3
+ # Use of this source code is governed by an MIT-style
4
+ # license that can be found in the LICENSE file or at
5
+ # https://opensource.org/licenses/MIT.
6
+ #
7
+ # frozen_string_literal: true
8
+
9
module ElasticGraph
  # The version of all ElasticGraph gems.
  VERSION = "0.18.0.0"

  # Steep weirdly expects this here...
  # @dynamic self.define_schema
end