nvfuser_cu121_torch25-0.2.25.dev20250201-cp312-cp312-manylinux_2_28_x86_64.whl
- nvfuser/_C.cpython-312-x86_64-linux-gnu.so +0 -0
- nvfuser/__init__.py +618 -0
- nvfuser/__init__.pyi +4 -0
- nvfuser/contrib/__init__.py +9 -0
- nvfuser/contrib/nn/__init__.py +13 -0
- nvfuser/contrib/nn/normalization.py +725 -0
- nvfuser/include/nvfuser/alias_analysis.h +116 -0
- nvfuser/include/nvfuser/bfs.h +929 -0
- nvfuser/include/nvfuser/codegen.h +26 -0
- nvfuser/include/nvfuser/compute_at.h +28 -0
- nvfuser/include/nvfuser/compute_at_map.h +394 -0
- nvfuser/include/nvfuser/contiguity.h +351 -0
- nvfuser/include/nvfuser/cuda_utils.h +50 -0
- nvfuser/include/nvfuser/debug.h +50 -0
- nvfuser/include/nvfuser/device_lower/analysis/bank_conflict.h +53 -0
- nvfuser/include/nvfuser/device_lower/analysis/circular_buffer.h +109 -0
- nvfuser/include/nvfuser/device_lower/analysis/device_version.h +65 -0
- nvfuser/include/nvfuser/device_lower/analysis/divisible_split.h +28 -0
- nvfuser/include/nvfuser/device_lower/analysis/fused_reduction.h +36 -0
- nvfuser/include/nvfuser/device_lower/analysis/index_compute.h +322 -0
- nvfuser/include/nvfuser/device_lower/analysis/predicate_elimination.h +71 -0
- nvfuser/include/nvfuser/device_lower/analysis/sync_information.h +47 -0
- nvfuser/include/nvfuser/device_lower/analysis/tensor_memory.h +65 -0
- nvfuser/include/nvfuser/device_lower/analysis/thread_predicate.h +158 -0
- nvfuser/include/nvfuser/device_lower/analysis/tma.h +93 -0
- nvfuser/include/nvfuser/device_lower/analysis/trivial_broadcast.h +75 -0
- nvfuser/include/nvfuser/device_lower/id_model_options.h +135 -0
- nvfuser/include/nvfuser/device_lower/lower2device.h +391 -0
- nvfuser/include/nvfuser/device_lower/pass/alias_memory.h +37 -0
- nvfuser/include/nvfuser/device_lower/pass/allocation.h +32 -0
- nvfuser/include/nvfuser/device_lower/pass/circular_buffer.h +191 -0
- nvfuser/include/nvfuser/device_lower/pass/expr_sort.h +17 -0
- nvfuser/include/nvfuser/device_lower/pass/fusion_simplifier.h +21 -0
- nvfuser/include/nvfuser/device_lower/pass/grid_serialization.h +26 -0
- nvfuser/include/nvfuser/device_lower/pass/index.h +200 -0
- nvfuser/include/nvfuser/device_lower/pass/inline_ptx.h +16 -0
- nvfuser/include/nvfuser/device_lower/pass/insert_syncs.h +39 -0
- nvfuser/include/nvfuser/device_lower/pass/instrument.h +24 -0
- nvfuser/include/nvfuser/device_lower/pass/loop_rotation.h +150 -0
- nvfuser/include/nvfuser/device_lower/pass/loops.h +68 -0
- nvfuser/include/nvfuser/device_lower/pass/magic_zero.h +86 -0
- nvfuser/include/nvfuser/device_lower/pass/misaligned_vectorization.h +118 -0
- nvfuser/include/nvfuser/device_lower/pass/predicate.h +23 -0
- nvfuser/include/nvfuser/device_lower/pass/replace_size.h +24 -0
- nvfuser/include/nvfuser/device_lower/pass/scalar_hoist.h +115 -0
- nvfuser/include/nvfuser/device_lower/pass/unroll.h +98 -0
- nvfuser/include/nvfuser/device_lower/pass/vectorize_welford.h +45 -0
- nvfuser/include/nvfuser/device_lower/pass/warp_reduce.h +23 -0
- nvfuser/include/nvfuser/device_lower/utils.h +382 -0
- nvfuser/include/nvfuser/device_lower/validation.h +74 -0
- nvfuser/include/nvfuser/disjoint_set.h +556 -0
- nvfuser/include/nvfuser/dispatch.h +334 -0
- nvfuser/include/nvfuser/driver_api.h +49 -0
- nvfuser/include/nvfuser/dynamic_transform.h +316 -0
- nvfuser/include/nvfuser/dynamic_type/C++20/type_traits +37 -0
- nvfuser/include/nvfuser/dynamic_type/dynamic_type.h +969 -0
- nvfuser/include/nvfuser/dynamic_type/error.h +24 -0
- nvfuser/include/nvfuser/dynamic_type/type_traits.h +703 -0
- nvfuser/include/nvfuser/evaluator_common.h +295 -0
- nvfuser/include/nvfuser/exceptions.h +283 -0
- nvfuser/include/nvfuser/expr_evaluator.h +125 -0
- nvfuser/include/nvfuser/expr_simplifier.h +218 -0
- nvfuser/include/nvfuser/flatbuffers/allocator.h +68 -0
- nvfuser/include/nvfuser/flatbuffers/array.h +253 -0
- nvfuser/include/nvfuser/flatbuffers/base.h +486 -0
- nvfuser/include/nvfuser/flatbuffers/buffer.h +154 -0
- nvfuser/include/nvfuser/flatbuffers/buffer_ref.h +53 -0
- nvfuser/include/nvfuser/flatbuffers/code_generator.h +80 -0
- nvfuser/include/nvfuser/flatbuffers/code_generators.h +234 -0
- nvfuser/include/nvfuser/flatbuffers/default_allocator.h +64 -0
- nvfuser/include/nvfuser/flatbuffers/detached_buffer.h +114 -0
- nvfuser/include/nvfuser/flatbuffers/flatbuffer_builder.h +1225 -0
- nvfuser/include/nvfuser/flatbuffers/flatbuffers.h +272 -0
- nvfuser/include/nvfuser/flatbuffers/flatc.h +130 -0
- nvfuser/include/nvfuser/flatbuffers/flex_flat_util.h +36 -0
- nvfuser/include/nvfuser/flatbuffers/flexbuffers.h +1889 -0
- nvfuser/include/nvfuser/flatbuffers/grpc.h +300 -0
- nvfuser/include/nvfuser/flatbuffers/hash.h +127 -0
- nvfuser/include/nvfuser/flatbuffers/idl.h +1359 -0
- nvfuser/include/nvfuser/flatbuffers/minireflect.h +420 -0
- nvfuser/include/nvfuser/flatbuffers/reflection.h +522 -0
- nvfuser/include/nvfuser/flatbuffers/reflection_generated.h +1471 -0
- nvfuser/include/nvfuser/flatbuffers/registry.h +128 -0
- nvfuser/include/nvfuser/flatbuffers/stl_emulation.h +513 -0
- nvfuser/include/nvfuser/flatbuffers/string.h +64 -0
- nvfuser/include/nvfuser/flatbuffers/struct.h +53 -0
- nvfuser/include/nvfuser/flatbuffers/table.h +168 -0
- nvfuser/include/nvfuser/flatbuffers/util.h +731 -0
- nvfuser/include/nvfuser/flatbuffers/vector.h +393 -0
- nvfuser/include/nvfuser/flatbuffers/vector_downward.h +273 -0
- nvfuser/include/nvfuser/flatbuffers/verifier.h +317 -0
- nvfuser/include/nvfuser/fusion.h +511 -0
- nvfuser/include/nvfuser/fusion_guard.h +37 -0
- nvfuser/include/nvfuser/fusion_profiler.h +311 -0
- nvfuser/include/nvfuser/fusion_segmenter.h +751 -0
- nvfuser/include/nvfuser/global_allocator.h +27 -0
- nvfuser/include/nvfuser/grouped_reduction.h +47 -0
- nvfuser/include/nvfuser/host_ir/container.h +60 -0
- nvfuser/include/nvfuser/host_ir/executor.h +152 -0
- nvfuser/include/nvfuser/host_ir/host_ir.h +320 -0
- nvfuser/include/nvfuser/host_ir/lower.h +35 -0
- nvfuser/include/nvfuser/id_model/circular_buffer_indexing.h +56 -0
- nvfuser/include/nvfuser/id_model/contiguity.h +166 -0
- nvfuser/include/nvfuser/id_model/id_model.h +359 -0
- nvfuser/include/nvfuser/id_model/id_model_index_compute.h +81 -0
- nvfuser/include/nvfuser/id_model/indexing.h +208 -0
- nvfuser/include/nvfuser/id_model/indexing_traversal.h +72 -0
- nvfuser/include/nvfuser/id_model/indexing_utils.h +62 -0
- nvfuser/include/nvfuser/id_model/loop_promotion.h +180 -0
- nvfuser/include/nvfuser/id_model/predicate_indexing.h +104 -0
- nvfuser/include/nvfuser/id_model/schedule.h +54 -0
- nvfuser/include/nvfuser/id_model/to_string.h +87 -0
- nvfuser/include/nvfuser/id_model/transform_replay.h +58 -0
- nvfuser/include/nvfuser/id_model/utils.h +176 -0
- nvfuser/include/nvfuser/id_model/validation_utils.h +55 -0
- nvfuser/include/nvfuser/index_compute.h +651 -0
- nvfuser/include/nvfuser/instrumentation.h +107 -0
- nvfuser/include/nvfuser/ir/all_nodes.h +14 -0
- nvfuser/include/nvfuser/ir/base_nodes.h +687 -0
- nvfuser/include/nvfuser/ir/builder.h +215 -0
- nvfuser/include/nvfuser/ir/builder_passkey.h +29 -0
- nvfuser/include/nvfuser/ir/cloner.h +185 -0
- nvfuser/include/nvfuser/ir/container.h +226 -0
- nvfuser/include/nvfuser/ir/graphviz.h +119 -0
- nvfuser/include/nvfuser/ir/interface_nodes.h +957 -0
- nvfuser/include/nvfuser/ir/internal_base_nodes.h +744 -0
- nvfuser/include/nvfuser/ir/internal_nodes.h +2792 -0
- nvfuser/include/nvfuser/ir/iostream.h +98 -0
- nvfuser/include/nvfuser/ir/printer.h +57 -0
- nvfuser/include/nvfuser/ir/utils.h +801 -0
- nvfuser/include/nvfuser/iter_visitor.h +661 -0
- nvfuser/include/nvfuser/kernel.h +299 -0
- nvfuser/include/nvfuser/kernel_db/kernel_db.h +109 -0
- nvfuser/include/nvfuser/kernel_db/utils.h +37 -0
- nvfuser/include/nvfuser/kernel_ir.h +1457 -0
- nvfuser/include/nvfuser/kernel_ir_dispatch.h +147 -0
- nvfuser/include/nvfuser/linked_hash_map.h +97 -0
- nvfuser/include/nvfuser/logical_domain_map.h +577 -0
- nvfuser/include/nvfuser/macros.h +23 -0
- nvfuser/include/nvfuser/mma_type.h +257 -0
- nvfuser/include/nvfuser/multidevice/c10d_mock.h +175 -0
- nvfuser/include/nvfuser/multidevice/communication.h +232 -0
- nvfuser/include/nvfuser/multidevice/communicator.h +179 -0
- nvfuser/include/nvfuser/multidevice/device_mesh.h +95 -0
- nvfuser/include/nvfuser/multidevice/executor.h +107 -0
- nvfuser/include/nvfuser/multidevice/multidevice.h +18 -0
- nvfuser/include/nvfuser/multidevice/utils.h +187 -0
- nvfuser/include/nvfuser/non_divisible_split.h +86 -0
- nvfuser/include/nvfuser/opaque_type.h +129 -0
- nvfuser/include/nvfuser/ops/alias.h +192 -0
- nvfuser/include/nvfuser/ops/all_ops.h +13 -0
- nvfuser/include/nvfuser/ops/arith.h +712 -0
- nvfuser/include/nvfuser/ops/composite.h +130 -0
- nvfuser/include/nvfuser/ops/indexing.h +55 -0
- nvfuser/include/nvfuser/ops/normalization.h +263 -0
- nvfuser/include/nvfuser/ops/utils.h +127 -0
- nvfuser/include/nvfuser/options.h +313 -0
- nvfuser/include/nvfuser/parallel_dimension_map.h +95 -0
- nvfuser/include/nvfuser/parallel_type_bitmap.h +365 -0
- nvfuser/include/nvfuser/polymorphic_value.h +432 -0
- nvfuser/include/nvfuser/predicate_compute.h +213 -0
- nvfuser/include/nvfuser/python_frontend/distributed_tensor.h +50 -0
- nvfuser/include/nvfuser/python_frontend/fusion_cache.h +298 -0
- nvfuser/include/nvfuser/python_frontend/fusion_definition.h +372 -0
- nvfuser/include/nvfuser/python_frontend/fusion_record.h +3124 -0
- nvfuser/include/nvfuser/python_frontend/fusion_state.h +143 -0
- nvfuser/include/nvfuser/python_frontend/python_bindings.h +27 -0
- nvfuser/include/nvfuser/python_frontend/segmentation.h +246 -0
- nvfuser/include/nvfuser/python_frontend/translation.h +20 -0
- nvfuser/include/nvfuser/python_frontend/translation_utils.h +308 -0
- nvfuser/include/nvfuser/scheduler/all_schedulers.h +17 -0
- nvfuser/include/nvfuser/scheduler/ampere_multi_matmul.h +206 -0
- nvfuser/include/nvfuser/scheduler/cache_policy_refiner.h +19 -0
- nvfuser/include/nvfuser/scheduler/compile_time_info.h +322 -0
- nvfuser/include/nvfuser/scheduler/debug_utils.h +68 -0
- nvfuser/include/nvfuser/scheduler/expr_eval_sched.h +45 -0
- nvfuser/include/nvfuser/scheduler/heuristic.h +113 -0
- nvfuser/include/nvfuser/scheduler/hopper_multi_matmul.h +204 -0
- nvfuser/include/nvfuser/scheduler/mark_aliases.h +19 -0
- nvfuser/include/nvfuser/scheduler/matmul.h +40 -0
- nvfuser/include/nvfuser/scheduler/matmul_heuristic.h +293 -0
- nvfuser/include/nvfuser/scheduler/matmul_heuristic_plugin.h +65 -0
- nvfuser/include/nvfuser/scheduler/matmul_heuristic_plugin_api.h +99 -0
- nvfuser/include/nvfuser/scheduler/matmul_utils.h +54 -0
- nvfuser/include/nvfuser/scheduler/mma_utils.h +500 -0
- nvfuser/include/nvfuser/scheduler/multi_matmul.h +74 -0
- nvfuser/include/nvfuser/scheduler/no_op.h +48 -0
- nvfuser/include/nvfuser/scheduler/normalization_inner.h +49 -0
- nvfuser/include/nvfuser/scheduler/normalization_inner_outer.h +51 -0
- nvfuser/include/nvfuser/scheduler/normalization_outer.h +48 -0
- nvfuser/include/nvfuser/scheduler/normalization_utils.h +379 -0
- nvfuser/include/nvfuser/scheduler/pointwise.h +183 -0
- nvfuser/include/nvfuser/scheduler/pointwise_heuristic.h +118 -0
- nvfuser/include/nvfuser/scheduler/pointwise_utils.h +24 -0
- nvfuser/include/nvfuser/scheduler/reduction.h +43 -0
- nvfuser/include/nvfuser/scheduler/reduction_heuristic.h +339 -0
- nvfuser/include/nvfuser/scheduler/reduction_utils.h +159 -0
- nvfuser/include/nvfuser/scheduler/registry.h +97 -0
- nvfuser/include/nvfuser/scheduler/registry_utils.h +111 -0
- nvfuser/include/nvfuser/scheduler/resize.h +41 -0
- nvfuser/include/nvfuser/scheduler/resize_heuristic.h +67 -0
- nvfuser/include/nvfuser/scheduler/runtime_info.h +166 -0
- nvfuser/include/nvfuser/scheduler/scheduler_types.h +80 -0
- nvfuser/include/nvfuser/scheduler/transpose.h +114 -0
- nvfuser/include/nvfuser/scheduler/transpose_heuristic.h +164 -0
- nvfuser/include/nvfuser/scheduler/utils.h +771 -0
- nvfuser/include/nvfuser/scheduler/vectorize_helper.h +349 -0
- nvfuser/include/nvfuser/serde/factory.h +55 -0
- nvfuser/include/nvfuser/serde/fusion_cache_generated.h +4319 -0
- nvfuser/include/nvfuser/serde/fusion_record.h +124 -0
- nvfuser/include/nvfuser/serde/polymorphic_value.h +52 -0
- nvfuser/include/nvfuser/serde/utils.h +34 -0
- nvfuser/include/nvfuser/struct.inl +127 -0
- nvfuser/include/nvfuser/swizzle.h +54 -0
- nvfuser/include/nvfuser/sys_utils.h +40 -0
- nvfuser/include/nvfuser/tensor_metadata.h +118 -0
- nvfuser/include/nvfuser/tma.h +124 -0
- nvfuser/include/nvfuser/transform_iter.h +522 -0
- nvfuser/include/nvfuser/transform_replay.h +297 -0
- nvfuser/include/nvfuser/transform_rfactor.h +33 -0
- nvfuser/include/nvfuser/transform_view.h +136 -0
- nvfuser/include/nvfuser/type.h +1125 -0
- nvfuser/include/nvfuser/type_promotion.h +61 -0
- nvfuser/include/nvfuser/utils.h +619 -0
- nvfuser/include/nvfuser/val_graph.h +446 -0
- nvfuser/include/nvfuser/val_graph_visitor.h +259 -0
- nvfuser/include/nvfuser/validator_utils.h +92 -0
- nvfuser/include/nvfuser/vectorization_info.h +31 -0
- nvfuser/include/nvfuser/visibility.h +21 -0
- nvfuser/lib/libnvfuser_codegen.so +0 -0
- nvfuser/nvfuser_version.py +69 -0
- nvfuser/pytorch_utils.py +184 -0
- nvfuser/share/cmake/nvfuser/NvfuserConfig-release.cmake +20 -0
- nvfuser/share/cmake/nvfuser/NvfuserConfig.cmake +106 -0
- nvfuser/utils.py +18 -0
- nvfuser/version.py +1 -0
- nvfuser_cu121_torch25-0.2.25.dev20250201.dist-info/LICENSE +976 -0
- nvfuser_cu121_torch25-0.2.25.dev20250201.dist-info/METADATA +16 -0
- nvfuser_cu121_torch25-0.2.25.dev20250201.dist-info/RECORD +242 -0
- nvfuser_cu121_torch25-0.2.25.dev20250201.dist-info/WHEEL +5 -0
- nvfuser_cu121_torch25-0.2.25.dev20250201.dist-info/top_level.txt +1 -0
- nvfuser_cu121_torch25.libs/libnvToolsExt-847d78f2.so.1.0.0 +0 -0
nvfuser/include/nvfuser/expr_simplifier.h
@@ -0,0 +1,218 @@
+// clang-format off
+/*
+ * SPDX-FileCopyrightText: Copyright (c) 2023-present NVIDIA CORPORATION & AFFILIATES.
+ * All rights reserved.
+ * SPDX-License-Identifier: BSD-3-Clause
+ */
+// clang-format on
+#pragma once
+
+#include <exceptions.h>
+#include <ir/all_nodes.h>
+#include <visibility.h>
+
+#include <vector>
+
+// Note: [The Mathematics of Integer Arithmetic]
+//
+// We learn arithmetic as early as elementary school, and are used to
+// simplifying expressions using rules like (a+b)/c = a/c+b/c. However, when we
+// are dealing with integer arithmetic, which is the case for index and
+// predicate simplification, many of the rules we learned in elementary school
+// no longer hold. For example, (1+1)/2 != 1/2+1/2, because the left hand side
+// is 1 and the right hand side is 0 + 0 = 0. So when considering adding a new
+// simplification rule, we need to be very careful to make sure the rule is
+// mathematically correct.
+//
+// Suggested reading materials:
+// - doc/math/abstract-algebra.md reviews abstract algebra, a theory that tells
+//   us which of the rules we are used to are still valid, and which are not.
+// - doc/math/integer-division.md reviews the definitions and properties of div
+//   and mod in textbooks; it also describes some theorems that we proved
+//   ourselves that are useful for simplifying integer expressions.
+// - doc/math/monotonic-function.md reviews the definition and properties of
+//   monotonic functions.
+//
+// We can use the following rules to simplify integer expressions:
+//
+// A) Associativity of +: a + (b + c) = (a + b) + c
+// B) Associativity of *: a * (b * c) = (a * b) * c
+// C) Commutativity of +: a + b = b + a
+// D) Commutativity of *: a * b = b * a
+// E) Distributivity of * over +: a * (b + c) = (a * b) + (a * c)
+// F) Distributivity of * over +: (a + b) * c = (a * c) + (b * c)
+// G) (-a) / b = -(a / b) = a / (-b)
+// H) (-a) % b = -(a % b) = a % (-b)
+// I) If -|a| < r < |a|, then r % a = r, r / a = 0
+// J) Distributivity of % over +:
+//    If compatible_sign(a, b), then (a + b) % c = (a % c + b % c) % c
+//    J.1) If compatible_sign(a, b) and a % c = 0, then (a + b) % c = b % c
+//    J.2) Let g = gcd(a, c). If compatible_sign(a, b), and -|g| < b < |g|,
+//         then (a + b) % c = a % c + b
+// K) Distributivity of % over *:
+//    If compatible_sign(a, b), then (a * b) % c = (a % c * b % c) % c
+// L) If a is a multiple of b, then a % b = 0
+// M) If b is a multiple of c, then we have: a*(b/c) = (a*b)/c
+// N) a / (b * c) = (a / b) / c
+// O) If d divides a and b, then a % b = ((a / d) % (b / d)) * d
+// P) If b is a multiple of c, then a/(b/c) = (a*c)/b
+// Q) If compatible_sign(a, b) and -|c| < a % c + b % c < |c|, then
+//    (a+b)/c = a/c + b/c
+//    Q.1) If compatible_sign(a, b) and a % c = 0, then (a+b)/c = a/c + b/c
+//    Q.2) Let g = gcd(a, c). If compatible_sign(a, b), and -|g| < b < |g|,
+//         then (a + b) / c = a/c
+//
+// See doc/math/integer-division.md for proofs of these rules.
+//
+// Some examples of applying the above rules to simplify expressions:
+//
+// Example 7.1: Given that a >= 0 and b >= 0, simplify (a*4 + b) % 4
+// Answer: (a*4 + b) % 4 = ((a*4)%4 + b%4) % 4 (Rule J)
+//                       = (0 + b%4) % 4 (Rule L)
+//                       = b % 4 % 4 (Basic math)
+//                       = b % 4 (Rule I)
+//
+// Example 7.2: Given that 0 <= a < 3, simplify a % 4
+// Answer: a % 4 = a (Rule I)
+//
+// Example 7.3: Simplify (a * 256) / 4
+// Answer: (a * 256) / 4 = a * (256 / 4) (Rule M)
+//                       = a * 64 (Basic math)
+//
+// Example 7.4: Simplify (a / 4) / 64
+// Answer: (a / 4) / 64 = a / (4 * 64) (Rule N)
+//                      = a / 256 (Basic math)
+//
+// Example 7.5: Simplify (a * 64) % 256 / 4
+// Answer: (a * 64) % 256 / 4 = ((a % 4) * 64) / 4 (Rule O)
+//                            = (a % 4) * (64 / 4) (Rule M)
+//                            = (a % 4) * 16 (Basic math)
+//
+// Example 7.6: Simplify (a * 4) / 256
+// Answer: (a * 4) / 256 = a / (256 / 4) (Rule P)
+//                       = a / 64 (Basic math)
+//
+// Example 7.7: Given that a >= 0 and b >= 0, simplify (a * 256 + b) / 4
+// Answer: because (a * 256) % 4 = 0, we have
+//   (a * 256 + b) / 4 = a * 256 / 4 + b / 4 (Rule Q)
+//                     = a * (256 / 4) + b / 4 (Rule M)
+//                     = a * 64 + b / 4 (Basic math)
+//
+// Example 7.8: Given that a >= 0 and 0 <= b < 4, simplify (a * 4 + b) / 4
+// Answer: Similar to the above, we have
+//   (a * 4 + b) / 4 = a + b / 4
+//                   = a + 0 (Rule I)
+//                   = a
+
+namespace nvfuser {
+
+// Information for a single variable. The possible values that this variable
+// can take are: start, start + step, start + 2 * step, ... (< stop), similar
+// to the loop variable of a for loop:
+//   for variable in range(start, stop, step)
+struct VarInfo {
+  Val* variable = nullptr;
+  // Whether this variable is an unrolled loop index. It is important to know
+  // this because an unrolled loop index is a compile-time constant to nvRTC.
+  // Note that a constant to nvRTC might not be a constant to nvFuser. For
+  // example, if I have the loop
+  //   #pragma unroll
+  //   FOR i1 in ...:
+  //     ...
+  // then `i1` is a compile-time constant to nvRTC, but not a compile-time
+  // constant to nvFuser.
+  bool is_unrolled_loop_index = false;
+};
+
+// Analyze expression register usage
+enum class RegisterType { GeneralPurpose, Uniform, Immediate, Unknown };
+RegisterType getRegisterType(Val* value);
+
+// Simplify expressions using the given information about variables.
+//
+// The argument `variables` specifies which scalars are considered variables,
+// along with some information about them. Any scalar not contained in
+// `variables` is considered a constant. Tensors are always considered
+// variables, regardless of whether they are specified in `variables`.
+//
+// Note that in `variables`, the order matters. This order specifies how we
+// should organize associative and commutative expressions. For example, if
+// `variables` is {a, b, c, d}, then we will simplify (a + d) + (c + b) as
+// ((a + b) + c) + d. Tensors are always placed to the right of all scalars,
+// regardless of whether they are inside `variables` or not.
+// See note [Reordering associative and commutative operators] for detailed
+// information about this reordering.
+//
+// Some simplifications, like a*b/b -> a, are always correct in the valid
+// case, but when there is an error (e.g. division-by-zero), these
+// simplifications could potentially hide the error. The argument
+// `preserve_error` specifies whether we should disable these optimizations,
+// unless we can prove there won't be an error.
+NVF_API Val* simplifyExpr(
+    Val* value,
+    const std::list<VarInfo>& variables = {},
+    std::vector<Val*> assumptions = {},
+    bool preserve_error = false);
+
+class Context;
+namespace assoc_comm {
+// The expression type that represents flattened ops. For example, if I have
+// out = a + b + 3 + c + 5, then I will have:
+//   FlattenedAssocCommOp {
+//     inputs: [a, b, 3, c, 5]
+//     outputs: [out]
+//   }
+class FlattenedAssocCommOp : public Expr {
+ public:
+  using Expr::Expr;
+
+  FlattenedAssocCommOp(
+      IrBuilderPasskey passkey,
+      BinaryOpType op,
+      Val* out,
+      std::vector<Val*> terms);
+
+  NVFUSER_DECLARE_CLONE_AND_CREATE
+
+  const char* getOpString() const override;
+
+  // FlattenedAssocCommOp is unordered, so we should have
+  // FlattenedAdd(a, b)->sameAs(FlattenedAdd(b, a))
+  bool sameAs(const Statement* other) const override;
+
+  std::string toString(int indent_size = 0) const override;
+
+  std::string toInlineString(int indent_size = 0) const override;
+
+  DataType dtype() const {
+    return *output(0)->getDataType();
+  }
+
+  BinaryOpType getOpType() const {
+    return attribute<BinaryOpType>(0);
+  }
+
+  // Get a vector of inputs, sorted in the order given by `variables`. Note
+  // that the sorting key is the rightmost variable that an input depends on.
+  // For example, if I have two inputs,
+  //   v1 = a * c
+  //   v2 = b
+  // and `variables` is [a, b, c], then v2 < v1 because the rightmost variable
+  // v2 depends on is b, the rightmost variable v1 depends on is c, and b < c.
+  // So in this example, this function will return [v2, v1].
+  // Tensors are always considered variables, and they always sort as the
+  // rightmost.
+  std::vector<Val*> sortedInputs(const Context& context);
+
+  bool isTrivial() const {
+    return inputs().size() == 1;
+  }
+
+  std::vector<PolymorphicValue> evaluate(
+      const ExpressionEvaluator& ee,
+      const std::vector<PolymorphicValue>& inputs) const override;
+};
+
+} // namespace assoc_comm
+
+} // namespace nvfuser
nvfuser/include/nvfuser/flatbuffers/allocator.h
@@ -0,0 +1,68 @@
+/*
+ * Copyright 2021 Google Inc. All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef FLATBUFFERS_ALLOCATOR_H_
+#define FLATBUFFERS_ALLOCATOR_H_
+
+#include "flatbuffers/base.h"
+
+namespace flatbuffers {
+
+// Allocator interface. This is flatbuffers-specific and meant only for
+// `vector_downward` usage.
+class Allocator {
+ public:
+  virtual ~Allocator() {}
+
+  // Allocate `size` bytes of memory.
+  virtual uint8_t *allocate(size_t size) = 0;
+
+  // Deallocate `size` bytes of memory at `p` allocated by this allocator.
+  virtual void deallocate(uint8_t *p, size_t size) = 0;
+
+  // Reallocate `new_size` bytes of memory, replacing the old region of size
+  // `old_size` at `p`. In contrast to a normal realloc, this grows downwards,
+  // and is intended specifically for `vector_downward` use.
+  // `in_use_back` and `in_use_front` indicate how much of `old_size` is
+  // actually in use at each end, and needs to be copied.
+  virtual uint8_t *reallocate_downward(uint8_t *old_p, size_t old_size,
+                                       size_t new_size, size_t in_use_back,
+                                       size_t in_use_front) {
+    FLATBUFFERS_ASSERT(new_size > old_size);  // vector_downward only grows
+    uint8_t *new_p = allocate(new_size);
+    memcpy_downward(old_p, old_size, new_p, new_size, in_use_back,
+                    in_use_front);
+    deallocate(old_p, old_size);
+    return new_p;
+  }
+
+ protected:
+  // Called by `reallocate_downward` to copy memory from `old_p` of `old_size`
+  // to `new_p` of `new_size`. Only memory of size `in_use_front` and
+  // `in_use_back` will be copied from the front and back of the old memory
+  // allocation.
+  void memcpy_downward(uint8_t *old_p, size_t old_size, uint8_t *new_p,
+                       size_t new_size, size_t in_use_back,
+                       size_t in_use_front) {
+    memcpy(new_p + new_size - in_use_back, old_p + old_size - in_use_back,
+           in_use_back);
+    memcpy(new_p, old_p, in_use_front);
+  }
+};
+
+}  // namespace flatbuffers
+
+#endif  // FLATBUFFERS_ALLOCATOR_H_
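
Only `allocate` and `deallocate` are pure virtual, so a custom allocator needs to supply just those two; `reallocate_downward` and its front/back copy logic are inherited from the base class above. A minimal sketch of our own (the `TracingAllocator` name is illustrative, not part of FlatBuffers):

#include <cstdio>

#include "flatbuffers/allocator.h"

// Logs every allocation so you can watch vector_downward grow. Storage is
// backed by plain new[]/delete[].
class TracingAllocator : public flatbuffers::Allocator {
 public:
  uint8_t *allocate(size_t size) override {
    std::printf("allocate   %zu bytes\n", size);
    return new uint8_t[size];
  }
  void deallocate(uint8_t *p, size_t size) override {
    std::printf("deallocate %zu bytes\n", size);
    delete[] p;
  }
};

A builder can then be constructed over it, e.g. `flatbuffers::FlatBufferBuilder fbb(1024, &my_tracing_allocator);`; we believe this matches the FlatBufferBuilder constructor, but verify against the headers shipped in this wheel.
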
nvfuser/include/nvfuser/flatbuffers/array.h
@@ -0,0 +1,253 @@
+/*
+ * Copyright 2021 Google Inc. All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef FLATBUFFERS_ARRAY_H_
+#define FLATBUFFERS_ARRAY_H_
+
+#include <memory>
+
+#include "flatbuffers/base.h"
+#include "flatbuffers/stl_emulation.h"
+#include "flatbuffers/vector.h"
+
+namespace flatbuffers {
+
+// This is used as a helper type for accessing arrays.
+template<typename T, uint16_t length> class Array {
+  // Array<T> can carry only POD data types (scalars or structs).
+  typedef typename flatbuffers::bool_constant<flatbuffers::is_scalar<T>::value>
+      scalar_tag;
+  typedef
+      typename flatbuffers::conditional<scalar_tag::value, T, const T *>::type
+          IndirectHelperType;
+
+ public:
+  typedef uint16_t size_type;
+  typedef typename IndirectHelper<IndirectHelperType>::return_type return_type;
+  typedef VectorConstIterator<T, return_type> const_iterator;
+  typedef VectorReverseIterator<const_iterator> const_reverse_iterator;
+
+  // If T is a LE-scalar or a struct (!scalar_tag::value).
+  static FLATBUFFERS_CONSTEXPR bool is_span_observable =
+      (scalar_tag::value && (FLATBUFFERS_LITTLEENDIAN || sizeof(T) == 1)) ||
+      !scalar_tag::value;
+
+  FLATBUFFERS_CONSTEXPR uint16_t size() const { return length; }
+
+  return_type Get(uoffset_t i) const {
+    FLATBUFFERS_ASSERT(i < size());
+    return IndirectHelper<IndirectHelperType>::Read(Data(), i);
+  }
+
+  return_type operator[](uoffset_t i) const { return Get(i); }
+
+  // If this is a Vector of enums, T will be its storage type, not the enum
+  // type. This function makes it convenient to retrieve values with enum
+  // type E.
+  template<typename E> E GetEnum(uoffset_t i) const {
+    return static_cast<E>(Get(i));
+  }
+
+  const_iterator begin() const { return const_iterator(Data(), 0); }
+  const_iterator end() const { return const_iterator(Data(), size()); }
+
+  const_reverse_iterator rbegin() const {
+    return const_reverse_iterator(end());
+  }
+  const_reverse_iterator rend() const {
+    return const_reverse_iterator(begin());
+  }
+
+  const_iterator cbegin() const { return begin(); }
+  const_iterator cend() const { return end(); }
+
+  const_reverse_iterator crbegin() const { return rbegin(); }
+  const_reverse_iterator crend() const { return rend(); }
+
+  // Get a mutable pointer to elements inside this array.
+  // This method is used to mutate arrays of structs, followed by a @p Mutate
+  // operation. For primitive types use @p Mutate directly.
+  // @warning Assignments and reads to/from the dereferenced pointer are not
+  // automatically converted to the correct endianness.
+  typename flatbuffers::conditional<scalar_tag::value, void, T *>::type
+  GetMutablePointer(uoffset_t i) const {
+    FLATBUFFERS_ASSERT(i < size());
+    return const_cast<T *>(&data()[i]);
+  }
+
+  // Change elements if you have a non-const pointer to this object.
+  void Mutate(uoffset_t i, const T &val) { MutateImpl(scalar_tag(), i, val); }
+
+  // The raw data in little endian format. Use with care.
+  const uint8_t *Data() const { return data_; }
+
+  uint8_t *Data() { return data_; }
+
+  // Similarly, but typed, much like std::vector::data
+  const T *data() const { return reinterpret_cast<const T *>(Data()); }
+  T *data() { return reinterpret_cast<T *>(Data()); }
+
+  // Copy data from a span with endian conversion.
+  // If this Array and the span overlap, the behavior is undefined.
+  void CopyFromSpan(flatbuffers::span<const T, length> src) {
+    const auto p1 = reinterpret_cast<const uint8_t *>(src.data());
+    const auto p2 = Data();
+    FLATBUFFERS_ASSERT(!(p1 >= p2 && p1 < (p2 + length)) &&
+                       !(p2 >= p1 && p2 < (p1 + length)));
+    (void)p1;
+    (void)p2;
+    CopyFromSpanImpl(flatbuffers::bool_constant<is_span_observable>(), src);
+  }
+
+ protected:
+  void MutateImpl(flatbuffers::true_type, uoffset_t i, const T &val) {
+    FLATBUFFERS_ASSERT(i < size());
+    WriteScalar(data() + i, val);
+  }
+
+  void MutateImpl(flatbuffers::false_type, uoffset_t i, const T &val) {
+    *(GetMutablePointer(i)) = val;
+  }
+
+  void CopyFromSpanImpl(flatbuffers::true_type,
+                        flatbuffers::span<const T, length> src) {
+    // Use std::memcpy() instead of std::copy() to avoid performance
+    // degradation due to aliasing if T is char or unsigned char.
+    // The size is known at compile time, so memcpy would be inlined.
+    std::memcpy(data(), src.data(), length * sizeof(T));
+  }
+
+  // Copy data from flatbuffers::span with endian conversion.
+  void CopyFromSpanImpl(flatbuffers::false_type,
+                        flatbuffers::span<const T, length> src) {
+    for (size_type k = 0; k < length; k++) { Mutate(k, src[k]); }
+  }
+
+  // This class is only used to access pre-existing data. Don't ever
+  // try to construct these manually.
+  // 'constexpr' allows us to use 'size()' at compile time.
+  // @note Must not use 'FLATBUFFERS_CONSTEXPR' here, as const is not allowed
+  // on a constructor.
+#if defined(__cpp_constexpr)
+  constexpr Array();
+#else
+  Array();
+#endif
+
+  uint8_t data_[length * sizeof(T)];
+
+ private:
+  // This class is a pointer. Copying will therefore create an invalid object.
+  // Private and unimplemented copy constructor.
+  Array(const Array &);
+  Array &operator=(const Array &);
+};
+
+// Specialization for Array[struct] with access using Offset<void> pointer.
+// This specialization is used by idl_gen_text.cpp.
+template<typename T, uint16_t length> class Array<Offset<T>, length> {
+  static_assert(flatbuffers::is_same<T, void>::value, "unexpected type T");
+
+ public:
+  typedef const void *return_type;
+
+  const uint8_t *Data() const { return data_; }
+
+  // Make idl_gen_text.cpp::PrintContainer happy.
+  return_type operator[](uoffset_t) const {
+    FLATBUFFERS_ASSERT(false);
+    return nullptr;
+  }
+
+ private:
+  // This class is only used to access pre-existing data.
+  Array();
+  Array(const Array &);
+  Array &operator=(const Array &);
+
+  uint8_t data_[1];
+};
+
+template<class U, uint16_t N>
+FLATBUFFERS_CONSTEXPR_CPP11 flatbuffers::span<U, N> make_span(Array<U, N> &arr)
+    FLATBUFFERS_NOEXCEPT {
+  static_assert(
+      Array<U, N>::is_span_observable,
+      "wrong type U, only plain struct, LE-scalar, or byte types are allowed");
+  return span<U, N>(arr.data(), N);
+}
+
+template<class U, uint16_t N>
+FLATBUFFERS_CONSTEXPR_CPP11 flatbuffers::span<const U, N> make_span(
+    const Array<U, N> &arr) FLATBUFFERS_NOEXCEPT {
+  static_assert(
+      Array<U, N>::is_span_observable,
+      "wrong type U, only plain struct, LE-scalar, or byte types are allowed");
+  return span<const U, N>(arr.data(), N);
+}
+
+template<class U, uint16_t N>
+FLATBUFFERS_CONSTEXPR_CPP11 flatbuffers::span<uint8_t, sizeof(U) * N>
+make_bytes_span(Array<U, N> &arr) FLATBUFFERS_NOEXCEPT {
+  static_assert(Array<U, N>::is_span_observable,
+                "internal error, Array<T> might hold only scalars or structs");
+  return span<uint8_t, sizeof(U) * N>(arr.Data(), sizeof(U) * N);
+}
+
+template<class U, uint16_t N>
+FLATBUFFERS_CONSTEXPR_CPP11 flatbuffers::span<const uint8_t, sizeof(U) * N>
+make_bytes_span(const Array<U, N> &arr) FLATBUFFERS_NOEXCEPT {
+  static_assert(Array<U, N>::is_span_observable,
+                "internal error, Array<T> might hold only scalars or structs");
+  return span<const uint8_t, sizeof(U) * N>(arr.Data(), sizeof(U) * N);
+}
+
+// Cast a raw T[length] to a raw flatbuffers::Array<T, length>
+// without endian conversion. Use with care.
+// TODO: move these Cast-methods to `internal` namespace.
+template<typename T, uint16_t length>
+Array<T, length> &CastToArray(T (&arr)[length]) {
+  return *reinterpret_cast<Array<T, length> *>(arr);
+}
+
+template<typename T, uint16_t length>
+const Array<T, length> &CastToArray(const T (&arr)[length]) {
+  return *reinterpret_cast<const Array<T, length> *>(arr);
+}
+
+template<typename E, typename T, uint16_t length>
+Array<E, length> &CastToArrayOfEnum(T (&arr)[length]) {
+  static_assert(sizeof(E) == sizeof(T), "invalid enum type E");
+  return *reinterpret_cast<Array<E, length> *>(arr);
+}
+
+template<typename E, typename T, uint16_t length>
+const Array<E, length> &CastToArrayOfEnum(const T (&arr)[length]) {
+  static_assert(sizeof(E) == sizeof(T), "invalid enum type E");
+  return *reinterpret_cast<const Array<E, length> *>(arr);
+}
+
+template<typename T, uint16_t length>
+bool operator==(const Array<T, length> &lhs,
+                const Array<T, length> &rhs) noexcept {
+  return std::addressof(lhs) == std::addressof(rhs) ||
+         (lhs.size() == rhs.size() &&
+          std::memcmp(lhs.Data(), rhs.Data(), rhs.size() * sizeof(T)) == 0);
+}
+
+}  // namespace flatbuffers
+
+#endif  // FLATBUFFERS_ARRAY_H_
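
To make the accessor concrete, here is a small usage sketch of our own (not from the FlatBuffers docs), assuming a little-endian host and an include path pointing at the headers in this wheel. It views a plain C array through Array via the CastToArray helper above:

#include <cstdio>

#include "flatbuffers/array.h"

int main() {
  int32_t raw[4] = {10, 20, 30, 40};

  // View the C array through the fixed-length accessor; the cast itself
  // performs no copy and no endian conversion.
  flatbuffers::Array<int32_t, 4> &arr = flatbuffers::CastToArray(raw);
  std::printf("size=%d first=%d\n", static_cast<int>(arr.size()),
              static_cast<int>(arr.Get(0)));

  // Mutate element 1 in place, then expose the storage as a fixed-extent
  // span (is_span_observable holds for LE scalars on little-endian hosts).
  arr.Mutate(1, 25);
  flatbuffers::span<int32_t, 4> s = flatbuffers::make_span(arr);
  for (int32_t v : s) std::printf("%d ", static_cast<int>(v));
  std::printf("\n");  // prints: 10 25 30 40
  return 0;
}

In generated FlatBuffers code, Array<T, length> is normally reached through struct field accessors rather than CastToArray; the cast helpers exist for the raw-storage case shown here.
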